├── bin ├── anyclass ├── trec_setup ├── terrier-env.sh ├── fq.bat ├── lcp.bat └── debug.bat ├── var └── index │ └── DEFAULT_INDEX_DIR ├── doc ├── javadoc │ └── DEFAULT_JAVADOC_DIR ├── requirements.txt ├── README.md ├── images │ ├── img1.png │ ├── img2.png │ ├── img4.png │ ├── img11.png │ ├── img14.png │ ├── img16.png │ ├── img19.png │ ├── img23.png │ ├── img24.png │ ├── img26.png │ ├── WT2GWebInterface.png │ ├── terrier-logo-web.jpg │ ├── SimpleWebInterface.png │ ├── indexing_architecture.png │ ├── retrieval_architecture.png │ └── indexing_architecture-source.odg ├── contacts.md ├── Makefile ├── make.bat ├── terrier_desktop.md └── todo.md ├── share ├── tests │ ├── files │ │ ├── helloworld.txt │ │ ├── helloworld.txt.bgz │ │ ├── helloworld.txt.bz2 │ │ └── helloworld.txt.gz │ ├── trec │ │ ├── wt2g.indexproperties │ │ └── wt2g.runs │ ├── shakespeare │ │ ├── test.shakespeare-merchant.phrase.topics │ │ ├── test.shakespeare-merchant.disjunctive.topics │ │ ├── test.shakespeare-merchant.field.topics │ │ ├── test.shakespeare-merchant.phrase-fields.topics │ │ ├── indices │ │ │ └── terrier-3.x │ │ │ │ ├── shak-basic.direct.bf │ │ │ │ ├── shak-basic.meta.idx │ │ │ │ ├── shak-blocks.meta.idx │ │ │ │ ├── shak-fields.meta.idx │ │ │ │ ├── shak-basic.inverted.bf │ │ │ │ ├── shak-basic.meta.zdata │ │ │ │ ├── shak-blocks.direct.bf │ │ │ │ ├── shak-blocks.meta.zdata │ │ │ │ ├── shak-fields.direct.bf │ │ │ │ ├── shak-fields.meta.zdata │ │ │ │ ├── shak-blocks.inverted.bf │ │ │ │ ├── shak-fields.inverted.bf │ │ │ │ ├── shak-basic.lexicon.fsomapid │ │ │ │ ├── shak-fieldsblocks.direct.bf │ │ │ │ ├── shak-fieldsblocks.meta.idx │ │ │ │ ├── shak-basic.lexicon.fsomapfile │ │ │ │ ├── shak-basic.lexicon.fsomaphash │ │ │ │ ├── shak-blocks.lexicon.fsomapfile │ │ │ │ ├── shak-blocks.lexicon.fsomaphash │ │ │ │ ├── shak-blocks.lexicon.fsomapid │ │ │ │ ├── shak-fields.lexicon.fsomapfile │ │ │ │ ├── shak-fields.lexicon.fsomaphash │ │ │ │ ├── shak-fields.lexicon.fsomapid │ │ │ │ ├── 
shak-fieldsblocks.inverted.bf │ │ │ │ ├── shak-fieldsblocks.meta.zdata │ │ │ │ ├── shak-basic.document.fsarrayfile │ │ │ │ ├── shak-blocks.document.fsarrayfile │ │ │ │ ├── shak-fields.document.fsarrayfile │ │ │ │ ├── shak-fieldsblocks.lexicon.fsomapid │ │ │ │ ├── shak-fieldsblocks.lexicon.fsomapfile │ │ │ │ ├── shak-fieldsblocks.lexicon.fsomaphash │ │ │ │ └── shak-fieldsblocks.document.fsarrayfile │ │ ├── test.shakespeare-merchant.basic.topics │ │ └── test.shakespeare-merchant.all.qrels │ ├── tweets │ │ ├── oscars.json.gz │ │ └── utf8-tweet.json │ └── simplefilecollection │ │ ├── document.doc │ │ ├── document.docx │ │ ├── document.pdf │ │ ├── document.ppt │ │ ├── document.pptx │ │ ├── document.xls │ │ ├── document.xlsx │ │ ├── document.txt │ │ └── document.html ├── images │ ├── terrier-logo-web.jpg │ ├── terrier-logo-large.jpg │ ├── terrier-desktop-search.gif │ ├── terrier-logo-web-transparent.png │ └── terrier-logo-large-transparent.png └── vaswani_npl │ └── README.md ├── modules ├── learning │ ├── src │ │ └── main │ │ │ ├── resources │ │ │ └── META-INF │ │ │ │ └── services │ │ │ │ └── org.terrier.applications.CLITool │ │ │ └── java │ │ │ └── org │ │ │ └── terrier │ │ │ ├── matching │ │ │ └── FilterMatching.java │ │ │ └── learning │ │ │ └── package.html │ └── pom.xml ├── rest-server │ └── src │ │ └── main │ │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── org.terrier.applications.CLITool │ │ └── java │ │ └── org │ │ └── terrier │ │ └── rest │ │ └── package-info.java ├── core │ └── src │ │ └── main │ │ ├── resources │ │ ├── META-INF │ │ │ └── services │ │ │ │ ├── org.terrier.querying.ManagerFactory$Builder │ │ │ │ ├── org.terrier.structures.IndexFactory$IndexLoader │ │ │ │ └── org.terrier.applications.CLITool │ │ └── terrier.default.properties │ │ ├── java │ │ ├── org │ │ │ └── terrier │ │ │ │ ├── querying │ │ │ │ ├── parser │ │ │ │ │ ├── .gitignore │ │ │ │ │ └── package.html │ │ │ │ ├── MQTRewritingProcess.java │ │ │ │ ├── summarisation │ │ │ │ │ └── 
package.html │ │ │ │ ├── package.html │ │ │ │ ├── PostProcess.java │ │ │ │ ├── ManagerRequisite.java │ │ │ │ ├── ProcessPhaseRequisites.java │ │ │ │ └── TerrierQLParser.java │ │ │ │ ├── matching │ │ │ │ ├── models │ │ │ │ │ ├── Null.java │ │ │ │ │ ├── CoordinateMatch.java │ │ │ │ │ ├── dependence │ │ │ │ │ │ ├── package-info.java │ │ │ │ │ │ └── pBiL2.java │ │ │ │ │ ├── queryexpansion │ │ │ │ │ │ └── package.html │ │ │ │ │ └── DFRNorm2BaseModel.java │ │ │ │ ├── package.html │ │ │ │ ├── dsms │ │ │ │ │ └── package.html │ │ │ │ ├── taat │ │ │ │ │ └── package.html │ │ │ │ ├── daat │ │ │ │ │ └── package.html │ │ │ │ ├── matchops │ │ │ │ │ └── package-info.java │ │ │ │ └── tsms │ │ │ │ │ └── package.html │ │ │ │ ├── tartarus │ │ │ │ └── snowball │ │ │ │ │ ├── package-info.java │ │ │ │ │ └── ext │ │ │ │ │ └── package-info.java │ │ │ │ ├── structures │ │ │ │ ├── UncompressedMetaIndex.java │ │ │ │ ├── postings │ │ │ │ │ └── bit │ │ │ │ │ │ └── package.html │ │ │ │ ├── restructure │ │ │ │ │ └── package-info.java │ │ │ │ ├── bit │ │ │ │ │ └── package.html │ │ │ │ ├── collections │ │ │ │ │ ├── package.html │ │ │ │ │ └── OrderedMap.java │ │ │ │ ├── seralization │ │ │ │ │ ├── package.html │ │ │ │ │ └── WriteableFactory.java │ │ │ │ ├── package.html │ │ │ │ └── IndexConfigurable.java │ │ │ │ ├── utility │ │ │ │ ├── io │ │ │ │ │ ├── package.html │ │ │ │ │ └── RandomDataOutput.java │ │ │ │ ├── package.html │ │ │ │ ├── restructure │ │ │ │ │ └── package.html │ │ │ │ └── MemoryChecker.java │ │ │ │ ├── statistics │ │ │ │ └── package.html │ │ │ │ ├── sorting │ │ │ │ └── package.html │ │ │ │ ├── terms │ │ │ │ ├── package.html │ │ │ │ ├── Stemmer.java │ │ │ │ ├── DutchSnowballStemmer.java │ │ │ │ ├── DanishSnowballStemmer.java │ │ │ │ ├── FrenchSnowballStemmer.java │ │ │ │ ├── EnglishSnowballStemmer.java │ │ │ │ ├── FinnishSnowballStemmer.java │ │ │ │ ├── GermanSnowballStemmer.java │ │ │ │ ├── ItalianSnowballStemmer.java │ │ │ │ ├── RussianSnowballStemmer.java │ │ │ │ ├── 
SpanishSnowballStemmer.java │ │ │ │ ├── SwedishSnowballStemmer.java │ │ │ │ ├── TurkishSnowballStemmer.java │ │ │ │ ├── RomanianSnowballStemmer.java │ │ │ │ ├── HungarianSnowballStemmer.java │ │ │ │ ├── NorwegianSnowballStemmer.java │ │ │ │ └── PortugueseSnowballStemmer.java │ │ │ │ ├── compression │ │ │ │ └── bit │ │ │ │ │ └── BitWritable.java │ │ │ │ └── indexing │ │ │ │ └── tokenisation │ │ │ │ └── TokenStream.java │ │ ├── package.html │ │ └── overview.html │ │ └── templates │ │ └── org │ │ └── terrier │ │ └── Version.java ├── concurrent │ ├── src │ │ ├── main │ │ │ └── resources │ │ │ │ └── META-INF │ │ │ │ └── services │ │ │ │ ├── org.terrier.querying.ManagerFactory$Builder │ │ │ │ ├── org.terrier.structures.IndexFactory$IndexLoader │ │ │ │ └── org.terrier.applications.CLITool │ │ └── test │ │ │ └── java │ │ │ └── org │ │ │ └── terrier │ │ │ └── structures │ │ │ └── concurrent │ │ │ ├── TestShakParallelTRECQueryingMem.java │ │ │ └── TestThreadSafeManager.java │ ├── README.md │ └── pom.xml ├── rest-client │ ├── src │ │ └── main │ │ │ ├── resources │ │ │ └── META-INF │ │ │ │ └── services │ │ │ │ └── org.terrier.querying.ManagerFactory$Builder │ │ │ └── java │ │ │ └── org │ │ │ └── terrier │ │ │ └── restclient │ │ │ └── package-info.java │ └── pom.xml ├── realtime │ ├── src │ │ └── main │ │ │ ├── resources │ │ │ └── META-INF │ │ │ │ └── services │ │ │ │ └── org.terrier.structures.IndexFactory$IndexLoader │ │ │ └── java │ │ │ └── org │ │ │ └── terrier │ │ │ └── realtime │ │ │ ├── multi │ │ │ ├── BlockMultiIterablePosting.java │ │ │ ├── BlockMultiDirectIterablePosting.java │ │ │ └── package.html │ │ │ ├── restructure │ │ │ └── package.html │ │ │ ├── memory │ │ │ ├── fields │ │ │ │ ├── package.html │ │ │ │ └── MemoryFieldsDirectIterablePosting.java │ │ │ ├── package.html │ │ │ ├── MemoryPostingList.java │ │ │ ├── MetaIndexMap.java │ │ │ └── MemoryPointer.java │ │ │ ├── package.html │ │ │ ├── matching │ │ │ └── package.html │ │ │ └── incremental │ │ │ └── 
package.html │ └── pom.xml ├── batch-retrieval │ ├── src │ │ └── main │ │ │ ├── resources │ │ │ └── META-INF │ │ │ │ └── services │ │ │ │ └── org.terrier.applications.CLITool │ │ │ └── java │ │ │ └── org │ │ │ └── terrier │ │ │ ├── structures │ │ │ ├── cache │ │ │ │ ├── package.html │ │ │ │ └── GrowingMapQueryStringResultCache.java │ │ │ └── outputformat │ │ │ │ └── package.html │ │ │ ├── applications │ │ │ └── batchquerying │ │ │ │ ├── QuerySourceUtils.java │ │ │ │ └── package.html │ │ │ └── evaluation │ │ │ └── package.html │ └── pom.xml ├── tests │ ├── src │ │ └── test │ │ │ ├── java │ │ │ └── org │ │ │ │ └── terrier │ │ │ │ ├── structures │ │ │ │ ├── TestLZ4MetaIndex.java │ │ │ │ ├── TestZstdMetaIndex.java │ │ │ │ ├── TestCompressingMetaIndex.java │ │ │ │ ├── TestUncompressingMetaIndex.java │ │ │ │ └── collections │ │ │ │ │ └── TestMergedIterator.java │ │ │ │ ├── matching │ │ │ │ ├── TestDAATFullMatching.java │ │ │ │ ├── TestTAATFullMatching.java │ │ │ │ └── models │ │ │ │ │ └── TestBM25.java │ │ │ │ ├── realtime │ │ │ │ └── multi │ │ │ │ │ └── TestMultiLexicon.java │ │ │ │ ├── applications │ │ │ │ ├── TestDirectQuerySource.java │ │ │ │ ├── TestCLITool.java │ │ │ │ └── TestShowDocumentCommand.java │ │ │ │ ├── utility │ │ │ │ ├── TestClassNameParser.java │ │ │ │ ├── TestVersion.java │ │ │ │ └── TestTimer.java │ │ │ │ ├── tests │ │ │ │ └── TRECWT2GEndtoEndTest.java │ │ │ │ └── fat │ │ │ │ ├── TestScoringMatchingWithFat.java │ │ │ │ └── FatTestSuite.java │ │ │ └── resources │ │ │ └── logback-test.xml │ └── share │ │ └── tests │ │ └── tweets │ │ └── utf8-tweet.json ├── index-api │ ├── src │ │ └── main │ │ │ └── java │ │ │ └── org │ │ │ └── terrier │ │ │ └── structures │ │ │ ├── package-info.java │ │ │ ├── ConcurrentReadable.java │ │ │ ├── FieldDocumentIndex.java │ │ │ ├── BlockEntryStatistics.java │ │ │ ├── FieldEntryStatistics.java │ │ │ └── NgramEntryStatistics.java │ └── TODO.md ├── retrieval-api │ ├── pom.xml │ └── src │ │ └── main │ │ └── java │ │ └── org │ │ 
└── terrier │ │ └── querying │ │ └── ScoredDocList.java ├── batch-indexers │ ├── src │ │ └── main │ │ │ ├── resources │ │ │ └── META-INF │ │ │ │ └── services │ │ │ │ └── org.terrier.applications.CLITool │ │ │ └── java │ │ │ └── org │ │ │ └── terrier │ │ │ └── structures │ │ │ ├── indexing │ │ │ ├── classical │ │ │ │ └── package.html │ │ │ ├── UncompressedMetaIndexBuilder.java │ │ │ ├── singlepass │ │ │ │ └── package.html │ │ │ ├── package.html │ │ │ ├── ZstdMetaIndexBuilder.java │ │ │ └── LZ4MetaIndexBuilder.java │ │ │ └── merging │ │ │ └── package.html │ └── pom.xml ├── logging │ └── pom.xml ├── assemblies │ └── src │ │ └── assembly │ │ └── module-jar-with-dependencies.xml └── docvectors │ └── pom.xml ├── .readthedocs.yml ├── .gitignore ├── src └── webapps │ ├── wt2g │ ├── style.css │ └── index.html │ └── simple │ ├── style.css │ └── index.html ├── etc └── logback.xml ├── licenses ├── apache │ └── APL-jarfiles.txt ├── textmining │ └── License.txt ├── trove │ └── README-license.txt ├── jforests │ └── CitationPolicy.txt ├── snowball │ └── bsd-license.txt └── pdfbox │ └── LICENSE ├── .settings └── org.eclipse.jdt.core.prefs └── .github └── workflows └── build-unit-tests.yml /bin/anyclass: -------------------------------------------------------------------------------- 1 | anyclass.sh -------------------------------------------------------------------------------- /bin/trec_setup: -------------------------------------------------------------------------------- 1 | trec_setup.sh -------------------------------------------------------------------------------- /var/index/DEFAULT_INDEX_DIR: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /doc/javadoc/DEFAULT_JAVADOC_DIR: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /doc/requirements.txt: 
-------------------------------------------------------------------------------- 1 | javasphinx 2 | -------------------------------------------------------------------------------- /share/tests/files/helloworld.txt: -------------------------------------------------------------------------------- 1 | hello world 2 | -------------------------------------------------------------------------------- /doc/README.md: -------------------------------------------------------------------------------- 1 | See [index.md](index.md) for the documentation 2 | -------------------------------------------------------------------------------- /doc/images/img1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/doc/images/img1.png -------------------------------------------------------------------------------- /doc/images/img2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/doc/images/img2.png -------------------------------------------------------------------------------- /doc/images/img4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/doc/images/img4.png -------------------------------------------------------------------------------- /doc/images/img11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/doc/images/img11.png -------------------------------------------------------------------------------- /doc/images/img14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/doc/images/img14.png -------------------------------------------------------------------------------- /doc/images/img16.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/doc/images/img16.png -------------------------------------------------------------------------------- /doc/images/img19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/doc/images/img19.png -------------------------------------------------------------------------------- /doc/images/img23.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/doc/images/img23.png -------------------------------------------------------------------------------- /doc/images/img24.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/doc/images/img24.png -------------------------------------------------------------------------------- /doc/images/img26.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/doc/images/img26.png -------------------------------------------------------------------------------- /share/tests/trec/wt2g.indexproperties: -------------------------------------------------------------------------------- 1 | num.Documents 247491 2 | #num.Terms 1002691 3 | index.meta.entries 247491 4 | -------------------------------------------------------------------------------- /doc/images/WT2GWebInterface.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/doc/images/WT2GWebInterface.png -------------------------------------------------------------------------------- /doc/images/terrier-logo-web.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/doc/images/terrier-logo-web.jpg -------------------------------------------------------------------------------- /modules/learning/src/main/resources/META-INF/services/org.terrier.applications.CLITool: -------------------------------------------------------------------------------- 1 | org.terrier.learning.JForestsCommand -------------------------------------------------------------------------------- /share/tests/shakespeare/test.shakespeare-merchant.phrase.topics: -------------------------------------------------------------------------------- 1 | TEST20 "DRAMATIS PERSONAE" 2 | TEST21 "Sweet doctor" 3 | -------------------------------------------------------------------------------- /doc/images/SimpleWebInterface.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/doc/images/SimpleWebInterface.png -------------------------------------------------------------------------------- /modules/rest-server/src/main/resources/META-INF/services/org.terrier.applications.CLITool: -------------------------------------------------------------------------------- 1 | org.terrier.rest.SingleIndexRestServer -------------------------------------------------------------------------------- /share/images/terrier-logo-web.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/images/terrier-logo-web.jpg -------------------------------------------------------------------------------- /share/tests/tweets/oscars.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/tweets/oscars.json.gz 
-------------------------------------------------------------------------------- /doc/images/indexing_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/doc/images/indexing_architecture.png -------------------------------------------------------------------------------- /share/images/terrier-logo-large.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/images/terrier-logo-large.jpg -------------------------------------------------------------------------------- /share/tests/files/helloworld.txt.bgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/files/helloworld.txt.bgz -------------------------------------------------------------------------------- /share/tests/files/helloworld.txt.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/files/helloworld.txt.bz2 -------------------------------------------------------------------------------- /share/tests/files/helloworld.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/files/helloworld.txt.gz -------------------------------------------------------------------------------- /share/tests/shakespeare/test.shakespeare-merchant.disjunctive.topics: -------------------------------------------------------------------------------- 1 | TEST40: {dramatis personae} 2 | TEST41: {dramatis suitor} 3 | -------------------------------------------------------------------------------- /share/tests/trec/wt2g.runs: -------------------------------------------------------------------------------- 1 | 
-Dtrec.model=BM25 -c 0.2381 0.3181 2 | -Dtrec.model=PL2 -c 26.04 0.3246 3 | -Dtrec.model=DFRee 0.2824 4 | -------------------------------------------------------------------------------- /doc/images/retrieval_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/doc/images/retrieval_architecture.png -------------------------------------------------------------------------------- /share/images/terrier-desktop-search.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/images/terrier-desktop-search.gif -------------------------------------------------------------------------------- /modules/core/src/main/resources/META-INF/services/org.terrier.querying.ManagerFactory$Builder: -------------------------------------------------------------------------------- 1 | org.terrier.querying.LocalManager$Builder 2 | -------------------------------------------------------------------------------- /share/tests/shakespeare/test.shakespeare-merchant.field.topics: -------------------------------------------------------------------------------- 1 | TEST10 +title:street 2 | TEST11 +speaker:mOrOCCO 3 | TEST12 +speaker:SHYLOCK 4 | -------------------------------------------------------------------------------- /share/tests/shakespeare/test.shakespeare-merchant.phrase-fields.topics: -------------------------------------------------------------------------------- 1 | TEST30 title:"Belmont Avenue" 2 | TEST31 title:"court of justice" 3 | -------------------------------------------------------------------------------- /doc/images/indexing_architecture-source.odg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/doc/images/indexing_architecture-source.odg 
-------------------------------------------------------------------------------- /modules/concurrent/src/main/resources/META-INF/services/org.terrier.querying.ManagerFactory$Builder: -------------------------------------------------------------------------------- 1 | org.terrier.querying.ThreadSafeManager$Builder -------------------------------------------------------------------------------- /modules/concurrent/src/main/resources/META-INF/services/org.terrier.structures.IndexFactory$IndexLoader: -------------------------------------------------------------------------------- 1 | org.terrier.structures.ConcurrentIndexLoader -------------------------------------------------------------------------------- /modules/rest-client/src/main/resources/META-INF/services/org.terrier.querying.ManagerFactory$Builder: -------------------------------------------------------------------------------- 1 | org.terrier.restclient.RestClientManagerBuilder -------------------------------------------------------------------------------- /share/images/terrier-logo-web-transparent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/images/terrier-logo-web-transparent.png -------------------------------------------------------------------------------- /share/tests/simplefilecollection/document.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/simplefilecollection/document.doc -------------------------------------------------------------------------------- /share/tests/simplefilecollection/document.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/simplefilecollection/document.docx -------------------------------------------------------------------------------- 
/share/tests/simplefilecollection/document.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/simplefilecollection/document.pdf -------------------------------------------------------------------------------- /share/tests/simplefilecollection/document.ppt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/simplefilecollection/document.ppt -------------------------------------------------------------------------------- /share/tests/simplefilecollection/document.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/simplefilecollection/document.pptx -------------------------------------------------------------------------------- /share/tests/simplefilecollection/document.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/simplefilecollection/document.xls -------------------------------------------------------------------------------- /share/tests/simplefilecollection/document.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/simplefilecollection/document.xlsx -------------------------------------------------------------------------------- /share/images/terrier-logo-large-transparent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/images/terrier-logo-large-transparent.png -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-basic.direct.bf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-basic.direct.bf -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-basic.meta.idx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-basic.meta.idx -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-blocks.meta.idx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-blocks.meta.idx -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-fields.meta.idx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-fields.meta.idx -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-basic.inverted.bf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-basic.inverted.bf -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-basic.meta.zdata: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-basic.meta.zdata -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-blocks.direct.bf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-blocks.direct.bf -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-blocks.meta.zdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-blocks.meta.zdata -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-fields.direct.bf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-fields.direct.bf -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-fields.meta.zdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-fields.meta.zdata -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-blocks.inverted.bf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-blocks.inverted.bf 
-------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-fields.inverted.bf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-fields.inverted.bf -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-basic.lexicon.fsomapid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-basic.lexicon.fsomapid -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-fieldsblocks.direct.bf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-fieldsblocks.direct.bf -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-fieldsblocks.meta.idx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-fieldsblocks.meta.idx -------------------------------------------------------------------------------- /modules/core/src/main/resources/META-INF/services/org.terrier.structures.IndexFactory$IndexLoader: -------------------------------------------------------------------------------- 1 | org.terrier.structures.IndexOnDisk$DiskIndexLoader 2 | org.terrier.structures.IndexFactory$DirectIndexLoader -------------------------------------------------------------------------------- 
/share/tests/shakespeare/indices/terrier-3.x/shak-basic.lexicon.fsomapfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-basic.lexicon.fsomapfile -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-basic.lexicon.fsomaphash: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-basic.lexicon.fsomaphash -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-blocks.lexicon.fsomapfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-blocks.lexicon.fsomapfile -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-blocks.lexicon.fsomaphash: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-blocks.lexicon.fsomaphash -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-blocks.lexicon.fsomapid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-blocks.lexicon.fsomapid -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-fields.lexicon.fsomapfile: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-fields.lexicon.fsomapfile -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-fields.lexicon.fsomaphash: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-fields.lexicon.fsomaphash -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-fields.lexicon.fsomapid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-fields.lexicon.fsomapid -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-fieldsblocks.inverted.bf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-fieldsblocks.inverted.bf -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-fieldsblocks.meta.zdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-fieldsblocks.meta.zdata -------------------------------------------------------------------------------- /modules/realtime/src/main/resources/META-INF/services/org.terrier.structures.IndexFactory$IndexLoader: -------------------------------------------------------------------------------- 1 | 
org.terrier.realtime.incremental.IncrementalIndex$Loader 2 | org.terrier.realtime.memory.MemoryIndex$Loader 3 | -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-basic.document.fsarrayfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-basic.document.fsarrayfile -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-blocks.document.fsarrayfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-blocks.document.fsarrayfile -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-fields.document.fsarrayfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-fields.document.fsarrayfile -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-fieldsblocks.lexicon.fsomapid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-fieldsblocks.lexicon.fsomapid -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-fieldsblocks.lexicon.fsomapfile: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-fieldsblocks.lexicon.fsomapfile -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-fieldsblocks.lexicon.fsomaphash: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-fieldsblocks.lexicon.fsomaphash -------------------------------------------------------------------------------- /share/tests/shakespeare/test.shakespeare-merchant.basic.topics: -------------------------------------------------------------------------------- 1 | TEST00 dramatis personae 2 | TEST01 portia 3 | TEST02 tubal 4 | TEST03 morocco 5 | TEST04 +ANTONIO +SALANIO 6 | TEST05 lies -twenty 7 | TEST06 +deny twenty 8 | -------------------------------------------------------------------------------- /share/tests/shakespeare/indices/terrier-3.x/shak-fieldsblocks.document.fsarrayfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/terrier-org/terrier-core/HEAD/share/tests/shakespeare/indices/terrier-3.x/shak-fieldsblocks.document.fsarrayfile -------------------------------------------------------------------------------- /modules/batch-retrieval/src/main/resources/META-INF/services/org.terrier.applications.CLITool: -------------------------------------------------------------------------------- 1 | org.terrier.applications.batchquerying.TRECQuerying$Command 2 | org.terrier.evaluation.TrecEvalEvaluation$Command 3 | org.terrier.evaluation.BatchEvaluationCommand -------------------------------------------------------------------------------- /share/vaswani_npl/README.md: -------------------------------------------------------------------------------- 1 | This is an IR test collection, i.e. 
document corpus, topics and qrels. It has been transformed into a traditional TREC format. The original version can be found at http://ir.dcs.gla.ac.uk/resources/test_collections/npl/ 2 | 3 | -------------------------------------------------------------------------------- /modules/concurrent/src/main/resources/META-INF/services/org.terrier.applications.CLITool: -------------------------------------------------------------------------------- 1 | org.terrier.applications.batchquerying.ParallelTRECQuerying$Command 2 | org.terrier.evaluation.TrecEvalEvaluation$Command 3 | org.terrier.evaluation.BatchEvaluationCommand -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # Required 2 | version: 2 3 | 4 | # Build documentation in the doc/ directory with Sphinx 5 | sphinx: 6 | configuration: doc/conf.py 7 | 8 | python: 9 | version: 3.7 10 | install: 11 | - requirements: doc/requirements.txt 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /doc/javadoc/* 2 | /classes/ 3 | /target/ 4 | /logs/ 5 | /target/ 6 | modules/*/bin/ 7 | modules/*/target/ 8 | .DS_Store 9 | .classpath 10 | .settings 11 | .project 12 | pom.xml.versionsBackup 13 | *.class 14 | overview.html 15 | package.html 16 | .vscode/settings.json 17 | -------------------------------------------------------------------------------- /share/tests/simplefilecollection/document.txt: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.
-------------------------------------------------------------------------------- /bin/terrier-env.sh: -------------------------------------------------------------------------------- 1 | #You can use this file to setup any additional environment variables used by anyclass.sh 2 | 3 | #Extra entries can be added to the classpath 4 | #CLASSPATH= 5 | 6 | #For example, to use Hadoop support in Terrier, you need to add 7 | #the Hadoop conf/ folder to the classpath 8 | #CLASSPATH=/path/to/hadoop/conf 9 | 10 | -------------------------------------------------------------------------------- /modules/tests/src/test/java/org/terrier/structures/TestLZ4MetaIndex.java: -------------------------------------------------------------------------------- 1 | package org.terrier.structures; 2 | import org.terrier.structures.indexing.*; 3 | 4 | public class TestLZ4MetaIndex extends BaseTestCompressedMetaIndex { 5 | 6 | public TestLZ4MetaIndex() { 7 | metaBuilderClass = LZ4MetaIndexBuilder.class; 8 | } 9 | 10 | } -------------------------------------------------------------------------------- /modules/tests/src/test/java/org/terrier/structures/TestZstdMetaIndex.java: -------------------------------------------------------------------------------- 1 | package org.terrier.structures; 2 | import org.terrier.structures.indexing.*; 3 | 4 | public class TestZstdMetaIndex extends BaseTestCompressedMetaIndex { 5 | 6 | public TestZstdMetaIndex() { 7 | metaBuilderClass = ZstdMetaIndexBuilder.class; 8 | } 9 | 10 | } -------------------------------------------------------------------------------- /modules/index-api/src/main/java/org/terrier/structures/package-info.java: -------------------------------------------------------------------------------- 1 | /** Provides the interfaces for the index data structures used 2 | for retrieval with the Terrier platform. 3 |

4 | These include the APIs of the inverted index, the 5 | direct index, the lexicon and the document index, as well as the Posting objects.

6 | **/ 7 | package org.terrier.structures; -------------------------------------------------------------------------------- /modules/tests/src/test/java/org/terrier/structures/TestCompressingMetaIndex.java: -------------------------------------------------------------------------------- 1 | package org.terrier.structures; 2 | import org.terrier.structures.indexing.*; 3 | 4 | public class TestCompressingMetaIndex extends BaseTestCompressedMetaIndex { 5 | 6 | public TestCompressingMetaIndex() { 7 | metaBuilderClass = CompressingMetaIndexBuilder.class; 8 | } 9 | 10 | } -------------------------------------------------------------------------------- /modules/tests/src/test/java/org/terrier/structures/TestUncompressingMetaIndex.java: -------------------------------------------------------------------------------- 1 | package org.terrier.structures; 2 | import org.terrier.structures.indexing.*; 3 | 4 | public class TestUncompressingMetaIndex extends BaseTestCompressedMetaIndex { 5 | 6 | public TestUncompressingMetaIndex() { 7 | metaBuilderClass = UncompressedMetaIndexBuilder.class; 8 | } 9 | 10 | } -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/querying/parser/.gitignore: -------------------------------------------------------------------------------- 1 | /MainTokenTypes.java 2 | /MainTokenTypes.txt 3 | /NumbersTokenTypes.java 4 | /NumbersTokenTypes.txt 5 | /TerrierFloatLexer.java 6 | /TerrierFloatLexer.smap 7 | /TerrierLexer.java 8 | /TerrierLexer.smap 9 | /TerrierQueryParser.java 10 | /TerrierQueryParser.smap 11 | /TerrierQueryParserTokenTypes.java 12 | /TerrierQueryParserTokenTypes.txt 13 | -------------------------------------------------------------------------------- /share/tests/simplefilecollection/document.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod 
tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. 5 | 6 | 7 | At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. 8 | -------------------------------------------------------------------------------- /modules/core/src/main/resources/META-INF/services/org.terrier.applications.CLITool: -------------------------------------------------------------------------------- 1 | org.terrier.applications.CLITool$HelpCLITool 2 | org.terrier.applications.CLITool$HelpAliasCLITool 3 | org.terrier.applications.InteractiveQuerying$Command 4 | org.terrier.applications.ShowDocumentCommand 5 | org.terrier.structures.IndexStatsCommand 6 | org.terrier.structures.IndexUtil$Command 7 | org.terrier.utility.SimpleJettyHTTPServer$Command 8 | -------------------------------------------------------------------------------- /src/webapps/wt2g/style.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: Arial, Helvetica, sans-serif; 3 | text-align: center; 4 | } 5 | 6 | #poweredby a { 7 | margin: 30px; 8 | font-size:75%; 9 | text-decoration:none; 10 | } 11 | 12 | #queryform { 13 | margin: 30px; 14 | } 15 | 16 | #summary { 17 | margin: 30px; 18 | background-color: #2a5a8a; 19 | } 20 | 21 | #pages { 22 | margin: 30px 23 | } 24 | 25 | 26 | #results { 27 | margin: 30px 28 | } 29 | 30 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/querying/MQTRewritingProcess.java: -------------------------------------------------------------------------------- 1 | package org.terrier.querying; 2 | 3 | import java.io.IOException; 4 | 5 | import org.terrier.matching.MatchingQueryTerms; 6 | import org.terrier.structures.Index; 7 | 8 | public interface MQTRewritingProcess extends Process { 9 | 10 | public boolean expandQuery(MatchingQueryTerms mqt, Request rq) throws IOException; 
11 | default public void configureIndex(Index index){} 12 | 13 | } 14 | -------------------------------------------------------------------------------- /doc/contacts.md: -------------------------------------------------------------------------------- 1 | Terrier Contacts 2 | ================ 3 | 4 | If you have any questions about using or extending Terrier, please use the Terrier [discussion forum](http://terrier.org/forum/). 5 | 6 | ------------------------------------------------------------------------ 7 | 8 | > Webpage: 9 | > Contact: [School of Computing Science](http://www.dcs.gla.ac.uk/) 10 | > Copyright (C) 2004-2020 [University of Glasgow](http://www.gla.ac.uk/). All Rights Reserved. 11 | -------------------------------------------------------------------------------- /etc/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/matching/models/Null.java: -------------------------------------------------------------------------------- 1 | package org.terrier.matching.models; 2 | 3 | /** A weighting model that returns 0 for each match. 
*/ 4 | public class Null extends WeightingModel { 5 | private static final long serialVersionUID = 1L; 6 | 7 | @Override 8 | public String getInfo() { 9 | return this.getClass().getSimpleName(); 10 | } 11 | 12 | @Override 13 | public double score(double tf, double docLength) { 14 | return keyFrequency * 0d; 15 | } 16 | 17 | } 18 | -------------------------------------------------------------------------------- /modules/tests/src/test/java/org/terrier/matching/TestDAATFullMatching.java: -------------------------------------------------------------------------------- 1 | package org.terrier.matching; 2 | import org.terrier.structures.Index; 3 | public class TestDAATFullMatching extends TestMatching 4 | { 5 | @Override 6 | protected Matching makeMatching(Index i) 7 | { 8 | return new org.terrier.matching.daat.Full(i); 9 | } 10 | 11 | @Override 12 | protected Class getMatchingClass() { 13 | return org.terrier.matching.daat.Full.class; 14 | } 15 | } -------------------------------------------------------------------------------- /modules/tests/src/test/java/org/terrier/matching/TestTAATFullMatching.java: -------------------------------------------------------------------------------- 1 | package org.terrier.matching; 2 | import org.terrier.structures.Index; 3 | public class TestTAATFullMatching extends TestMatching 4 | { 5 | @Override 6 | protected Matching makeMatching(Index i) 7 | { 8 | return new org.terrier.matching.taat.Full(i); 9 | } 10 | 11 | @Override 12 | protected Class getMatchingClass() { 13 | return org.terrier.matching.taat.Full.class; 14 | } 15 | } -------------------------------------------------------------------------------- /modules/tests/src/test/resources/logback-test.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- 
/licenses/apache/APL-jarfiles.txt: -------------------------------------------------------------------------------- 1 | The following jar files are covered by the Apache License 2.0: 2 | * commons-el.jar 3 | * commons-lang-2.3.jar 4 | * jasper-compiler.jar 5 | * jasper-runtime.jar 6 | * hadoop18.2-joined.jar 7 | 8 | Hadoop18.2-joined.jar also contains several jar files included in the hadoop 9 | release: 10 | commons-cli-2.0-SNAPSHOT.jar 11 | commons-httpclient-3.0.1.jar 12 | commons-logging-api-1.0.4.jar 13 | commons-codec-1.3.jar 14 | commons-logging-1.0.4.jar 15 | commons-net-1.4.1.jar 16 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/matching/models/CoordinateMatch.java: -------------------------------------------------------------------------------- 1 | package org.terrier.matching.models; 2 | 3 | /** A weighting model that returns 1 for each match. */ 4 | public class CoordinateMatch extends WeightingModel { 5 | private static final long serialVersionUID = 1L; 6 | 7 | @Override 8 | public String getInfo() { 9 | return this.getClass().getSimpleName(); 10 | } 11 | 12 | @Override 13 | public double score(double tf, double docLength) { 14 | return keyFrequency * 1d; 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /modules/retrieval-api/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | 4 | 5 | terrier-platform 6 | org.terrier 7 | 5.11 8 | ../../ 9 | 10 | 11 | terrier-retrieval-api 12 | 13 | 14 | -------------------------------------------------------------------------------- /modules/batch-indexers/src/main/resources/META-INF/services/org.terrier.applications.CLITool: -------------------------------------------------------------------------------- 1 | org.terrier.applications.CLITool$HelpCLITool 2 | org.terrier.applications.InteractiveQuerying$Command 3 | 
org.terrier.applications.BatchIndexing$Command 4 | org.terrier.structures.IndexUtil$Command 5 | org.terrier.applications.batchquerying.TRECQuerying$Command 6 | org.terrier.evaluation.TrecEvalEvaluation$Command 7 | org.terrier.utility.SimpleJettyHTTPServer$Command 8 | org.terrier.structures.indexing.singlepass.Inverted2DirectCommand 9 | org.terrier.structures.merging.StructureMerger$Command 10 | -------------------------------------------------------------------------------- /modules/tests/src/test/java/org/terrier/realtime/multi/TestMultiLexicon.java: -------------------------------------------------------------------------------- 1 | package org.terrier.realtime.multi; 2 | 3 | import org.junit.Test; 4 | import static org.junit.Assert.*; 5 | 6 | public class TestMultiLexicon { 7 | 8 | @Test public void testHashcode() 9 | { 10 | check("a"); 11 | check("abaca"); 12 | check("b"); 13 | check("various"); 14 | 15 | } 16 | 17 | void check(String t) { 18 | int hashcode = MultiLexicon.hashCode(t); 19 | //System.out.println(hashcode); 20 | assertEquals(t.charAt(0), MultiLexicon.hashCodePrefix(hashcode)); 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /src/webapps/simple/style.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: Arial, Helvetica, sans-serif; 3 | text-align: center; 4 | } 5 | 6 | #poweredby a { 7 | margin: 30px; 8 | font-size:75%; 9 | text-decoration:none; 10 | } 11 | 12 | #queryform { 13 | margin: 30px; 14 | } 15 | 16 | #summary { 17 | margin: 30px; 18 | background-color: #CCCCCC; 19 | } 20 | 21 | #pages { 22 | margin: 30px 23 | } 24 | 25 | 26 | #results { 27 | margin: 30px 28 | } 29 | 30 | #results span { 31 | display:block; 32 | } 33 | 34 | #results span.results_score { 35 | //display:none; 36 | } 37 | #results span.results_rank { 38 | display:none; 39 | } -------------------------------------------------------------------------------- 
/modules/concurrent/README.md: -------------------------------------------------------------------------------- 1 | # terrier-concurrent 2 | 3 | This package is intended to allow Terrier's /standard/ index structures to be used by multiple retrieval threads concurrently. Batch retrieval can be sped up by replacing TRECQuerying with ParallelTRECQuerying in any client code. 4 | 5 | An index has its data structures made re-entrant (i.e. thread safe) by use of `ConcurrentIndexUtils.makeConcurrentForRetrieval(Index)`. 6 | 7 | An indexref prefixed as `concurrent:` will be filtered through ConcurrentIndexUtils when loaded. E.g. 8 | 9 | IndexRef ref = IndexRef.of("concurrent:/path/to/my/index/data.properties") 10 | 11 | 12 | -------------------------------------------------------------------------------- /licenses/textmining/License.txt: -------------------------------------------------------------------------------- 1 | /* Copyright 2004 Ryan Ackley 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ -------------------------------------------------------------------------------- /modules/core/src/main/templates/org/terrier/Version.java: -------------------------------------------------------------------------------- 1 | package org.terrier; 2 | /** 3 | * Maven automatically populates this with the version number, etc.
4 | * See http://stackoverflow.com/questions/2469922/generate-a-version-java-file-in-maven 5 | * @since 4.1. 6 | */ 7 | public class Version { 8 | 9 | public static final String VERSION = "${project.version}"; 10 | public static final String BUILD_DATE = "${timestamp}"; 11 | public static final String BUILD_USER = "${user.name}"; 12 | 13 | public static int getMajorVersion() 14 | { 15 | return Integer.parseInt(VERSION.split("\\.", 2)[0]); 16 | } 17 | 18 | public static void main(String[] args) { 19 | System.out.println(VERSION); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /share/tests/tweets/utf8-tweet.json: -------------------------------------------------------------------------------- 1 | {"id":28965151080189952,"id_str":"28965151080189952","created_at":"Sun Jan 23 00:00:04 +0000 2011","text":"フォロー返しに興味が無い鳩なのですが、「一方的にフォローされている」という状況だけに注目しますと、「こちらから何のアクションもしていないのに鳩の事を憎からず思ってくれた人」だと舞い上がって。もうこのまま片思われという宝物のままにしておこうとまで思ったしそうしています。","truncated":false,"retweet_count":0,"in_reply_to_screen_name":null,"in_reply_to_user_id_str":null,"in_reply_to_user_id":null,"in_reply_to_status_id_str":null,"in_reply_to_status_id":null,"contributors":null,"user":{"screen_name":"Hato_ouma_bot","protected":false,"lang":"ja","name":"レプリカント鳩","profile_image_url":"http://a0.twimg.com/profile_images/1078046243/0925_wt03pcwestup_w3a379ouma_0001_bigger.jpg"},"entities":{"hashtags":[],"urls":[],"user_mentions":[]}} 2 | -------------------------------------------------------------------------------- /.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled 3 | org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate 4 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 5 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve 6 | 
org.eclipse.jdt.core.compiler.compliance=1.8 7 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate 8 | org.eclipse.jdt.core.compiler.debug.localVariable=generate 9 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate 10 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error 11 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error 12 | org.eclipse.jdt.core.compiler.source=1.8 13 | -------------------------------------------------------------------------------- /modules/tests/share/tests/tweets/utf8-tweet.json: -------------------------------------------------------------------------------- 1 | {"id":28965151080189952,"id_str":"28965151080189952","created_at":"Sun Jan 23 00:00:04 +0000 2011","text":"フォロー返しに興味が無い鳩なのですが、「一方的にフォローされている」という状況だけに注目しますと、「こちらから何のアクションもしていないのに鳩の事を憎からず思ってくれた人」だと舞い上がって。もうこのまま片思われという宝物のままにしておこうとまで思ったしそうしています。","truncated":false,"retweet_count":0,"in_reply_to_screen_name":null,"in_reply_to_user_id_str":null,"in_reply_to_user_id":null,"in_reply_to_status_id_str":null,"in_reply_to_status_id":null,"contributors":null,"user":{"screen_name":"Hato_ouma_bot","protected":false,"lang":"ja","name":"レプリカント鳩","profile_image_url":"http://a0.twimg.com/profile_images/1078046243/0925_wt03pcwestup_w3a379ouma_0001_bigger.jpg"},"entities":{"hashtags":[],"urls":[],"user_mentions":[]}} 2 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | javasphinx-apidoc -o javadoc/ ../modules/*/src/main/java 21 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 22 | -------------------------------------------------------------------------------- /modules/realtime/src/main/java/org/terrier/realtime/multi/BlockMultiIterablePosting.java: -------------------------------------------------------------------------------- 1 | package org.terrier.realtime.multi; 2 | 3 | import org.terrier.structures.postings.BlockPosting; 4 | import org.terrier.structures.postings.IterablePosting; 5 | 6 | public class BlockMultiIterablePosting extends MultiIterablePosting implements BlockPosting { 7 | 8 | BlockPosting[] bps; 9 | public BlockMultiIterablePosting(IterablePosting[] constituentIPs, int[] offsets) { 10 | super(constituentIPs, offsets); 11 | bps = new BlockPosting[constituentIPs.length]; 12 | for(int i=0;i 2 | 3 | 4 | 5 | Terrier Search 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 15 | 16 |
17 | 18 |
19 | 20 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /src/webapps/wt2g/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Terrier Search 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 15 | 16 |
17 | 18 |
19 | 20 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /modules/tests/src/test/java/org/terrier/applications/TestDirectQuerySource.java: -------------------------------------------------------------------------------- 1 | package org.terrier.applications; 2 | 3 | import static org.junit.Assert.*; 4 | 5 | import org.junit.Test; 6 | import org.terrier.applications.batchquerying.QuerySource; 7 | import org.terrier.applications.batchquerying.QuerySourceUtils; 8 | 9 | public class TestDirectQuerySource { 10 | 11 | @Test public void theTest() { 12 | QuerySource qs = QuerySourceUtils.create( 13 | new String[]{"q1", "q2"}, 14 | new String[]{"one", "one two"}, true); 15 | assertNotNull(qs); 16 | assertTrue(qs.hasNext()); 17 | assertEquals("one", qs.next()); 18 | assertEquals("q1", qs.getQueryId()); 19 | assertTrue(qs.hasNext()); 20 | assertEquals("one two", qs.next()); 21 | assertEquals("q2", qs.getQueryId()); 22 | assertFalse(qs.hasNext()); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /licenses/trove/README-license.txt: -------------------------------------------------------------------------------- 1 | The Trove library is licensed under the Lesser GNU Public License, 2 | which is included with the distribution in a file called LICENSE.txt. 3 | 4 | The PrimeFinder and HashFunctions classes in Trove are subject to the 5 | following license restrictions: 6 | 7 | Copyright (c) 1999 CERN - European Organization for Nuclear Research. 8 | 9 | Permission to use, copy, modify, distribute and sell this software and 10 | its documentation for any purpose is hereby granted without fee, 11 | provided that the above copyright notice appear in all copies and that 12 | both that copyright notice and this permission notice appear in 13 | supporting documentation. CERN makes no representations about the 14 | suitability of this software for any purpose. 
It is provided "as is" 15 | without expressed or implied warranty. 16 | 17 | 18 | -------------------------------------------------------------------------------- /modules/learning/src/main/java/org/terrier/matching/FilterMatching.java: -------------------------------------------------------------------------------- 1 | package org.terrier.matching; 2 | 3 | import java.io.IOException; 4 | 5 | public abstract class FilterMatching implements Matching { 6 | 7 | protected Matching parent; 8 | FilterMatching(Matching _parent) 9 | { 10 | this.parent = _parent; 11 | } 12 | 13 | @Override 14 | public ResultSet match(String queryNumber, MatchingQueryTerms queryTerms) 15 | throws IOException 16 | { 17 | return doMatch(queryNumber, queryTerms, parent.match(queryNumber, queryTerms), true); 18 | } 19 | 20 | public ResultSet doMatch(String queryNumber, MatchingQueryTerms queryTerms, ResultSet match) throws IOException { 21 | return this.doMatch(queryNumber, queryTerms, match, true); 22 | } 23 | 24 | public abstract ResultSet doMatch(String queryNumber, MatchingQueryTerms queryTerms, ResultSet match, boolean keepInputScores) throws IOException; 25 | } 26 | -------------------------------------------------------------------------------- /modules/tests/src/test/java/org/terrier/utility/TestClassNameParser.java: -------------------------------------------------------------------------------- 1 | package org.terrier.utility; 2 | 3 | import static org.junit.Assert.*; 4 | 5 | import java.util.List; 6 | 7 | import org.junit.Test; 8 | import org.terrier.matching.dsms.DocumentScoreModifier; 9 | 10 | public class TestClassNameParser { 11 | 12 | List get(String names) { 13 | List rtr = null; 14 | try{ 15 | rtr = new ClassNameParser( 16 | names, DocumentScoreModifier.class.getPackage().getName(), DocumentScoreModifier.class) 17 | .parseAll(); 18 | }catch (Exception e) { 19 | throw new RuntimeException(e); 20 | } 21 | return rtr; 22 | } 23 | 24 | @Test 25 | public void test() { 26 | 
assertEquals(1, get("DFRDependenceScoreModifier").size()); 27 | assertEquals(2, get("MRFDependenceScoreModifier,DFRDependenceScoreModifier").size()); 28 | } 29 | 30 | } -------------------------------------------------------------------------------- /modules/logging/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | 4 | 5 | terrier-platform 6 | org.terrier 7 | 5.11 8 | ../../ 9 | 10 | 11 | terrier-logging 12 | 13 | 14 | 15 | ch.qos.logback 16 | logback-classic 17 | ${logback.version} 18 | 19 | 20 | 21 | ch.qos.logback 22 | logback-core 23 | ${logback.version} 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /modules/realtime/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | 6 | terrier-platform 7 | org.terrier 8 | 5.11 9 | ../../ 10 | 11 | 12 | terrier-realtime 13 | 14 | 15 | 16 | org.terrier 17 | terrier-core 18 | ${project.version} 19 | 20 | 21 | 22 | org.terrier 23 | terrier-batch-indexers 24 | ${project.version} 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /licenses/jforests/CitationPolicy.txt: -------------------------------------------------------------------------------- 1 | Citation Policy 2 | If you use jforests for a research purpose, please use the following citation: 3 | 4 | Y. Ganjisaffar, R. Caruana, C. Lopes, Bagging Gradient-Boosted Trees for High Precision, Low Variance Ranking Models, in SIGIR 2011, Beijing, China. 5 | 6 | Bibtex: 7 | 8 | @inproceedings{Ganji:2011:SIGIR, 9 | author = {Yasser Ganjisaffar and Rich Caruana and Cristina Lopes}, 10 | title = {Bagging Gradient-Boosted Trees for High Precision, Low Variance Ranking Models}, 11 | booktitle = {Proceedings of the 34th international ACM SIGIR conference on Research and development in Information}, 12 | series = {SIGIR '11}, 13 | year = {2011}, 14 | isbn = {978-1-4503-0757-4}, 15 | location = {Beijing, China}, 16 | pages = {85--94}, 17 | numpages = {10}, 18 | doi = {http://doi.acm.org/10.1145/2009916.2009932}, 19 | acmid = {2009932}, 20 | publisher = {ACM}, 21 | address = {New York, NY, USA}, 22 | } -------------------------------------------------------------------------------- /.github/workflows/build-unit-tests.yml: -------------------------------------------------------------------------------- 1 | name: Terrier Maven CI 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | strategy: 8 | matrix: 9 | java: [11, 21] 10 | os: ['ubuntu-latest', 'macOs-latest', 'windows-latest'] 11 | architecture: ['x64'] 12 | 13 | runs-on: ${{ matrix.os }} 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | 18 | - name: Set up JDK 19 | uses: actions/setup-java@v3 20 | with: 21 | 
java-version: ${{ matrix.java }} 22 | architecture: ${{ matrix.architecture }} 23 | distribution: 'zulu' 24 | 25 | - name: Build with Maven 26 | run: mvn -B package --file pom.xml 27 | 28 | - name: Run on Windows 29 | if: matrix.os == 'windows-latest' 30 | run: | 31 | bin\terrier.bat 32 | 33 | - name: Run on Linux/MacOs 34 | if: matrix.os == 'ubuntu-latest' || matrix.os == 'macOs-latest' 35 | run: | 36 | bin/terrier 37 | -------------------------------------------------------------------------------- /modules/assemblies/src/assembly/module-jar-with-dependencies.xml: -------------------------------------------------------------------------------- 1 | 4 | 5 | jar-with-dependencies 6 | 7 | jar 8 | 9 | false 10 | 11 | 12 | / 13 | true 14 | true 15 | runtime 16 | 17 | 18 | 19 | 20 | 21 | metaInf-services 22 | 23 | 24 | -------------------------------------------------------------------------------- /modules/realtime/src/main/java/org/terrier/realtime/multi/BlockMultiDirectIterablePosting.java: -------------------------------------------------------------------------------- 1 | package org.terrier.realtime.multi; 2 | 3 | import org.terrier.structures.postings.BlockPosting; 4 | import org.terrier.structures.postings.BlockPostingImpl; 5 | import org.terrier.structures.postings.IterablePosting; 6 | import org.terrier.structures.postings.WritablePosting; 7 | 8 | public class BlockMultiDirectIterablePosting extends 9 | MultiDirectIterablePosting 10 | implements BlockPosting { 11 | 12 | BlockPosting bp; 13 | public BlockMultiDirectIterablePosting(IterablePosting posting, 14 | MultiLexicon lex, int shard) { 15 | super(posting, lex, shard); 16 | bp = (BlockPosting)posting; 17 | } 18 | 19 | @Override 20 | public int[] getPositions() { 21 | return bp.getPositions(); 22 | } 23 | 24 | @Override 25 | public WritablePosting asWritablePosting() { 26 | return new BlockPostingImpl(this.getId(), this.getFrequency(), this.getPositions()); 27 | } 28 | 29 | } 30 | 
-------------------------------------------------------------------------------- /modules/batch-retrieval/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | 4 | 5 | terrier-platform 6 | org.terrier 7 | 5.11 8 | ../../ 9 | 10 | 11 | terrier-batch-retrieval 12 | 13 | 14 | 15 | org.terrier 16 | terrier-core 17 | ${project.version} 18 | 19 | 20 | 21 | uk.ac.gla.dcs.terrierteam 22 | jtreceval 23 | 0.0.5 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/tartarus/snowball/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | /** Snowball stemmer API */ 19 | package org.terrier.tartarus.snowball; 20 | -------------------------------------------------------------------------------- /modules/tests/src/test/java/org/terrier/structures/collections/TestMergedIterator.java: -------------------------------------------------------------------------------- 1 | package org.terrier.structures.collections; 2 | import static org.junit.Assert.*; 3 | 4 | import java.util.Iterator; 5 | 6 | import org.junit.Test; 7 | 8 | import com.google.common.collect.Iterators; 9 | import com.google.common.collect.Lists; 10 | import com.google.common.collect.Ordering; 11 | 12 | public class TestMergedIterator { 13 | @Test 14 | public void merge() { 15 | Iterator stream1 = Iterators.forArray(4, 6); 16 | Iterator stream3 = Iterators.forArray(1, 3, 5, 7, 9, 11); 17 | Iterator stream2 = Iterators.forArray(2, 8, 10); 18 | 19 | Iterator merged = IteratorUtils.merge(Ordering.natural(), stream1, stream2, stream3); 20 | 21 | var theList = Lists.newArrayList(merged); 22 | assertEquals(11, theList.size()); 23 | assertEquals(1, theList.get(0).intValue()); 24 | assertEquals(11, theList.get(theList.size()-1).intValue()); 25 | } 26 | } -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/tartarus/snowball/ext/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** Autogenerated snowball stemmer implementations. */ 19 | package org.terrier.tartarus.snowball.ext; 20 | -------------------------------------------------------------------------------- /bin/fq.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | REM Terrier - Terabyte Retriever 3 | REM Webpage: http://ir.dcs.gla.ac.uk/terrier 4 | REM Contact: terrier@dcs.gla.ac.uk 5 | REM 6 | REM The contents of this file are subject to the Mozilla Public 7 | REM License Version 1.1 (the "License"); you may not use this file 8 | REM except in compliance with the License. You may obtain a copy of 9 | REM the License at http://www.mozilla.org/MPL/ 10 | REM 11 | REM Software distributed under the License is distributed on an "AS 12 | REM IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or 13 | REM implied. See the License for the specific language governing 14 | REM rights and limitations under the License. 15 | REM 16 | REM The Original Code is fq.bat 17 | REM 18 | REM The Initial Developer of the Original Code is the University of Glasgow. 19 | REM Portions created by The Initial Developer are Copyright (C) 2004-2008 20 | REM the initial Developer. All Rights Reserved. 
21 | REM 22 | REM Contributor(s): 23 | REM Craig Macdonald (original author) 24 | REM 25 | SET FQ=%~f1 26 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/structures/UncompressedMetaIndex.java: -------------------------------------------------------------------------------- 1 | package org.terrier.structures; 2 | import java.io.IOException; 3 | public class UncompressedMetaIndex extends CompressingMetaIndex { 4 | 5 | public static class InputStream extends CompressingMetaIndex.InputStream { 6 | 7 | public InputStream(IndexOnDisk _index, String structureName) throws IOException 8 | { 9 | super(_index, structureName); 10 | } 11 | 12 | public InputStream(IndexOnDisk _index, String _structureName, int _startingId, int _endId) throws IOException 13 | { 14 | super(_index, _structureName, _startingId, _endId); 15 | } 16 | 17 | protected byte[] decode(byte[] input) throws IOException { 18 | return input; 19 | } 20 | } 21 | 22 | public UncompressedMetaIndex(IndexOnDisk index, String structureName) 23 | throws IOException 24 | { 25 | super(index, structureName); 26 | } 27 | 28 | 29 | protected byte[] decode(byte[] input) throws IOException { 30 | return input; 31 | } 32 | } -------------------------------------------------------------------------------- /modules/rest-client/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | 6 | terrier-platform 7 | org.terrier 8 | 5.11 9 | ../../ 10 | 11 | 12 | terrier-rest-client 13 | 14 | 15 | 16 | org.slf4j 17 | slf4j-api 18 | ${slf4j.version} 19 | 20 | 21 | 22 | org.terrier 23 | terrier-retrieval-api 24 | ${project.version} 25 | 26 | 27 | 28 | org.apache.httpcomponents 29 | httpclient 30 | 4.5.13 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /modules/rest-server/src/main/java/org/terrier/rest/package-info.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is package-info.java. 19 | * 20 | * The Original Code is Copyright (C) 2017-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald 25 | */ 26 | /** Provides a HTTP REST service for providing results */ 27 | package org.terrier.rest; 28 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/structures/postings/bit/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.structures.postings.bit package 5 | 24 | 25 | 26 |

Provides bit-level IterablePosting implementations.

27 | 28 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/structures/restructure/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is package-info.java. 19 | * 20 | * The Original Code is Copyright (C) 2017-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald 25 | */ 26 | /** Contains legacy index data structure classes */ 27 | package org.terrier.structures.restructure; 28 | -------------------------------------------------------------------------------- /modules/realtime/src/main/java/org/terrier/realtime/restructure/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.realtime.restructure package 5 | 24 | 25 | 26 |

Provides methods to alter the structure of a real-time index.

27 | 28 | -------------------------------------------------------------------------------- /bin/lcp.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | REM Terrier - Terabyte Retriever 3 | REM Webpage: http://terrier.org/ 4 | REM Contact: terrier@dcs.gla.ac.uk 5 | REM 6 | REM The contents of this file are subject to the Mozilla Public 7 | REM License Version 1.1 (the "License"); you may not use this file 8 | REM except in compliance with the License. You may obtain a copy of 9 | REM the License at http://www.mozilla.org/MPL/ 10 | REM 11 | REM Software distributed under the License is distributed on an "AS 12 | REM IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or 13 | REM implied. See the License for the specific language governing 14 | REM rights and limitations under the License. 15 | REM 16 | REM The Original Code is lcp.bat 17 | REM 18 | REM The Initial Developer of the Original Code is the University of Glasgow. 19 | REM Portions created by The Initial Developer are Copyright (C) 2004-2011 20 | REM the initial Developer. All Rights Reserved. 21 | REM 22 | REM Contributor(s): 23 | REM Craig Macdonald (original author) 24 | REM 25 | rem echo Adding %1 to localclasspath 26 | SET LOCALCLASSPATH=%1;%LOCALCLASSPATH% 27 | -------------------------------------------------------------------------------- /modules/realtime/src/main/java/org/terrier/realtime/memory/fields/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.realtime.memory.fields package 5 | 24 | 25 | 26 |

Provides MemoryIndex structures that support field search.

27 | 28 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/matching/models/dependence/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is package-info.java. 19 | * 20 | * The Original Code is Copyright (C) 2017-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald 25 | */ 26 | /** Weighting models for term dependence models */ 27 | package org.terrier.matching.models.dependence; 28 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/structures/bit/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.structures.bit package 5 | 24 | 25 | 26 |

Provides on-disk index structures that support the older bit-level compression scheme.

27 | 28 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/utility/io/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.utility.io package 5 | 24 | 25 | 26 | 27 |

This package includes classes that are used for various IO duties within Terrier.

28 | 29 | 30 | -------------------------------------------------------------------------------- /modules/batch-retrieval/src/main/java/org/terrier/structures/cache/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.structures.cache package 5 | 24 | 25 | 26 |

Provides caching of search results when performing batch querying operations.

27 | 28 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/utility/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.utility package 5 | 24 | 25 | 26 | 27 |

This package includes classes that are used in various parts of 28 | the Terrier Platform.

29 | 30 | 31 | -------------------------------------------------------------------------------- /modules/core/src/main/java/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Default package 5 | 24 | 25 | 26 |

Provides application-level code that uses the Terrier platform to 27 | perform indexing and retrieval from standard test collections.

28 | 29 | 30 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/matching/models/queryexpansion/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.matching.models.queryexpansion package 5 | 24 | 25 | 26 |

Provides the classes that implement various query expansion models.

27 | 28 | 29 | -------------------------------------------------------------------------------- /modules/batch-indexers/src/main/java/org/terrier/structures/indexing/classical/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.structures.indexing.classical package 5 | 24 | 25 | 26 |

Provides functionality for creating on-disk indices via indexer classes.

27 | 28 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/statistics/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.sorting package 5 | 24 | 25 | 26 |

Provides the implementations of various statistical routines. In particular, 27 | the Gamma function is implemented.

28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/querying/parser/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.querying.parser package 5 | 24 | 25 | 26 |

Provides the parser specification and the classes that implement 27 | the query language of the Terrier platform.

28 | 29 | 30 | -------------------------------------------------------------------------------- /modules/learning/src/main/java/org/terrier/learning/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.learning package 5 | 24 | 25 | 26 |

Provides interfaces for Terrier 'Fat' ResultSets that contain features and decorators that add new features to those result sets.

27 | 28 | -------------------------------------------------------------------------------- /modules/realtime/src/main/java/org/terrier/realtime/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.realtime package 5 | 24 | 25 | 26 |

Provides index structures that support updating and real-time retrieval. These are MemoryIndex, IncrementalIndex and MultiIndex.

27 | 28 | -------------------------------------------------------------------------------- /modules/batch-indexers/src/main/java/org/terrier/structures/indexing/UncompressedMetaIndexBuilder.java: -------------------------------------------------------------------------------- 1 | package org.terrier.structures.indexing; 2 | import org.terrier.structures.UncompressedMetaIndex; 3 | import org.terrier.structures.IndexOnDisk; 4 | import java.io.IOException; 5 | /** 6 | * Writes all metadata as uncompressed 7 | * @since 5.5 8 | */ 9 | public class UncompressedMetaIndexBuilder extends BaseMetaIndexBuilder { 10 | 11 | public UncompressedMetaIndexBuilder(IndexOnDisk _index, String[] _keyNames, int[] _valueLens, String[] _reverseKeys) 12 | { 13 | this(_index, "meta", _keyNames, _valueLens, _reverseKeys); 14 | } 15 | 16 | public UncompressedMetaIndexBuilder(IndexOnDisk _index, String _structureName, String[] _keyNames, int[] _valueLens, String[] _reverseKeys) 17 | { 18 | super(_index, _structureName, _keyNames, _valueLens, _reverseKeys); 19 | this.structureClass = UncompressedMetaIndex.class; 20 | this.structureInputStreamClass = UncompressedMetaIndex.InputStream.class; 21 | } 22 | 23 | protected int writeData(byte[] data) throws IOException { 24 | dataOutput.write(data); 25 | return data.length; 26 | } 27 | 28 | } -------------------------------------------------------------------------------- /modules/batch-retrieval/src/main/java/org/terrier/structures/outputformat/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.structures.outputformat package 5 | 24 | 25 | 26 |

Provides functionality for writing Terrier ResultSets to disk as .res files, e.g. in TREC run formats.

27 | 28 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/matching/models/DFRNorm2BaseModel.java: -------------------------------------------------------------------------------- 1 | 2 | package org.terrier.matching.models; 3 | 4 | /** Base class for all DFR models what use Normalisation 2. To control the weight of the c length normalisation parameter, 5 | * set the dfr.c control in the SearchRequest object. 6 | */ 7 | public abstract class DFRNorm2BaseModel extends WeightingModel { 8 | /** The parameter c. This defaults to 1.0, but should be set using in the constructor 9 | * of each child weighting model to the sensible default for that weighting model. */ 10 | protected double c = 1.0d; 11 | 12 | @Override 13 | public void prepare() { 14 | if (rq != null) { 15 | if (rq.hasControl("dfr.c")) { 16 | c = Double.parseDouble(rq.getControl("dfr.c")); 17 | } 18 | } 19 | super.prepare(); 20 | } 21 | 22 | /** 23 | * Sets the c value 24 | * @param _c the term frequency normalisation parameter value. 25 | */ 26 | @Deprecated 27 | public void setParameter(double _c) { 28 | this.c = _c; 29 | } 30 | 31 | /** 32 | * Returns the parameter as set by setParameter() 33 | */ 34 | @Deprecated 35 | public double getParameter() { 36 | return this.c; 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/sorting/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.sorting package 5 | 24 | 25 | 26 |

Provides the classes that implement the sorting of various arrays 27 | for the Terrier platform.

28 |

Used by indexing and retrieval.

29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /modules/realtime/src/main/java/org/terrier/realtime/memory/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.realtime.memory package 5 | 24 | 25 | 26 |

Provides MemoryIndex structures. Memory indices are wholly stored in memory, are updatable and can (optionally) be written to disk.

27 | 28 | -------------------------------------------------------------------------------- /modules/tests/src/test/java/org/terrier/applications/TestCLITool.java: -------------------------------------------------------------------------------- 1 | package org.terrier.applications; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | 5 | import org.apache.commons.cli.CommandLine; 6 | import org.junit.Test; 7 | import org.terrier.applications.CLITool.CLIParsedCLITool; 8 | import org.terrier.utility.ApplicationSetup; 9 | 10 | public class TestCLITool { 11 | 12 | public static class testTool extends CLIParsedCLITool 13 | { 14 | public testTool(){} 15 | 16 | @Override public int run(CommandLine line) throws Exception { 17 | return 0; 18 | } 19 | } 20 | 21 | @Test public void testOneProperty() throws Exception { 22 | ApplicationSetup.clearAllProperties(); 23 | CLITool.run(testTool.class,new String[]{"-Dfoo=bar"}); 24 | assertEquals("bar", ApplicationSetup.getProperty("foo", null)); 25 | 26 | } 27 | 28 | @Test public void testTwoProperties() throws Exception { 29 | ApplicationSetup.clearAllProperties(); 30 | CLITool.run(testTool.class,new String[]{"-Dfoo=bar", "-Dfoo2=bar2"}); 31 | assertEquals("bar", ApplicationSetup.getProperty("foo", null)); 32 | assertEquals("bar2", ApplicationSetup.getProperty("foo2", null)); 33 | 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /modules/batch-retrieval/src/main/java/org/terrier/applications/batchquerying/QuerySourceUtils.java: -------------------------------------------------------------------------------- 1 | package org.terrier.applications.batchquerying; 2 | 3 | import org.terrier.indexing.tokenisation.Tokeniser; 4 | 5 | public class QuerySourceUtils { 6 | 7 | public static QuerySource create(String[] qids, String[] qs, boolean tokenise) { 8 | return create(qids, qs, tokenise ? 
Tokeniser.getTokeniser() : null); 9 | } 10 | 11 | public static QuerySource create(String[] qids, String[] qs, Tokeniser tok) { 12 | return new QuerySource() { 13 | 14 | int i=-1; 15 | @Override 16 | public boolean hasNext() { 17 | return i < qids.length -1; 18 | } 19 | 20 | @Override 21 | public String next() { 22 | i++; 23 | String q = qs[i]; 24 | if (tok != null) 25 | { 26 | q = String.join(" ", tok.getTokens(q)); 27 | } 28 | return q; 29 | } 30 | 31 | @Override 32 | public String getQueryId() { 33 | return qids[i]; 34 | } 35 | 36 | @Override 37 | public void reset() { 38 | i = -1; 39 | } 40 | 41 | @Override 42 | public String[] getInfo() { 43 | return new String[]{"DirectQuerySource"}; 44 | } 45 | 46 | }; 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /modules/batch-retrieval/src/main/java/org/terrier/applications/batchquerying/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.applications.batchquerying package 5 | 24 | 25 | 26 |

Contains application-level programs for performing batch-based query operations, such as generating runs for TREC.

27 | 28 | -------------------------------------------------------------------------------- /modules/batch-indexers/src/main/java/org/terrier/structures/indexing/singlepass/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.structures.indexing.singlepass package 5 | 24 | 25 | 26 |

Provides implementations of the structures needed for performing 27 | single-pass indexing.

28 | 29 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/querying/summarisation/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.querying.parser package 5 | 24 | 25 | 26 |

Package for summarisation algorithms. Classes in this package are used to 27 | generate short summaries of documents, e.g. for snippet generation.

28 | 29 | 30 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/structures/collections/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.structures.collections package 5 | 24 | 25 | 26 |

Provides various data structures. These are implementations compatible with 27 | Java Collections. Most are disk-backed structures.

28 | 29 | 30 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/utility/restructure/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.utility.restructure package 5 | 24 | 25 | 26 |

Provides functionality to convert legacy Terrier indices into current indices. Currently conversion from Terrier 3.x to Terrier 4.x indices is supported.

27 | 28 | -------------------------------------------------------------------------------- /modules/realtime/src/main/java/org/terrier/realtime/matching/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.realtime.matching package 5 | 24 | 25 | 26 |

Provides additional matching implementations specifically for real-time index structures. For instance, matching over a subset of the indices within a MultiIndex.

27 | 28 | -------------------------------------------------------------------------------- /modules/concurrent/src/test/java/org/terrier/structures/concurrent/TestShakParallelTRECQueryingMem.java: -------------------------------------------------------------------------------- 1 | package org.terrier.structures.concurrent; 2 | import org.terrier.structures.*; 3 | import org.terrier.tests.BatchEndToEndTest; 4 | import org.terrier.tests.BatchEndToEndTest.BatchEndToEndTestEventHooks; 5 | 6 | import static org.junit.Assert.assertEquals; 7 | public class TestShakParallelTRECQueryingMem extends TestShakParallelTRECQuerying { 8 | 9 | static class Hook extends BatchEndToEndTestEventHooks 10 | { 11 | public void finishedIndexing(BatchEndToEndTest test) throws Exception 12 | { 13 | IndexOnDisk iod = IndexOnDisk.createIndex(); 14 | iod.setIndexProperty("index.inverted.data-source", "fileinmem"); 15 | iod.flush(); 16 | iod.close(); 17 | } 18 | 19 | public void checkIndex(BatchEndToEndTest test, Index index) throws Exception 20 | { 21 | IndexOnDisk iod = (IndexOnDisk) index; 22 | assertEquals("fileinmem", iod.getIndexProperty("index.inverted.data-source", null)); 23 | } 24 | } 25 | 26 | public TestShakParallelTRECQueryingMem() { 27 | super(); 28 | this.testHooks.add(new Hook()); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /modules/realtime/src/main/java/org/terrier/realtime/multi/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.realtime.multi package 5 | 24 | 25 | 26 |

Provides MultiIndex structures. A MultiIndex is a combination of many smaller indices. Each MultiIndex structure provides an abstraction over the related structure within each sub-index.

27 | 28 | -------------------------------------------------------------------------------- /modules/concurrent/src/test/java/org/terrier/structures/concurrent/TestThreadSafeManager.java: -------------------------------------------------------------------------------- 1 | package org.terrier.structures.concurrent; 2 | 3 | import static org.junit.Assert.assertTrue; 4 | 5 | import org.junit.Test; 6 | import org.terrier.indexing.IndexTestUtils; 7 | import org.terrier.querying.IndexRef; 8 | import org.terrier.querying.Manager; 9 | import org.terrier.querying.ManagerFactory; 10 | import org.terrier.querying.ThreadSafeManager; 11 | import org.terrier.structures.ConcurrentIndexLoader; 12 | import org.terrier.structures.Index; 13 | import org.terrier.structures.IndexFactory; 14 | import org.terrier.tests.ApplicationSetupBasedTest; 15 | 16 | public class TestThreadSafeManager extends ApplicationSetupBasedTest { 17 | 18 | @Test public void testOne() throws Exception 19 | { 20 | Index index = IndexTestUtils.makeIndex(new String[]{"doc1", "doc2"}, new String[]{"the quick fox", "and all that stuff"}); 21 | IndexRef ref = index.getIndexRef(); 22 | assertTrue(IndexFactory.isLoaded(ref)); 23 | System.out.println(ref.toString()); 24 | IndexRef concRef = ConcurrentIndexLoader.makeConcurrent(IndexRef.of(ref.toString())); 25 | Manager m = ManagerFactory.from(concRef); 26 | assertTrue(m instanceof ThreadSafeManager); 27 | } 28 | 29 | } 30 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/querying/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.querying package 5 | 24 | 25 | 26 |

Provides the interfaces and classes for the querying 27 | API of the Terrier platform, the controls, post processors 28 | and filters. As an example, query expansion is applied as 29 | a postprocess.

30 | 31 | 32 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/structures/seralization/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.structures.serialization package 5 | 24 | 25 | 26 |

Provides classes for things that can be serialized to disk. Invariably, these 27 | have a fixed size, so that random-seeks etc can be utilised on persistent data structures.

28 | 29 | 30 | -------------------------------------------------------------------------------- /modules/batch-indexers/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | 4 | 5 | terrier-platform 6 | org.terrier 7 | 5.11 8 | ../../ 9 | 10 | 11 | terrier-batch-indexers 12 | 13 | 14 | jitpack.io 15 | https://jitpack.io 16 | 17 | 18 | 19 | 20 | org.terrier 21 | terrier-core 22 | ${project.version} 23 | 24 | 25 | 30 | 31 | 32 | com.github.cmacdonald 33 | memory-measurer 34 | -SNAPSHOT 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /modules/tests/src/test/java/org/terrier/tests/TRECWT2GEndtoEndTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is TRECWT2GEndtoEndTest.java. 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 
22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original author) 25 | * 26 | */ 27 | package org.terrier.tests; 28 | 29 | public class TRECWT2GEndtoEndTest extends TRECEndtoEndTest { 30 | 31 | public TRECWT2GEndtoEndTest() { 32 | super("wt2g"); 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /modules/learning/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | 4 | 5 | terrier-platform 6 | org.terrier 7 | 5.11 8 | ../../ 9 | 10 | 11 | terrier-learning 12 | 13 | 14 | 15 | jitpack.io 16 | https://jitpack.io 17 | 18 | 19 | 20 | 21 | 22 | org.terrier 23 | terrier-core 24 | ${project.version} 25 | 26 | 27 | 28 | org.terrier 29 | terrier-batch-retrieval 30 | ${project.version} 31 | 32 | 33 | 34 | com.github.yasserg 35 | jforests 36 | v0.5 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/querying/PostProcess.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is PostProcess.java. 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 
21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original author) 25 | * Vassilis Plachouras 26 | */ 27 | package org.terrier.querying; 28 | /** This class will be removed in a future version of Terrier */ 29 | @Deprecated 30 | public interface PostProcess extends Process {} -------------------------------------------------------------------------------- /modules/tests/src/test/java/org/terrier/fat/TestScoringMatchingWithFat.java: -------------------------------------------------------------------------------- 1 | package org.terrier.fat; 2 | import org.terrier.utility.ApplicationSetup; 3 | import org.terrier.structures.*; 4 | import org.terrier.matching.*; 5 | import org.terrier.matching.daat.Full; 6 | import org.terrier.matching.models.*; 7 | import org.terrier.indexing.IndexTestUtils; 8 | import org.terrier.tests.*; 9 | import org.junit.*; 10 | import static org.junit.Assert.*; 11 | 12 | public class TestScoringMatchingWithFat extends ApplicationSetupBasedTest { 13 | 14 | @Test public void singleDocumentSingleTerm() throws Exception 15 | { 16 | ApplicationSetup.setProperty("termpipelines", ""); 17 | ApplicationSetup.setProperty("ignore.low.idf.terms", "false"); 18 | Index index = IndexTestUtils.makeIndex( 19 | new String[]{"doc1"}, 20 | new String[]{"term"}); 21 | MatchingQueryTerms mqt = new MatchingQueryTerms(); 22 | mqt.setQueryId("test"); 23 | mqt.setTermProperty("term", 1.0d); 24 | mqt.setDefaultTermWeightingModel(new TF_IDF()); 25 | Matching m = new ScoringMatchingWithFat(index, new Full(index), new PL2()); 26 | 27 | ResultSet r1 = m.match("test", mqt); 28 | assertTrue(r1 instanceof FatResultSet); 29 | FatResultSet fr1 = (FatResultSet)r1; 30 | 31 | } 32 | } -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/structures/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.structures 
package 5 | 24 | 25 | 26 |

Provides the classes that implement the data structures used 27 | for retrieval with the Terrier platform.

28 | 29 |

These include the implementations of the inverted index, the 30 | direct index, the lexicon and the document index.

31 | 32 | 33 | -------------------------------------------------------------------------------- /modules/batch-indexers/src/main/java/org/terrier/structures/indexing/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.structures.indexing package 5 | 24 | 25 | 26 |

Provides the classes used for creating the data structures of 27 | the Terrier platform.

28 | 29 |

These include the builders of the inverted index, the 30 | direct index, the lexicon and the document index.

31 | 32 | 33 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/matching/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.matching package 5 | 24 | 25 | 26 |

Provides the classes and interfaces used for matching documents 27 | to queries. It includes the classes that perform the matching by 28 | reading information from the inverted index etc., and the classes 29 | that model the set of retrieved documents.

30 | 31 | 32 | -------------------------------------------------------------------------------- /doc/terrier_desktop.md: -------------------------------------------------------------------------------- 1 | 2 | Introducing the Desktop Terrier example search application: 3 | ============================================= 4 | 5 | Desktop Terrier is an example application we have provided with Terrier for two purposes: 6 | 7 | - To provide a Desktop Search application that will allow users to quickly test out features of Terrier such as for example the Terrier query language. 8 | 9 | - To give developers an example of using Terrier in an interactive setting. 10 | 11 | Importantly, Desktop Terrier is only a sample application to help users become used to the functionality that Terrier provides. We do not recommend Desktop Terrier to perform large or complex indexing jobs. Instead, once you are comfortable with the Terrier functionality, indexing and batch retrieval should be performed using the command line. You have been warned. 12 | 13 | Where to obtain Desktop Terrier 14 | ------------------------ 15 | 16 | Since version 4.2, the Terrier Desktop search application is available [separately from Github](https://github.com/terrier-org/terrier-desktop). 17 | 18 | ------------------ 19 | > Webpage: 20 | > Contact: [School of Computing Science](http://www.dcs.gla.ac.uk/) 21 | > Copyright (C) 2004-2020 [University of Glasgow](http://www.gla.ac.uk/). All Rights Reserved. 22 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/matching/dsms/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.matching.dsms package 5 | 24 | 25 | 26 |

Provides the interface and the classes for modifying the scores 27 | of documents after a score has been assigned to documents, or 28 | implementing the combination of evidence. The implemented classes 29 | include phrase and proximity searching.

30 | 31 | 32 | -------------------------------------------------------------------------------- /modules/index-api/TODO.md: -------------------------------------------------------------------------------- 1 | Index APIs Improvement Suggestions: 2 | 3 | * BitFilePosition: the method "void setOffset(BitFilePosition pos)" simplified to "void set(BitFilePosition pos)". 4 | * Index: transform the whole abstract class into an interface with default methods. 5 | introduce an Enum with toString() method and state for commonly used index structures. 6 | * Lexicon: the inner static class "public static class LexiconFileEntry" should not be exposed as public. It is mainly used in Lexicon builder for its own stuff and some other classes. 7 | * LexiconEntry: transform the abstract class into an interface with default methods. 8 | Javadocs need improvements. 9 | * Pointer: the method "void setPointer(Pointer p)" simplified to "void set(Pointer p)". 10 | * PostingIndex: the generic type is not used in the interface. Is it necessary? 11 | the method "IterablePosting getPostings(Pointer lEntry)" throws an IOException. Throwing an IOException assumes everything is on file/network? 12 | * FieldPosting: the method "void setFieldLengths(int[] newLengths)" should not be exposed as public. 13 | * IterablePosting: the methods "int next()" and "int next(int targetId)" should not raise an IOException because that is implementation-dependent. Consider posting lists in memory for example. 14 | -------------------------------------------------------------------------------- /modules/batch-indexers/src/main/java/org/terrier/structures/merging/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.structures.merging package 5 | 24 | 25 | 26 |

Provides classes for merging two sets of data structures, created by Terrier, 27 | into one set of data structures. The output is equivalent to the output that would 28 | have been obtained if one set of data structures had been built.

29 | 30 | 31 | -------------------------------------------------------------------------------- /modules/realtime/src/main/java/org/terrier/realtime/incremental/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.realtime.incremental package 5 | 24 | 25 | 26 |

Provides incremental indexing functionality. An incremental index is a MultiIndex that is both updatable and can be written to disk. An incremental index has policies for deleting old indices, flushing partial indices to disk and merging on-disk indices.

27 | 28 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/matching/taat/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.matching.taat package 5 | 24 | 25 | 26 |

Provides classes that implement a term-at-a-time (TAAT) matching strategy. In TAAT matching, the 27 | scoring for one term is complete before scoring moves onto the next term in the query. 28 | In general, TAAT is not suitable for large indices - consider using DAAT instead.

29 | 30 | 31 | -------------------------------------------------------------------------------- /modules/retrieval-api/src/main/java/org/terrier/querying/ScoredDocList.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is ScoredDocList.java. 19 | * 20 | * The Original Code is Copyright (C) 2017-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald 25 | */ 26 | package org.terrier.querying; 27 | 28 | import java.util.List; 29 | 30 | 31 | public interface ScoredDocList extends List { 32 | 33 | public default double getMaxScore(){ 34 | return this.size() > 0 ? this.get(0).score : 0d; 35 | } 36 | 37 | public String[] getMetaKeys(); 38 | 39 | } 40 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/matching/daat/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.matching.daat package 5 | 24 | 25 | 26 |

Provides classes that implement a document-at-a-time (DAAT) matching strategy. In DAAT matching, 27 | the postings lists for all query terms are processed in parallel. In general, DAAT techniques 28 | do not require as much memory during matching, and may be able to terminate the matching process early.

29 | 30 | 31 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/terms/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.terms package 5 | 24 | 25 | 26 |

Provides the interface and classes for the term pipeline, 27 | a set of objects that process the terms during indexing and 28 | processing of queries.

29 | 30 |

This package includes implementations of a stop-word remover, 31 | as well as a full and a weak version of Porter's stemming algorithm.

32 | 33 | 34 | -------------------------------------------------------------------------------- /modules/batch-retrieval/src/main/java/org/terrier/evaluation/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.evaluation package 5 | 24 | 25 | 26 |

Provides an interface and the classes to process relevance assessments 27 | and perform standard evaluation of retrieval results. There are two 28 | types of evaluation supported. The first is based on the output of 29 | trec_eval and the other is for evaluation of known-item search tasks.

30 | 31 | 32 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/matching/matchops/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is package-info.java. 19 | * 20 | * The Original Code is Copyright (C) 2017-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald 25 | */ 26 | /** This package contains matching Operators. 27 | * These allow multiple entries from the inverted index posting list to be 28 | * used within a single query. 
In general, operators are inspired by the 29 | * Indri/Galago query language 30 | * 31 | * @since 5.0 32 | */ 33 | package org.terrier.matching.matchops; 34 | 35 | -------------------------------------------------------------------------------- /modules/tests/src/test/java/org/terrier/utility/TestVersion.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is TestVersion.java. 19 | * 20 | * The Original Code is Copyright (C) 2017-2020 the University of Glasgow. 21 | * All Rights Reserved. 
22 | * 23 | * Contributor(s): 24 | * Craig Macdonald 25 | */ 26 | package org.terrier.utility; 27 | 28 | import static org.junit.Assert.*; 29 | 30 | import org.junit.Test; 31 | 32 | public class TestVersion { 33 | 34 | @Test public void testVersion() 35 | { 36 | String versionString = ApplicationSetup.TERRIER_VERSION; 37 | assertTrue(versionString.contains(".")); 38 | 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /modules/realtime/src/main/java/org/terrier/realtime/memory/MemoryPostingList.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is MemoryPostingList.java. 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 
22 | * 23 | * Contributor(s): 24 | * Richard McCreadie 25 | * Stuart Mackie 26 | */ 27 | 28 | package org.terrier.realtime.memory; 29 | 30 | /** 31 | * A memory posting list interface 32 | * 33 | * @author Richard McCreadie, Stuart Mackie 34 | * @since 4.0 35 | * 36 | */ 37 | public interface MemoryPostingList { 38 | 39 | } 40 | -------------------------------------------------------------------------------- /modules/index-api/src/main/java/org/terrier/structures/ConcurrentReadable.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is ConcurrentReadable.java. 19 | * 20 | * The Original Code is Copyright (C) 2017-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald 25 | */ 26 | package org.terrier.structures; 27 | import java.lang.annotation.RetentionPolicy; 28 | import java.lang.annotation.Retention; 29 | 30 | /** This annotation makes that an index data 31 | * structure can be read by more than one thread concurrently. 
*/ 32 | @Retention(RetentionPolicy.RUNTIME) 33 | public @interface ConcurrentReadable 34 | { 35 | } 36 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/structures/seralization/WriteableFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org/ 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is WriteableFactory.java 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original contributor) 25 | */ 26 | package org.terrier.structures.seralization; 27 | 28 | /** General interface for factories of Writable objects. 
29 | * @since 3.0 30 | * @author Craig Macdonald 31 | * @param 32 | */ 33 | public interface WriteableFactory 34 | { 35 | /** Make a new instance of T */ 36 | T newInstance(); 37 | } -------------------------------------------------------------------------------- /modules/index-api/src/main/java/org/terrier/structures/FieldDocumentIndex.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org/ 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is FieldDocumentIndex.java 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 
22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original contributor) 25 | */ 26 | package org.terrier.structures; 27 | 28 | import java.io.IOException; 29 | /** 30 | * Interface for a fields document index 31 | */ 32 | public interface FieldDocumentIndex extends DocumentIndex { 33 | /** 34 | * Get the length of each field 35 | */ 36 | int[] getFieldLengths(int docid) throws IOException; 37 | } 38 | -------------------------------------------------------------------------------- /modules/batch-indexers/src/main/java/org/terrier/structures/indexing/ZstdMetaIndexBuilder.java: -------------------------------------------------------------------------------- 1 | package org.terrier.structures.indexing; 2 | import org.terrier.structures.ZstdCompressedMetaIndex; 3 | import com.github.luben.zstd.ZstdCompressCtx; 4 | import org.terrier.structures.IndexOnDisk; 5 | import java.io.IOException; 6 | /** 7 | * Writes all metadata using Zstandard compression. 8 | * @since 5.5 9 | */ 10 | public class ZstdMetaIndexBuilder extends BaseMetaIndexBuilder { 11 | 12 | ZstdCompressCtx compressor = new ZstdCompressCtx(); 13 | 14 | public ZstdMetaIndexBuilder(IndexOnDisk _index, String[] _keyNames, int[] _valueLens, String[] _reverseKeys) 15 | { 16 | this(_index, "meta", _keyNames, _valueLens, _reverseKeys); 17 | } 18 | 19 | public ZstdMetaIndexBuilder(IndexOnDisk _index, String _structureName, String[] _keyNames, int[] _valueLens, String[] _reverseKeys) 20 | { 21 | super(_index, _structureName, _keyNames, _valueLens, _reverseKeys); 22 | this.structureClass = ZstdCompressedMetaIndex.class; 23 | this.structureInputStreamClass = ZstdCompressedMetaIndex.InputStream.class; 24 | this.compressor.setChecksum(false); 25 | } 26 | 27 | protected int writeData(byte[] data) throws IOException { 28 | byte[] compressed = compressor.compress(data); 29 | dataOutput.write(compressed); 30 | return compressed.length; 31 | } 32 | 33 | } 
-------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/querying/ManagerRequisite.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is ManagerRequisite.java. 19 | * 20 | * The Original Code is Copyright (C) 2017-2020 the University of Glasgow. 21 | * All Rights Reserved. 
22 | * 23 | * Contributor(s): 24 | * Craig Macdonald 25 | */ 26 | package org.terrier.querying; 27 | 28 | /** 29 | * 30 | * @since 5.0 31 | */ 32 | public enum ManagerRequisite { 33 | 34 | /** the original query must have been set */ 35 | RAWQUERY, 36 | /** a TerrierQL parsed query has been set */ 37 | TERRIERQL, 38 | /** MatchingQueryTerms has been populated */ 39 | MQT, 40 | /** A ResultSet has been obtained */ 41 | RESULTSET 42 | 43 | } 44 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/querying/ProcessPhaseRequisites.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is ProcessPhaseRequisites.java. 19 | * 20 | * The Original Code is Copyright (C) 2017-2020 the University of Glasgow. 21 | * All Rights Reserved.
22 | * 23 | * Contributor(s): 24 | * Craig Macdonald 25 | */ 26 | package org.terrier.querying; 27 | 28 | import java.lang.annotation.ElementType; 29 | import java.lang.annotation.Retention; 30 | import java.lang.annotation.RetentionPolicy; 31 | import java.lang.annotation.Target; 32 | 33 | @Target(ElementType.TYPE) 34 | @Retention(RetentionPolicy.RUNTIME) 35 | public @interface ProcessPhaseRequisites { 36 | ManagerRequisite[] value(); 37 | } 38 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/terms/Stemmer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.uk 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is Stemmer.java. 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original author) 25 | * 26 | */ 27 | package org.terrier.terms; 28 | 29 | /** Interface for all stemmers. 30 | * @since 3.0 31 | * @author Craig Macdonald 32 | */ 33 | public interface Stemmer { 34 | 35 | /** 36 | * Returns the stem of a given term 37 | * @param s String the term to be stemmed. 38 | * @return String the stem of a given term. 
39 | */ 40 | String stem(String s); 41 | } 42 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/matching/tsms/package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | org.terrier.matching.tsms package 5 | 24 | 25 | 26 |

Provides the interface and classes that implement the term 27 | score modifiers, which modify the scores assigned to documents 28 | for a particular term. The classes include an implementation 29 | of searching for terms that appear in given fields. In general, TermScoreModifiers 30 | are now deprecated. All implementations should use WeightingModel instead.

31 | 32 | 33 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/terms/DutchSnowballStemmer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.uk 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is DutchSnowballStemmer.java. 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original author) 25 | * 26 | */ 27 | package org.terrier.terms; 28 | 29 | /** Dutch stemmer implemented by Snowball.
30 | * @author Craig Macdonald 31 | */ 32 | public class DutchSnowballStemmer extends SnowballStemmer 33 | { 34 | /** 35 | * constructor 36 | * @param n 37 | */ 38 | public DutchSnowballStemmer(TermPipeline n) 39 | { 40 | super("Dutch", n); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /modules/tests/src/test/java/org/terrier/applications/TestShowDocumentCommand.java: -------------------------------------------------------------------------------- 1 | package org.terrier.applications; 2 | 3 | import org.junit.Test; 4 | import org.terrier.indexing.IndexTestUtils; 5 | import org.terrier.structures.Index; 6 | import org.terrier.structures.IndexOnDisk; 7 | import org.terrier.tests.ApplicationSetupBasedTest; 8 | import org.terrier.utility.ApplicationSetup; 9 | 10 | public class TestShowDocumentCommand extends ApplicationSetupBasedTest { 11 | 12 | @Test public void testNoBlocks() throws Exception 13 | { 14 | String mydoc = "hello there"; 15 | ApplicationSetup.setProperty("termpipelines", ""); 16 | Index index = IndexTestUtils.makeIndex(new String[]{"doc1"}, new String[]{mydoc}); 17 | ApplicationSetup.TERRIER_INDEX_PATH = ((IndexOnDisk)index).getPath(); 18 | ApplicationSetup.TERRIER_INDEX_PREFIX = ((IndexOnDisk)index).getPrefix(); 19 | new ShowDocumentCommand().run(new String[]{"--docid", "0"}); 20 | } 21 | 22 | @Test public void testBlocks() throws Exception 23 | { 24 | String mydoc = "hello there"; 25 | ApplicationSetup.setProperty("termpipelines", ""); 26 | Index index = IndexTestUtils.makeIndexBlocks(new String[]{"doc1"}, new String[]{mydoc}); 27 | ApplicationSetup.TERRIER_INDEX_PATH = ((IndexOnDisk)index).getPath(); 28 | ApplicationSetup.TERRIER_INDEX_PREFIX = ((IndexOnDisk)index).getPrefix(); 29 | new ShowDocumentCommand().run(new String[]{"--docid", "0"}); 30 | } 31 | } 32 | --------------------------------------------------------------------------------
/modules/core/src/main/java/org/terrier/terms/DanishSnowballStemmer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.uk 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is DanishSnowballStemmer.java. 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original author) 25 | * 26 | */ 27 | package org.terrier.terms; 28 | 29 | /** Danish stemmer implemented by Snowball.
30 | * @author Craig Macdonald 31 | */ 32 | public class DanishSnowballStemmer extends SnowballStemmer 33 | { 34 | /** 35 | * constructor 36 | * @param n 37 | */ 38 | public DanishSnowballStemmer(TermPipeline n) 39 | { 40 | super("Danish", n); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/terms/FrenchSnowballStemmer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.uk 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is FrenchSnowballStemmer.java. 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original author) 25 | * 26 | */ 27 | package org.terrier.terms; 28 | 29 | /** French stemmer implemented by Snowball.
30 | * @author Craig Macdonald 31 | */ 32 | public class FrenchSnowballStemmer extends SnowballStemmer 33 | { 34 | /** 35 | * constructor 36 | * @param n 37 | */ 38 | public FrenchSnowballStemmer(TermPipeline n) 39 | { 40 | super("French", n); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/matching/models/dependence/pBiL2.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is pBiL2.java. 19 | * 20 | * The Original Code is Copyright (C) 2017-2020 the University of Glasgow. 21 | * All Rights Reserved. 
22 | * 23 | * Contributor(s): 24 | * Craig Macdonald 25 | */ 26 | package org.terrier.matching.models.dependence; 27 | 28 | public class pBiL2 extends pBiL { 29 | private static final long serialVersionUID = 1L; 30 | 31 | public pBiL2(){} 32 | 33 | public pBiL2(int _ngramLength){ 34 | super(_ngramLength); 35 | super.norm2 = true; 36 | } 37 | 38 | @Override 39 | public String getInfo() { 40 | return this.getClass().getSimpleName() + "c" + super.c; 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/terms/EnglishSnowballStemmer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.uk 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is EnglishSnowballStemmer.java. 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original author) 25 | * 26 | */ 27 | package org.terrier.terms; 28 | 29 | /** English stemmer implemented by Snowball.
30 | * @author Craig Macdonald 31 | */ 32 | public class EnglishSnowballStemmer extends SnowballStemmer 33 | { 34 | /** 35 | * constructor 36 | * @param n 37 | */ 38 | public EnglishSnowballStemmer(TermPipeline n) 39 | { 40 | super("English", n); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/terms/FinnishSnowballStemmer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.uk 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is FinnishSnowballStemmer.java. 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original author) 25 | * 26 | */ 27 | package org.terrier.terms; 28 | 29 | /** Finnish stemmer implemented by Snowball.
30 | * @author Craig Macdonald 31 | */ 32 | public class FinnishSnowballStemmer extends SnowballStemmer 33 | { 34 | /** 35 | * constructor 36 | * @param n 37 | */ 38 | public FinnishSnowballStemmer(TermPipeline n) 39 | { 40 | super("Finnish", n); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/terms/GermanSnowballStemmer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.uk 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is GermanSnowballStemmer.java. 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original author) 25 | * 26 | */ 27 | package org.terrier.terms; 28 | 29 | /** German stemmer implemented by Snowball.
30 | * @author Craig Macdonald 31 | */ 32 | 33 | public class GermanSnowballStemmer extends SnowballStemmer 34 | { 35 | /** 36 | * constructor 37 | * @param n 38 | */ 39 | public GermanSnowballStemmer(TermPipeline n) 40 | { 41 | super("German", n); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/terms/ItalianSnowballStemmer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.uk 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is ItalianSnowballStemmer.java. 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original author) 25 | * 26 | */ 27 | package org.terrier.terms; 28 | 29 | /** Italian stemmer implemented by Snowball.
30 | * @author Craig Macdonald 31 | */ 32 | public class ItalianSnowballStemmer extends SnowballStemmer 33 | { 34 | /** 35 | * constructor 36 | * @param n 37 | */ 38 | public ItalianSnowballStemmer(TermPipeline n) 39 | { 40 | super("Italian", n); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/terms/RussianSnowballStemmer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.uk 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is RussianSnowballStemmer.java. 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original author) 25 | * 26 | */ 27 | package org.terrier.terms; 28 | 29 | /** Russian stemmer implemented by Snowball.
30 | * @author Craig Macdonald 31 | */ 32 | public class RussianSnowballStemmer extends SnowballStemmer 33 | { 34 | /** 35 | * constructor 36 | * @param n 37 | */ 38 | public RussianSnowballStemmer(TermPipeline n) 39 | { 40 | super("Russian", n); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/terms/SpanishSnowballStemmer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.uk 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is SpanishSnowballStemmer.java. 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original author) 25 | * 26 | */ 27 | package org.terrier.terms; 28 | 29 | /** Spanish stemmer implemented by Snowball.
30 | * @author Craig Macdonald 31 | */ 32 | public class SpanishSnowballStemmer extends SnowballStemmer 33 | { 34 | /** 35 | * constructor 36 | * @param n 37 | */ 38 | public SpanishSnowballStemmer(TermPipeline n) 39 | { 40 | super("Spanish", n); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/terms/SwedishSnowballStemmer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.uk 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is SwedishSnowballStemmer.java. 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original author) 25 | * 26 | */ 27 | package org.terrier.terms; 28 | 29 | /** Swedish stemmer implemented by Snowball.
30 | * @author Craig Macdonald 31 | */ 32 | public class SwedishSnowballStemmer extends SnowballStemmer 33 | { 34 | /** 35 | * constructor 36 | * @param n 37 | */ 38 | public SwedishSnowballStemmer(TermPipeline n) 39 | { 40 | super("Swedish", n); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/terms/TurkishSnowballStemmer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.uk 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is TurkishSnowballStemmer.java. 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original author) 25 | * 26 | */ 27 | package org.terrier.terms; 28 | 29 | /** Turkish stemmer implemented by Snowball.
30 | * @author Craig Macdonald 31 | */ 32 | public class TurkishSnowballStemmer extends SnowballStemmer 33 | { 34 | /** 35 | * constructor 36 | * @param n 37 | */ 38 | public TurkishSnowballStemmer(TermPipeline n) 39 | { 40 | super("Turkish", n); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/terms/RomanianSnowballStemmer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.uk 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is RomanianSnowballStemmer.java. 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original author) 25 | * 26 | */ 27 | package org.terrier.terms; 28 | 29 | /** Romanian stemmer implemented by Snowball.
30 | * @author Craig Macdonald 31 | */ 32 | public class RomanianSnowballStemmer extends SnowballStemmer 33 | { 34 | /** 35 | * constructor 36 | * @param n 37 | */ 38 | public RomanianSnowballStemmer(TermPipeline n) 39 | { 40 | super("Romanian", n); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/terms/HungarianSnowballStemmer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.uk 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is HungarianSnowballStemmer.java. 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original author) 25 | * 26 | */ 27 | package org.terrier.terms; 28 | 29 | /** Hungarian stemmer implemented by Snowball.
30 | * @author Craig Macdonald 31 | */ 32 | public class HungarianSnowballStemmer extends SnowballStemmer 33 | { 34 | /** 35 | * constructor 36 | * @param n 37 | */ 38 | public HungarianSnowballStemmer(TermPipeline n) 39 | { 40 | super("Hungarian", n); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/terms/NorwegianSnowballStemmer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.uk 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is NorwegianSnowballStemmer.java. 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original author) 25 | * 26 | */ 27 | package org.terrier.terms; 28 | 29 | /** Norwegian stemmer implemented by Snowball.
30 | * @author Craig Macdonald 31 | */ 32 | public class NorwegianSnowballStemmer extends SnowballStemmer 33 | { 34 | /** 35 | * constructor 36 | * @param n 37 | */ 38 | public NorwegianSnowballStemmer(TermPipeline n) 39 | { 40 | super("Norwegian", n); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /modules/docvectors/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | 6 | terrier-platform 7 | org.terrier 8 | 5.11 9 | ../../ 10 | 11 | 12 | terrier-docvectors 13 | terrier-docvectors 14 | 15 | 16 | 17 | org.terrier 18 | terrier-core 19 | ${project.version} 20 | 21 | 22 | org.terrier 23 | terrier-learning 24 | ${project.version} 25 | 26 | 27 | junit 28 | junit 29 | 4.13.1 30 | test 31 | 32 | 33 | org.terrier 34 | terrier-tests 35 | ${project.version} 36 | 37 | 38 | 39 | net.sf.trove4j 40 | trove4j 41 | 2.0.2 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /modules/realtime/src/main/java/org/terrier/realtime/memory/MetaIndexMap.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is MetaIndexMap.java. 
19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Richard McCreadie 25 | * Stuart Mackie 26 | */ 27 | 28 | package org.terrier.realtime.memory; 29 | 30 | /** 31 | * An interface defining a meta index structure 32 | * 33 | * @author Richard McCreadie, Stuart Mackie 34 | * @since 4.0 35 | * 36 | */ 37 | public interface MetaIndexMap { 38 | 39 | 40 | public void writeDocumentEntry(int docid, String[] data); 41 | 42 | } 43 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/terms/PortugueseSnowballStemmer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.uk 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is PortugueseSnowballStemmer.java. 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original author) 25 | * 26 | */ 27 | package org.terrier.terms; 28 | 29 | /** Portuguese stemmer implemented by Snowball.
30 | * @author Craig Macdonald 31 | */ 32 | public class PortugueseSnowballStemmer extends SnowballStemmer 33 | { 34 | /** 35 | * constructor 36 | * @param n 37 | */ 38 | public PortugueseSnowballStemmer(TermPipeline n) 39 | { 40 | super("Portuguese", n); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /modules/index-api/src/main/java/org/terrier/structures/BlockEntryStatistics.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org/ 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is BlockEntryStatistics.java 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original contributor) 25 | */ 26 | package org.terrier.structures; 27 | /** 28 | * Interface describing the statistics for a block entry. It is just 29 | * an Entry with a block count. 30 | */ 31 | @Deprecated 32 | public interface BlockEntryStatistics extends EntryStatistics { 33 | /** The number of blocks that this term has. 34 | * Needed by the BlockInvertedIndexBuilder. 
*/ 35 | int getBlockCount(); 36 | } 37 | -------------------------------------------------------------------------------- /modules/tests/src/test/java/org/terrier/utility/TestTimer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original is in 'TestTimer.java' 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 
22 | * 23 | * Contributor(s): 24 | * Craig Macdonald 25 | * Richard McCreadie 26 | */ 27 | package org.terrier.utility; 28 | 29 | public class TestTimer { 30 | 31 | 32 | public static void main(String[] args) throws Exception 33 | { 34 | TerrierTimer tt = new TerrierTimer("TestTimer", 3); 35 | tt.start(); 36 | Thread.sleep(2000); 37 | tt.increment(); 38 | Thread.sleep(2000); 39 | tt.increment(); 40 | tt.increment(); 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /modules/concurrent/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | 4 | 5 | terrier-platform 6 | org.terrier 7 | 5.11 8 | ../../ 9 | 10 | 11 | terrier-concurrent 12 | 13 | 14 | 15 | org.terrier 16 | terrier-core 17 | ${project.version} 18 | 19 | 20 | 21 | org.terrier 22 | terrier-batch-retrieval 23 | ${project.version} 24 | 25 | 26 | 27 | org.terrier 28 | terrier-tests 29 | ${project.version} 30 | 31 | 32 | 33 | 34 | 35 | junit 36 | junit 37 | 4.13.1 38 | test 39 | 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /bin/debug.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | REM Terrier - Terabyte Retriever 3 | REM Webpage: http://terrier.org 4 | REM Contact: terrier{a.}dcs.gla.ac.uk 5 | REM 6 | REM The contents of this file are subject to the Mozilla Public 7 | REM License Version 1.1 (the "License"); you may not use this file 8 | REM except in compliance with the License. You may obtain a copy of 9 | REM the License at http://www.mozilla.org/MPL/ 10 | REM 11 | REM Software distributed under the License is distributed on an "AS 12 | REM IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or 13 | REM implied. See the License for the specific language governing 14 | REM rights and limitations under the License. 
rem Used to detect if --debug is mentioned on the command line.
rem Result is left in the DEBUG environment variable: YES if any argument is
rem exactly --debug, --DEBUG, -debug or -DEBUG, otherwise NO.
SET DEBUG=NO

:scan
rem An empty %1 means every argument has been examined without a match.
IF "%1"=="" GOTO done

IF "%1"=="--debug" GOTO found
IF "%1"=="--DEBUG" GOTO found
IF "%1"=="-debug" GOTO found
IF "%1"=="-DEBUG" GOTO found

rem Not a debug switch: discard it and inspect the next argument.
SHIFT
GOTO scan

:found
SET DEBUG=YES

:done
rem echo %DEBUG%
22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (craigm{at}dcs.gla.ac.uk) 25 | */ 26 | package org.terrier.utility.io; 27 | import java.io.IOException; 28 | import java.io.DataOutput; 29 | 30 | /** This interface represents an interface on the writing behaviour of a RandomAccessFile. 31 | * @since 2.2 32 | * @author Craig Macdonald 33 | */ 34 | public interface RandomDataOutput extends DataOutput, RandomDataInput 35 | { 36 | /** Sets the length of this file. */ 37 | void setLength(long newLength) throws IOException; 38 | } -------------------------------------------------------------------------------- /modules/rest-client/src/main/java/org/terrier/restclient/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is package-info.java. 19 | * 20 | * The Original Code is Copyright (C) 2017-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald 25 | */ 26 | /** Provides a proxy Manager implementation that can access remotely provided 27 | * Managers over a HTTP REST connection. 
28 | * 29 | * To use, ensure that your terrier-rest-client is included on your classpath, 30 | * and then use a ManagerFactory as normal, on an IndexRef referring to a remote 31 | * REST server. 32 | * 33 | * Manager m = ManagerFactory.from(IndexRef.of("http://host/of/rest/")) 34 | * 35 | */ 36 | package org.terrier.restclient; 37 | 38 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/structures/collections/OrderedMap.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org/ 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is OrderedMap.java 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original contributor) 25 | */ 26 | package org.terrier.structures.collections; 27 | import java.util.Map; 28 | /** Specifies a map interface where keys are ordered, and can be retrieved 29 | * using an integer index.
/**
 * Specifies a map interface where keys are ordered, and entries can be
 * retrieved using an integer index.
 *
 * @since 3.0
 * @param <K> type of the key
 * @param <V> type of the value
 */
public interface OrderedMap<K, V> extends Map<K, V>
{
    /**
     * Return the entry at the specified index.
     *
     * @param index position of the desired entry in the key ordering
     *              (presumably zero-based - confirm against implementors)
     * @return the key/value entry at that position
     */
    Map.Entry<K, V> get(int index);
}
-------------------------------------------------------------------------------- /modules/realtime/src/main/java/org/terrier/realtime/memory/fields/MemoryFieldsDirectIterablePosting.java: -------------------------------------------------------------------------------- 1 | package org.terrier.realtime.memory.fields; 2 | 3 | import org.terrier.realtime.memory.MemoryDirectIterablePosting; 4 | import org.terrier.structures.postings.FieldPostingImpl; 5 | import org.terrier.structures.postings.WritablePosting; 6 | import org.terrier.structures.postings.FieldPosting; 7 | import java.util.List; 8 | import gnu.trove.TIntArrayList; 9 | 10 | 11 | public class MemoryFieldsDirectIterablePosting extends MemoryDirectIterablePosting implements FieldPosting { 12 | 13 | List pl_fields; 14 | public MemoryFieldsDirectIterablePosting(TIntArrayList pl_termids, 15 | TIntArrayList pl_freq, List _pl_fields) 16 | { 17 | super(pl_termids, pl_freq); 18 | this.pl_fields = _pl_fields; 19 | } 20 | 21 | public WritablePosting asWritablePosting() { 22 | return new FieldPostingImpl(getId(), getFrequency(), getFieldFrequencies()); 23 | } 24 | 25 | /** Returns the frequencies of the term in each field of the document */ 26 | public int[] getFieldFrequencies() { 27 | return pl_fields.get(super.index); 28 | } 29 | 30 | /** Returns the lengths of the each fields in the current document */ 31 | public int[] getFieldLengths() 32 | { 33 | throw new UnsupportedOperationException(); 34 | } 35 | 36 | public void setFieldLengths(int[] newLengths){ 37 | throw new UnsupportedOperationException(); 38 | } 39 | } -------------------------------------------------------------------------------- /modules/tests/src/test/java/org/terrier/matching/models/TestBM25.java: -------------------------------------------------------------------------------- 1 | package org.terrier.matching.models; 2 | 3 | import org.junit.Test; 4 | import org.terrier.tests.ApplicationSetupBasedTest; 5 | import org.terrier.indexing.IndexTestUtils; 
6 | import org.terrier.utility.ApplicationSetup; 7 | import org.terrier.structures.Index; 8 | import org.terrier.querying.Manager; 9 | import org.terrier.querying.ManagerFactory; 10 | import org.terrier.querying.SearchRequest; 11 | import java.io.IOException; 12 | 13 | import static org.junit.Assert.*; 14 | 15 | public class TestBM25 extends ApplicationSetupBasedTest { 16 | public void testK1() throws Exception 17 | { 18 | ApplicationSetup.setProperty("termpipelines", ""); 19 | Index i = IndexTestUtils.makeIndex( 20 | new String[]{"doc1"}, 21 | new String[]{"this is a document document"} 22 | ); 23 | Manager m = ManagerFactory.from(i.getIndexRef()); 24 | SearchRequest srq; 25 | srq = m.newSearchRequest("q1", "document"); 26 | srq.setControl("wmodel", "BM25"); 27 | m.runSearchRequest(srq); 28 | double score1 = srq.getResults().get(0).getScore(); 29 | 30 | srq = m.newSearchRequest("q1", "document"); 31 | srq.setControl("wmodel", "BM25"); 32 | srq.setControl("bm25.k_1", "1.9"); 33 | 34 | m.runSearchRequest(srq); 35 | double score2 = srq.getResults().get(0).getScore(); 36 | assertTrue( Math.abs(score1 - score2) > 0.0d); 37 | } 38 | } -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/compression/bit/BitWritable.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org/ 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. 
You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is BitWritable.java 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original contributor) 25 | */ 26 | package org.terrier.compression.bit; 27 | 28 | /** Like o.a.h.io.Writable, but for using BitIn and BitOut 29 | * @since 3.0 */ 30 | public interface BitWritable 31 | { 32 | /** Write the object to the specified BitOut. The number of entries written is returned */ 33 | int writeFields(BitOut out); 34 | /** Read in the object to the specified BitIn. The number of entries to read is also required */ 35 | int readFields(BitIn in, int numEntries); 36 | } 37 | -------------------------------------------------------------------------------- /modules/index-api/src/main/java/org/terrier/structures/FieldEntryStatistics.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org/ 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is FieldEntryStatistics.java 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original contributor) 25 | */ 26 | package org.terrier.structures; 27 | 28 | /** 29 | * The statistics for a field entry. 30 | */ 31 | public interface FieldEntryStatistics extends EntryStatistics 32 | { 33 | /** 34 | * Return the frequencies (total numbers of occurrences) of the term for each field. 35 | * 36 | * @return the frequencies (total numbers of occurrences) of the term for each field. 37 | */ 38 | int[] getFieldFrequencies(); 39 | } 40 | -------------------------------------------------------------------------------- /licenses/snowball/bsd-license.txt: -------------------------------------------------------------------------------- 1 | All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 7 | * Neither the name of the nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 8 | 9 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 10 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 11 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 12 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 13 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 14 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 15 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 16 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 17 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 18 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 19 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 20 | 21 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/utility/MemoryChecker.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is MemoryChecker.java. 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original author) 25 | * 26 | */ 27 | package org.terrier.utility; 28 | 29 | /** Interface for various implementations which are used to determine 30 | * if memory has been exhausted.
/**
 * Interface for the various implementations that are used to determine
 * whether memory has been exhausted.
 *
 * @author Craig Macdonald
 * @since 2.2
 */
public interface MemoryChecker {

    /**
     * Check the amount of available memory.
     *
     * @return true if memory is low
     */
    boolean checkMemory();

    /**
     * Reset the flag, to be called once memory has been released.
     */
    void reset();

}
22 | * 23 | * Contributor(s): 24 | * Craig Macdonald 25 | */ 26 | package org.terrier.querying; 27 | 28 | import org.slf4j.Logger; 29 | import org.slf4j.LoggerFactory; 30 | import org.terrier.querying.parser.QueryParser; 31 | 32 | @ProcessPhaseRequisites(ManagerRequisite.RAWQUERY) 33 | public class TerrierQLParser implements Process { 34 | 35 | protected static final Logger logger = LoggerFactory.getLogger(TerrierQLParser.class); 36 | 37 | @Override 38 | public void process(Manager manager, Request q) { 39 | QueryParser.parseQuery(q.getOriginalQuery(), q); 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /modules/tests/src/test/java/org/terrier/fat/FatTestSuite.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is FatTestSuite.java. 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 
22 | * 23 | * Contributor(s): 24 | * Craig Macdonald 25 | */ 26 | 27 | package org.terrier.fat; 28 | import org.junit.runner.RunWith; 29 | import org.junit.runners.Suite; 30 | import org.junit.runners.Suite.SuiteClasses; 31 | 32 | 33 | 34 | 35 | @RunWith(Suite.class) 36 | @SuiteClasses( { 37 | TestFatCandidateResultSet.class, 38 | TestFatFeaturedScoringMatching.class, 39 | TestFatFullMatching.class, 40 | TestFatScoringMatching.class, 41 | TestLinearModelMatching.class, 42 | TestScoringMatchingWithFat.class 43 | }) 44 | 45 | public class FatTestSuite { 46 | 47 | } 48 | -------------------------------------------------------------------------------- /modules/batch-retrieval/src/main/java/org/terrier/structures/cache/GrowingMapQueryStringResultCache.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is GrowingMapQueryStringResultCache.java. 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 
22 | * 23 | * Contributor(s): 24 | * Gianni Amati (original author) 25 | * Vassilis Plachouras 26 | * Ben He 27 | * Craig Macdonald 28 | */ 29 | 30 | package org.terrier.structures.cache; 31 | 32 | import org.terrier.querying.SearchRequest; 33 | 34 | public class GrowingMapQueryStringResultCache extends GrowingMapQueryResultCache { 35 | @Override 36 | protected String hashQuery(SearchRequest q) { 37 | return q.getOriginalQuery(); 38 | } 39 | } -------------------------------------------------------------------------------- /licenses/pdfbox/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2003, www.pdfbox.org 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 3. Neither the name of pdfbox; nor the names of its 13 | contributors may be used to endorse or promote products derived from this 14 | software without specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY 20 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 23 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /modules/index-api/src/main/java/org/terrier/structures/NgramEntryStatistics.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is NgramEntryStatistics.java. 19 | * 20 | * The Original Code is Copyright (C) 2017-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald 25 | */ 26 | package org.terrier.structures; 27 | 28 | /** Represent statistics of n-grams, such as those used in 29 | * sequence dependence models. These require to know the 30 | * windows size. 
31 | */ 32 | public interface NgramEntryStatistics extends EntryStatistics 33 | { 34 | /** Get the size of the window used to calculate an n-gram frequency 35 | * @return number of tokens 36 | */ 37 | public int getWindowSize(); 38 | 39 | /** Update the window size */ 40 | public void setWindowSize(int ws); 41 | } 42 | -------------------------------------------------------------------------------- /doc/todo.md: -------------------------------------------------------------------------------- 1 | Terrier Future Features and Known Issues 2 | ======================================== 3 | 4 | List of features and known issues that are marked for future Terrier versions: 5 | 6 | Future Features 7 | --------------- 8 | 9 | - Better exception handling. Some retrieval errors should result in a query being aborted, rather than trying to blindly continue running the query with the index is an invalid state. 10 | - See also out [Github issue tracker](https://github.com/terrier-org/terrier-core/issues) 11 | 12 | Known Issues 13 | ------------ 14 | 15 | - The real-time index structures do not currently support block indexing 16 | - The real-time fields index cannot write direct index structures 17 | 18 | Deprecations, Scheduled Refactorings 19 | ------------------------------------ 20 | 21 | The following classes and interfaces are/may be deprecated in this version of Terrier and will likely be removed or refactored in a future release: 22 | 23 | - [ApplicationSetup](http://terrier.org/docs/v5.2/javadoc/org/terrier/utility/ApplicationSetup.html) will be replaced with a new non-global configuration API. 24 | 25 | **Contributions** 26 | All community contributions to the Terrier framework are welcome. In addition, you can find more information about contributing on the [Terrier website](http://terrier.org/). 
27 | 28 | 29 | 30 | ------------------------ 31 | > Webpage: 32 | > Contact: [School of Computing Science](http://www.dcs.gla.ac.uk/) 33 | > Copyright (C) 2004-2020 [University of Glasgow](http://www.gla.ac.uk/). All Rights Reserved. 34 | -------------------------------------------------------------------------------- /modules/batch-indexers/src/main/java/org/terrier/structures/indexing/LZ4MetaIndexBuilder.java: -------------------------------------------------------------------------------- 1 | package org.terrier.structures.indexing; 2 | import java.io.IOException; 3 | import org.terrier.structures.LZ4CompressedMetaIndex; 4 | import net.jpountz.lz4.LZ4Factory; 5 | import net.jpountz.lz4.LZ4Compressor; 6 | import org.terrier.structures.IndexOnDisk; 7 | 8 | /** 9 | * Writes all metadata using LZ4 compression. 10 | * @since 5.5 11 | */ 12 | public class LZ4MetaIndexBuilder extends BaseMetaIndexBuilder { 13 | 14 | LZ4Compressor compressor = LZ4Factory.fastestInstance().fastCompressor(); 15 | byte[] compressedBuffer; 16 | 17 | public LZ4MetaIndexBuilder(IndexOnDisk _index, String[] _keyNames, int[] _valueLens, String[] _reverseKeys) 18 | { 19 | this(_index, "meta", _keyNames, _valueLens, _reverseKeys); 20 | } 21 | 22 | public LZ4MetaIndexBuilder(IndexOnDisk _index, String _structureName, String[] _keyNames, int[] _valueLens, String[] _reverseKeys) 23 | { 24 | super(_index, _structureName, _keyNames, _valueLens, _reverseKeys); 25 | this.structureClass = LZ4CompressedMetaIndex.class; 26 | this.structureInputStreamClass = LZ4CompressedMetaIndex.InputStream.class; 27 | this.compressedBuffer = new byte[this.compressor.maxCompressedLength(entryLengthBytes)]; 28 | } 29 | 30 | protected int writeData(byte[] data) throws IOException { 31 | int numBytes = compressor.compress(data, compressedBuffer); 32 | dataOutput.write(compressedBuffer, 0, numBytes); 33 | return numBytes; 34 | } 35 | 36 | } -------------------------------------------------------------------------------- 
/modules/core/src/main/resources/terrier.default.properties: -------------------------------------------------------------------------------- 1 | #default controls for manager 2 | querying.processes=terrierql:TerrierQLParser,parsecontrols:TerrierQLToControls,parseql:TerrierQLToMatchingQueryTerms,matchopql:MatchingOpQLParser,applypipeline:ApplyTermPipeline,localmatching:LocalManager$ApplyLocalMatching,rm1:RM1,rm3:RM3,qe:QueryExpansion,labels:org.terrier.learning.LabelDecorator,filters:LocalManager$PostFilterProcess,decorate:SimpleDecorateProcess 3 | #default controls for the web-based interface. SimpleDecorate 4 | #is the simplest metadata decorator. For more control, see Decorate. 5 | querying.postfilters=decorate:SimpleDecorate,site:SiteFilter,scope:Scope 6 | 7 | #default and allowed controls 8 | querying.default.controls=wmodel:DPH,parsecontrols:on,parseql:on,applypipeline:on,terrierql:on,localmatching:on,filters:on,decorate:on,decorate_batch:on 9 | querying.allowed.controls=scope,qe,qemodel,start,end,site,scope 10 | 11 | #the processing stages a term goes through 12 | termpipelines=Stopwords,PorterStemmer 13 | 14 | #document tags specification 15 | #for processing the contents of 16 | #the documents, ignoring DOCHDR 17 | TrecDocTags.doctag=DOC 18 | TrecDocTags.idtag=DOCNO 19 | TrecDocTags.skip=DOCHDR 20 | #set to true if the tags can be of various case 21 | TrecDocTags.casesensitive=false 22 | 23 | 24 | #starting from Terrier 5.3, we assume that documents are in UTF-8 25 | trec.encoding=UTF-8 26 | 27 | 28 | #query tags specification 29 | TrecQueryTags.doctag=TOP 30 | TrecQueryTags.idtag=NUM 31 | TrecQueryTags.process=TOP,NUM,TITLE 32 | TrecQueryTags.skip=DESC,NARR 33 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/structures/IndexConfigurable.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: 
http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is IndexConfigurable.java. 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original author) 25 | * 26 | */ 27 | package org.terrier.structures; 28 | /** Interface defining that an index structure wants access to the 29 | * Index object it is associated with. This is usually such that 30 | * it can configure itself further using various index properties. 31 | * @since 2.0 32 | * @author Craig Macdonald 33 | */ 34 | public interface IndexConfigurable 35 | { 36 | /** Tell the implementer which Index object it is associated with. 
37 | * @param i Index object to use 38 | */ 39 | void setIndex(Index i); 40 | } 41 | -------------------------------------------------------------------------------- /modules/realtime/src/main/java/org/terrier/realtime/memory/MemoryPointer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is MemoryPointer.java. 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Richard McCreadie 25 | * Stuart Mackie 26 | */ 27 | 28 | package org.terrier.realtime.memory; 29 | 30 | import org.terrier.structures.Pointer; 31 | 32 | /** 33 | * Pointer type for use with MemoryIndex. It is actually implemented as a simple integer 34 | * representing the term or document id. 35 | * 36 | * @author Richard McCreadie, Stuart Mackie 37 | * @since 4.0 38 | */ 39 | public interface MemoryPointer extends Pointer { 40 | 41 | /** 42 | * Get the integer pointer value. 
43 | */ 44 | int getPointer(); 45 | } 46 | -------------------------------------------------------------------------------- /modules/core/src/main/java/org/terrier/indexing/tokenisation/TokenStream.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Terrier - Terabyte Retriever 3 | * Webpage: http://terrier.org 4 | * Contact: terrier{a.}dcs.gla.ac.uk 5 | * University of Glasgow - School of Computing Science 6 | * http://www.gla.ac.uk/ 7 | * 8 | * The contents of this file are subject to the Mozilla Public License 9 | * Version 1.1 (the "License"); you may not use this file except in 10 | * compliance with the License. You may obtain a copy of the License at 11 | * http://www.mozilla.org/MPL/ 12 | * 13 | * Software distributed under the License is distributed on an "AS IS" 14 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 15 | * the License for the specific language governing rights and limitations 16 | * under the License. 17 | * 18 | * The Original Code is UTFTokeniser.java. 19 | * 20 | * The Original Code is Copyright (C) 2004-2020 the University of Glasgow. 21 | * All Rights Reserved. 22 | * 23 | * Contributor(s): 24 | * Craig Macdonald (original author) 25 | */ 26 | package org.terrier.indexing.tokenisation; 27 | 28 | import java.util.Iterator; 29 | 30 | /** Represents a stream of tokens found by a tokeniser. 31 | * It is of note that a TokenStream may return null 32 | * for a next() method, even if hasNext() previously returned 33 | * true. 
34 | * @since 3.5 35 | * @author Craig Macdonald 36 | */ 37 | public abstract class TokenStream implements Iterator { 38 | 39 | @Override 40 | public void remove() { 41 | throw new UnsupportedOperationException(); 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /modules/core/src/main/java/overview.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Terrier Information Retrieval Platform 5 | 24 | 25 | 26 |

Terrier is a modular platform for the rapid development of 27 | large-scale Information Retrieval applications, providing 28 | indexing and retrieval functionalities. Terrier is based on 29 | the Divergence from Randomness (DFR) framework. It can index 30 | various document collections, including the standard TREC 31 | collections, such as AP, WSJ, WT10G, .GOV and .GOV2. It also 32 | provides a wide range of parameter-free weighting approaches 33 | and full-text search algorithms, aiming to offer a public 34 | testbed for performing Information Retrieval experiments.

35 | 36 | 37 | --------------------------------------------------------------------------------