├── .gitignore ├── LICSENSE ├── MATERIAL.md ├── README.md ├── data └── .gitkeep ├── evalR ├── CranfieldMCP │ ├── .Rbuildignore │ ├── DESCRIPTION │ ├── NAMESPACE │ ├── R │ │ ├── firstlib.r │ │ ├── pairwise.test.R │ │ └── read.eval.R │ └── man │ │ ├── pairwise.test.Rd │ │ └── read.eval.Rd ├── README.md └── Revaluate.R ├── evals ├── .gitkeep ├── input.bm25 ├── input.bm25va ├── input.tf-idf └── old_evals │ ├── README.MD │ ├── bm25_init │ ├── bm25_tf │ ├── bm25_without │ ├── bm25va_init │ ├── bm25va_tf │ ├── bm25va_without │ ├── tf-idf_init │ ├── tf-idf_tf │ └── tf-idf_without ├── mapReduceIndexing ├── README.md ├── build.sbt ├── project │ ├── build.properties │ └── plugins.sbt ├── scalastyle-config.xml ├── src │ └── main │ │ ├── resources │ │ ├── application.conf │ │ └── log4j.properties │ │ └── scala │ │ └── ir │ │ ├── CustomTextFile.scala │ │ ├── ExtractDocuments.scala │ │ └── IndexCreation.scala ├── sync.sh └── version.sbt ├── notebooks ├── concurrentTokenization.ipynb ├── indexingPrototyping.ipynb ├── readdocs.ipynb └── scorer.ipynb ├── scores ├── old_scores │ ├── README.MD │ ├── scores_bm25_init │ ├── scores_bm25_tf │ ├── scores_bm25_without │ ├── scores_bm25va_init │ ├── scores_bm25va_tf │ ├── scores_bm25va_without │ ├── scores_tf-idf_init │ ├── scores_tf-idf_tf │ └── scores_tf-idf_without ├── scores_bm25.txt ├── scores_bm25va.txt └── scores_tf-idf.txt └── src ├── README.MD ├── indexer.sh ├── indexing.py ├── pyskip.py ├── search.py └── searcher.sh /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/.gitignore -------------------------------------------------------------------------------- /LICSENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/LICSENSE -------------------------------------------------------------------------------- /MATERIAL.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/MATERIAL.md -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/README.md -------------------------------------------------------------------------------- /data/.gitkeep: -------------------------------------------------------------------------------- 1 | #git keep -------------------------------------------------------------------------------- /evalR/CranfieldMCP/.Rbuildignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/evalR/CranfieldMCP/.Rbuildignore -------------------------------------------------------------------------------- /evalR/CranfieldMCP/DESCRIPTION: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/evalR/CranfieldMCP/DESCRIPTION -------------------------------------------------------------------------------- /evalR/CranfieldMCP/NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | -------------------------------------------------------------------------------- /evalR/CranfieldMCP/R/firstlib.r: -------------------------------------------------------------------------------- 1 | .First.lib <- function(lib, pkg) 2 | { 3 | } 4 | -------------------------------------------------------------------------------- /evalR/CranfieldMCP/R/pairwise.test.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/evalR/CranfieldMCP/R/pairwise.test.R -------------------------------------------------------------------------------- /evalR/CranfieldMCP/R/read.eval.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/evalR/CranfieldMCP/R/read.eval.R -------------------------------------------------------------------------------- /evalR/CranfieldMCP/man/pairwise.test.Rd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/evalR/CranfieldMCP/man/pairwise.test.Rd -------------------------------------------------------------------------------- /evalR/CranfieldMCP/man/read.eval.Rd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/evalR/CranfieldMCP/man/read.eval.Rd -------------------------------------------------------------------------------- /evalR/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/evalR/README.md -------------------------------------------------------------------------------- /evalR/Revaluate.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/evalR/Revaluate.R -------------------------------------------------------------------------------- /evals/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evals/input.bm25: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/evals/input.bm25 -------------------------------------------------------------------------------- /evals/input.bm25va: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/evals/input.bm25va -------------------------------------------------------------------------------- /evals/input.tf-idf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/evals/input.tf-idf -------------------------------------------------------------------------------- /evals/old_evals/README.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/evals/old_evals/README.MD -------------------------------------------------------------------------------- /evals/old_evals/bm25_init: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/evals/old_evals/bm25_init -------------------------------------------------------------------------------- /evals/old_evals/bm25_tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/evals/old_evals/bm25_tf -------------------------------------------------------------------------------- /evals/old_evals/bm25_without: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/evals/old_evals/bm25_without -------------------------------------------------------------------------------- /evals/old_evals/bm25va_init: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/evals/old_evals/bm25va_init -------------------------------------------------------------------------------- /evals/old_evals/bm25va_tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/evals/old_evals/bm25va_tf -------------------------------------------------------------------------------- /evals/old_evals/bm25va_without: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/evals/old_evals/bm25va_without -------------------------------------------------------------------------------- /evals/old_evals/tf-idf_init: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/evals/old_evals/tf-idf_init -------------------------------------------------------------------------------- /evals/old_evals/tf-idf_tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/evals/old_evals/tf-idf_tf -------------------------------------------------------------------------------- /evals/old_evals/tf-idf_without: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/evals/old_evals/tf-idf_without -------------------------------------------------------------------------------- /mapReduceIndexing/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/mapReduceIndexing/README.md -------------------------------------------------------------------------------- /mapReduceIndexing/build.sbt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/mapReduceIndexing/build.sbt -------------------------------------------------------------------------------- /mapReduceIndexing/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=0.13.15 -------------------------------------------------------------------------------- /mapReduceIndexing/project/plugins.sbt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/mapReduceIndexing/project/plugins.sbt -------------------------------------------------------------------------------- /mapReduceIndexing/scalastyle-config.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/mapReduceIndexing/scalastyle-config.xml -------------------------------------------------------------------------------- /mapReduceIndexing/src/main/resources/application.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/mapReduceIndexing/src/main/resources/application.conf -------------------------------------------------------------------------------- /mapReduceIndexing/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/mapReduceIndexing/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /mapReduceIndexing/src/main/scala/ir/CustomTextFile.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/mapReduceIndexing/src/main/scala/ir/CustomTextFile.scala -------------------------------------------------------------------------------- /mapReduceIndexing/src/main/scala/ir/ExtractDocuments.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/mapReduceIndexing/src/main/scala/ir/ExtractDocuments.scala -------------------------------------------------------------------------------- /mapReduceIndexing/src/main/scala/ir/IndexCreation.scala: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/mapReduceIndexing/src/main/scala/ir/IndexCreation.scala -------------------------------------------------------------------------------- /mapReduceIndexing/sync.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/mapReduceIndexing/sync.sh -------------------------------------------------------------------------------- /mapReduceIndexing/version.sbt: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2016 Georg Heiler 2 | 3 | version := "0.0.1.SNAPSHOT" -------------------------------------------------------------------------------- /notebooks/concurrentTokenization.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/notebooks/concurrentTokenization.ipynb -------------------------------------------------------------------------------- /notebooks/indexingPrototyping.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/notebooks/indexingPrototyping.ipynb -------------------------------------------------------------------------------- /notebooks/readdocs.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/notebooks/readdocs.ipynb -------------------------------------------------------------------------------- /notebooks/scorer.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/notebooks/scorer.ipynb -------------------------------------------------------------------------------- /scores/old_scores/README.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/scores/old_scores/README.MD -------------------------------------------------------------------------------- /scores/old_scores/scores_bm25_init: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/scores/old_scores/scores_bm25_init -------------------------------------------------------------------------------- /scores/old_scores/scores_bm25_tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/scores/old_scores/scores_bm25_tf -------------------------------------------------------------------------------- /scores/old_scores/scores_bm25_without: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/scores/old_scores/scores_bm25_without -------------------------------------------------------------------------------- /scores/old_scores/scores_bm25va_init: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/scores/old_scores/scores_bm25va_init -------------------------------------------------------------------------------- /scores/old_scores/scores_bm25va_tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/scores/old_scores/scores_bm25va_tf -------------------------------------------------------------------------------- /scores/old_scores/scores_bm25va_without: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/scores/old_scores/scores_bm25va_without -------------------------------------------------------------------------------- /scores/old_scores/scores_tf-idf_init: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/scores/old_scores/scores_tf-idf_init -------------------------------------------------------------------------------- /scores/old_scores/scores_tf-idf_tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/scores/old_scores/scores_tf-idf_tf -------------------------------------------------------------------------------- /scores/old_scores/scores_tf-idf_without: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/scores/old_scores/scores_tf-idf_without -------------------------------------------------------------------------------- /scores/scores_bm25.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/scores/scores_bm25.txt -------------------------------------------------------------------------------- /scores/scores_bm25va.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/scores/scores_bm25va.txt -------------------------------------------------------------------------------- /scores/scores_tf-idf.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/scores/scores_tf-idf.txt -------------------------------------------------------------------------------- /src/README.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/src/README.MD -------------------------------------------------------------------------------- /src/indexer.sh: -------------------------------------------------------------------------------- 1 | python indexing.py 2 | -------------------------------------------------------------------------------- /src/indexing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/src/indexing.py -------------------------------------------------------------------------------- /src/pyskip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/src/pyskip.py -------------------------------------------------------------------------------- /src/search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/src/search.py -------------------------------------------------------------------------------- /src/searcher.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ScJa/document-search-engine/HEAD/src/searcher.sh --------------------------------------------------------------------------------