├── .github └── workflows │ ├── wheels-pypi.yml │ └── wheels-testpypi.yml ├── .gitignore ├── LICENSE ├── README.md ├── exps ├── accuracy │ ├── accuracy-results.md │ ├── benchmark_groundtruth.py │ ├── benchmark_jaccard.py │ ├── speed-results.md │ └── utils.py ├── end2end │ ├── README.md │ ├── __init__.py │ ├── deduplicator.py │ ├── fastsketch_deduplicator.py │ ├── fastsketch_thread_sweep.sh │ ├── rensa_deduplicator.py │ ├── results │ │ └── fastsketch_thread_scaling_BOOKS3.png │ ├── run.py │ ├── run_all_comparisons.sh │ └── util.py └── sketch │ ├── README.md │ ├── compare_sketch.py │ └── records │ ├── minhash_QPS_vs_k_n1000.png │ ├── minhash_QPS_vs_k_n1600.png │ ├── minhash_QPS_vs_n_k128.png │ ├── plot_comparison_results.py │ └── sketch_comparison_results.csv ├── fastsketchlsh_ext ├── CmakeLists.txt ├── LICENSE ├── LSH_PLAN.md ├── MANIFEST.in ├── cpp │ ├── LSH.cpp │ ├── fasthash_deprecated.cpp │ ├── fastsketch.cpp │ ├── init.cpp │ ├── murmurhash3.cpp │ └── rminhash.cpp ├── include │ ├── LSH.h │ ├── ankerl │ │ └── unordered_dense.h │ ├── fasthash.h │ ├── fastsketch.h │ ├── murmurhash.h │ └── rminhash.h ├── pyproject.toml ├── setup.py └── test │ └── test_rminhash_vs_fasthash_performance.cpp ├── prototype ├── simulation │ ├── README.md │ ├── display_jaccard_estimate_histograms.py │ ├── display_lsh_probdist.py │ ├── figures │ │ ├── combined_fast_and_kmins_hist.png │ │ ├── kmins_vs_fastsketch_in_lsh_probdist.png │ │ └── kmins_vs_fastsketch_probdist.png │ └── util.py ├── src │ ├── cmins_sketch.py │ ├── datasketch_sketch.py │ ├── fast_sketch.py │ ├── fast_sketch_lsh.py │ ├── kmins_sketch.py │ └── rmins_sketch.py └── test │ └── test_fast_sketch_lsh.py ├── requirements.txt └── test ├── test_accuracy.py ├── test_fast_sketch.py └── test_lsh_dedup_comparison.py /.github/workflows/wheels-pypi.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/.github/workflows/wheels-pypi.yml -------------------------------------------------------------------------------- /.github/workflows/wheels-testpypi.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/.github/workflows/wheels-testpypi.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/README.md -------------------------------------------------------------------------------- /exps/accuracy/accuracy-results.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/exps/accuracy/accuracy-results.md -------------------------------------------------------------------------------- /exps/accuracy/benchmark_groundtruth.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/exps/accuracy/benchmark_groundtruth.py -------------------------------------------------------------------------------- /exps/accuracy/benchmark_jaccard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/exps/accuracy/benchmark_jaccard.py -------------------------------------------------------------------------------- /exps/accuracy/speed-results.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/exps/accuracy/speed-results.md -------------------------------------------------------------------------------- /exps/accuracy/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/exps/accuracy/utils.py -------------------------------------------------------------------------------- /exps/end2end/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/exps/end2end/README.md -------------------------------------------------------------------------------- /exps/end2end/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/exps/end2end/__init__.py -------------------------------------------------------------------------------- /exps/end2end/deduplicator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/exps/end2end/deduplicator.py -------------------------------------------------------------------------------- /exps/end2end/fastsketch_deduplicator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/exps/end2end/fastsketch_deduplicator.py -------------------------------------------------------------------------------- /exps/end2end/fastsketch_thread_sweep.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/exps/end2end/fastsketch_thread_sweep.sh -------------------------------------------------------------------------------- /exps/end2end/rensa_deduplicator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/exps/end2end/rensa_deduplicator.py -------------------------------------------------------------------------------- /exps/end2end/results/fastsketch_thread_scaling_BOOKS3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/exps/end2end/results/fastsketch_thread_scaling_BOOKS3.png -------------------------------------------------------------------------------- /exps/end2end/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/exps/end2end/run.py -------------------------------------------------------------------------------- /exps/end2end/run_all_comparisons.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/exps/end2end/run_all_comparisons.sh -------------------------------------------------------------------------------- /exps/end2end/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/exps/end2end/util.py -------------------------------------------------------------------------------- /exps/sketch/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/exps/sketch/README.md -------------------------------------------------------------------------------- /exps/sketch/compare_sketch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/exps/sketch/compare_sketch.py -------------------------------------------------------------------------------- /exps/sketch/records/minhash_QPS_vs_k_n1000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/exps/sketch/records/minhash_QPS_vs_k_n1000.png -------------------------------------------------------------------------------- /exps/sketch/records/minhash_QPS_vs_k_n1600.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/exps/sketch/records/minhash_QPS_vs_k_n1600.png -------------------------------------------------------------------------------- /exps/sketch/records/minhash_QPS_vs_n_k128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/exps/sketch/records/minhash_QPS_vs_n_k128.png -------------------------------------------------------------------------------- /exps/sketch/records/plot_comparison_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/exps/sketch/records/plot_comparison_results.py -------------------------------------------------------------------------------- /exps/sketch/records/sketch_comparison_results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/exps/sketch/records/sketch_comparison_results.csv -------------------------------------------------------------------------------- /fastsketchlsh_ext/CmakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/fastsketchlsh_ext/CmakeLists.txt -------------------------------------------------------------------------------- /fastsketchlsh_ext/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/fastsketchlsh_ext/LICENSE -------------------------------------------------------------------------------- /fastsketchlsh_ext/LSH_PLAN.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/fastsketchlsh_ext/LSH_PLAN.md -------------------------------------------------------------------------------- /fastsketchlsh_ext/MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/fastsketchlsh_ext/MANIFEST.in -------------------------------------------------------------------------------- /fastsketchlsh_ext/cpp/LSH.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/fastsketchlsh_ext/cpp/LSH.cpp -------------------------------------------------------------------------------- /fastsketchlsh_ext/cpp/fasthash_deprecated.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/fastsketchlsh_ext/cpp/fasthash_deprecated.cpp -------------------------------------------------------------------------------- /fastsketchlsh_ext/cpp/fastsketch.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/fastsketchlsh_ext/cpp/fastsketch.cpp -------------------------------------------------------------------------------- /fastsketchlsh_ext/cpp/init.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/fastsketchlsh_ext/cpp/init.cpp -------------------------------------------------------------------------------- /fastsketchlsh_ext/cpp/murmurhash3.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/fastsketchlsh_ext/cpp/murmurhash3.cpp -------------------------------------------------------------------------------- /fastsketchlsh_ext/cpp/rminhash.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/fastsketchlsh_ext/cpp/rminhash.cpp -------------------------------------------------------------------------------- /fastsketchlsh_ext/include/LSH.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/fastsketchlsh_ext/include/LSH.h -------------------------------------------------------------------------------- /fastsketchlsh_ext/include/ankerl/unordered_dense.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/fastsketchlsh_ext/include/ankerl/unordered_dense.h -------------------------------------------------------------------------------- /fastsketchlsh_ext/include/fasthash.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/fastsketchlsh_ext/include/fasthash.h -------------------------------------------------------------------------------- /fastsketchlsh_ext/include/fastsketch.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/fastsketchlsh_ext/include/fastsketch.h -------------------------------------------------------------------------------- /fastsketchlsh_ext/include/murmurhash.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/fastsketchlsh_ext/include/murmurhash.h -------------------------------------------------------------------------------- /fastsketchlsh_ext/include/rminhash.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/fastsketchlsh_ext/include/rminhash.h -------------------------------------------------------------------------------- /fastsketchlsh_ext/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/fastsketchlsh_ext/pyproject.toml -------------------------------------------------------------------------------- /fastsketchlsh_ext/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/fastsketchlsh_ext/setup.py -------------------------------------------------------------------------------- /fastsketchlsh_ext/test/test_rminhash_vs_fasthash_performance.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/fastsketchlsh_ext/test/test_rminhash_vs_fasthash_performance.cpp -------------------------------------------------------------------------------- /prototype/simulation/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/prototype/simulation/README.md -------------------------------------------------------------------------------- /prototype/simulation/display_jaccard_estimate_histograms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/prototype/simulation/display_jaccard_estimate_histograms.py -------------------------------------------------------------------------------- /prototype/simulation/display_lsh_probdist.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/prototype/simulation/display_lsh_probdist.py -------------------------------------------------------------------------------- /prototype/simulation/figures/combined_fast_and_kmins_hist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/prototype/simulation/figures/combined_fast_and_kmins_hist.png -------------------------------------------------------------------------------- /prototype/simulation/figures/kmins_vs_fastsketch_in_lsh_probdist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/prototype/simulation/figures/kmins_vs_fastsketch_in_lsh_probdist.png -------------------------------------------------------------------------------- /prototype/simulation/figures/kmins_vs_fastsketch_probdist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/prototype/simulation/figures/kmins_vs_fastsketch_probdist.png -------------------------------------------------------------------------------- /prototype/simulation/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/prototype/simulation/util.py -------------------------------------------------------------------------------- /prototype/src/cmins_sketch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/prototype/src/cmins_sketch.py -------------------------------------------------------------------------------- /prototype/src/datasketch_sketch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/prototype/src/datasketch_sketch.py -------------------------------------------------------------------------------- /prototype/src/fast_sketch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/prototype/src/fast_sketch.py -------------------------------------------------------------------------------- /prototype/src/fast_sketch_lsh.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/prototype/src/fast_sketch_lsh.py -------------------------------------------------------------------------------- /prototype/src/kmins_sketch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/prototype/src/kmins_sketch.py -------------------------------------------------------------------------------- /prototype/src/rmins_sketch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/prototype/src/rmins_sketch.py -------------------------------------------------------------------------------- /prototype/test/test_fast_sketch_lsh.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/prototype/test/test_fast_sketch_lsh.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/requirements.txt -------------------------------------------------------------------------------- /test/test_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/test/test_accuracy.py -------------------------------------------------------------------------------- /test/test_fast_sketch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/test/test_fast_sketch.py -------------------------------------------------------------------------------- /test/test_lsh_dedup_comparison.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pzcddm/FastSketchLSH/HEAD/test/test_lsh_dedup_comparison.py --------------------------------------------------------------------------------