├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── config ├── config-debug.toml ├── config-full_run.toml ├── config-open_data-full.toml ├── debug-open_data.toml ├── evaluation │ ├── aggregation │ │ ├── config-aggr-yadl.toml │ │ ├── config-open_data-aggr.toml │ │ └── config-open_data-schools-aggr.toml │ └── general │ │ ├── config-base.toml │ │ ├── config-binary.toml │ │ ├── config-open_data-regression.toml │ │ ├── config-open_data-schools.toml │ │ ├── config-starmie.toml │ │ ├── config-vldb-new_tables-open_data.toml │ │ └── config-vldb-new_tables-starmie.toml ├── required_configurations │ ├── config-aggregation_comparison.json │ ├── config-generic_comparison.json │ ├── config-retrieval_comparison.json │ ├── open_data_us │ │ ├── required_aggregation.json │ │ ├── required_aggregation_nn.json │ │ ├── required_general.json │ │ └── required_general_nn.json │ └── yadl │ │ ├── required_aggregation.json │ │ ├── required_aggregation_nn.json │ │ ├── required_general.json │ │ ├── required_general_nn.json │ │ ├── required_starmie_cpu.json │ │ └── required_starmie_nn.json └── retrieval │ ├── config-debug.toml │ ├── config-open_data-depleted.toml │ ├── config-open_data-full.toml │ ├── debug-movies.toml │ ├── prepare │ ├── prepare-debug.toml │ ├── prepare-exact_matching-binary_update.toml │ ├── prepare-exact_matching-new_tables.toml │ ├── prepare-exact_matching-open_data-new_tables.toml │ ├── prepare-exact_matching-open_data-schools.toml │ ├── prepare-exact_matching-open_data.toml │ ├── prepare-exact_matching-wordnet_10k.toml │ ├── prepare-exact_matching-wordnet_3k.toml │ ├── prepare-exact_matching-wordnet_50k.toml │ ├── prepare-exact_matching-wordnet_full-elections.toml │ ├── prepare-exact_matching-wordnet_full.toml │ ├── prepare-exact_matching-wordnet_vldb_wide.toml │ ├── prepare-minhash-binary_update-multi.toml │ ├── prepare-minhash-binary_update.toml │ ├── prepare-minhash-open_data.toml │ ├── prepare-minhash-wordnet_10k.toml │ ├── prepare-minhash-wordnet_3k.toml │ ├── prepare-minhash-wordnet_50k.toml │ ├── prepare-minhash-wordnet_full.toml │ ├── prepare-minhash-wordnet_vldb_wide.toml │ └── prepare-starmie-wordnet_10k.toml │ └── query │ ├── query-exact_matching-binary_update.toml │ ├── query-exact_matching-new_tables.toml │ ├── query-exact_matching-open_data.toml │ ├── query-exact_matching-wordnet_10k.toml │ ├── query-exact_matching-wordnet_3k.toml │ ├── query-exact_matching-wordnet_50k.toml │ ├── query-exact_matching-wordnet_full.toml │ ├── query-exact_matching.toml │ ├── query-minhash-binary_update.toml │ ├── query-minhash-open_data.toml │ ├── query-minhash-wordnet_full.toml │ ├── query-minhash-wordnet_vldb_wide.toml │ ├── query-minhash.toml │ ├── query-minhash_hybrid-binary_update.toml │ ├── query-open_data-schools.toml │ ├── query-open_data_us-new_tables.toml │ ├── query-starmie-wordnet_10k.toml │ ├── query-starmie.toml │ ├── query-wordnet_full-full_table.toml │ ├── query-wordnet_full-us_elections.toml │ └── query-wordnet_full.toml ├── data └── source_tables │ ├── batch │ ├── company-employees-yadl-depleted.parquet │ ├── company-employees-yadl.parquet │ ├── housing-prices-yadl-depleted.parquet │ ├── housing-prices-yadl.parquet │ ├── movies-vote-yadl-depleted.parquet │ ├── movies-vote-yadl.parquet │ ├── movies-yadl-depleted.parquet │ ├── movies-yadl.parquet │ ├── us-accidents-yadl-depleted.parquet │ ├── us-accidents-yadl.parquet │ ├── us-elections-yadl-depleted.parquet │ └── us-elections-yadl.parquet │ ├── movie_revenues.parquet │ ├── open_data_us │ ├── company_employees-depleted_name-open_data.parquet │ ├── company_employees-open_data.parquet │ ├── housing_prices-depleted_County-open_data.parquet │ ├── housing_prices-depleted_Metro-open_data.parquet │ ├── housing_prices-open_data.parquet │ ├── movies-depleted_title-open_data.parquet │ ├── movies-open_data.parquet │ ├── movies_large-depleted-open_data.parquet │ ├── movies_vote-depleted_title-open_data.parquet │ ├── movies_vote-open_data.parquet │ ├── schools-depleted-open_data.parquet │ ├── schools-open_data.parquet │ ├── schools.csv │ ├── us_accidents-depleted_County-open_data.parquet │ ├── us_accidents-open_data.parquet │ ├── us_accidents_2021-depleted-open_data_County.parquet │ ├── us_accidents_large-depleted-open_data_County.parquet │ ├── us_elections-depleted_county_name-open_data.parquet │ ├── us_elections-open_data.parquet │ ├── us_elections_dem-open_data-depleted.parquet │ └── us_elections_dem-open_data.parquet │ ├── us_accidents.parquet │ └── yadl │ ├── company_employees-yadl-depleted.parquet │ ├── company_employees-yadl.parquet │ ├── housing_prices-yadl-depleted.parquet │ ├── housing_prices-yadl.parquet │ ├── movies-yadl-depleted.parquet │ ├── movies-yadl.parquet │ ├── movies_large-yadl-depleted.parquet │ ├── movies_large-yadl.parquet │ ├── movies_vote-yadl-depleted.parquet │ ├── movies_vote-yadl.parquet │ ├── us_accidents-yadl-depleted.parquet │ ├── us_accidents-yadl.parquet │ ├── us_accidents_2021-yadl-depleted.parquet │ ├── us_accidents_large-yadl-depleted.parquet │ ├── us_county_population-yadl-depleted.parquet │ ├── us_elections-yadl-depleted.parquet │ ├── us_elections-yadl.parquet │ ├── us_elections_dem-yadl-depleted.parquet │ └── us_elections_dem-yadl.parquet ├── environment.yaml ├── main.py ├── main_slurm.py ├── notebooks ├── Additional plots.ipynb ├── Analyzing query results.ipynb ├── Prepare source table variants.ipynb ├── Preparing downsized tables.ipynb ├── Run cleanup.ipynb └── Stats on data lakes.ipynb ├── profile_retrieval.py ├── pyproject.toml ├── pyrightconfig.json ├── query_indices.py ├── recheck.py ├── requirements-torch.txt ├── requirements.txt ├── results_pivot.py ├── run_profile_retrieval.sh ├── scripts ├── analysis_query_results.py ├── archive_cand.py ├── batch_convert_csv_parquet.py ├── debug_plotting.py ├── evaluate_minhash_indices.py ├── evaluation │ ├── analysis_query_results.py │ ├── build_master_list.py │ ├── check_exp_content.py │ ├── handle_full_tables.py │ ├── prepare_results.py │ ├── prepare_topk_results.py │ └── summary_results.py ├── join_aggregator_metrics.py.py ├── manage_lazo.py ├── plotting │ ├── plot_all_combinations_variables.py │ ├── plot_boxplot_retrieval.py │ ├── plot_comparison_large.py │ ├── plot_containment_ranking.py │ ├── plot_containment_reg_bar.py │ ├── plot_containment_top200.py │ ├── plot_crit_diff.py │ ├── plot_pareto_comparison_with_querying.py │ ├── plot_pareto_topk.py │ ├── plot_pareto_topk_social.py │ ├── plot_performance_data_lakes.py │ ├── plot_regplot_containment.py │ ├── plot_results.py │ ├── plot_retrieval_time.py │ ├── plot_starmie_results.py │ ├── plot_time_breakdown.py │ ├── plot_topk_effect.py │ ├── plot_topk_fulljoin.py │ └── plot_tradeoffs.ipy ├── preparation │ ├── get_full_table_queries.py │ ├── import_from_starmie.py │ ├── metadata_creation.py │ ├── prepare_retrieval_methods.py │ └── readme.md ├── prepare_legend.py ├── prototype_countvectorizer.py ├── stats_base_tables.py └── stats_datalakes.py ├── src ├── __init__.py ├── data_structures │ ├── __init__.py │ ├── loggers.py │ ├── metadata.py │ └── retrieval_methods.py ├── methods │ ├── __init__.py │ ├── evaluation.py │ ├── join_selectors.py │ └── profiling.py ├── pipeline.py └── utils │ ├── constants.py │ ├── critical_difference_plot.py │ ├── indexing.py │ ├── joining.py │ ├── logging.py │ ├── notifications.py │ └── plotting.py ├── stats ├── analysis_query_results_binary_update_stats_all.csv ├── analysis_query_results_open_data_us_stats_all.csv ├── analysis_query_results_wordnet_full_stats_all.csv ├── analysis_query_results_wordnet_vldb_10_stats_all.csv ├── analysis_query_results_wordnet_vldb_50_stats_all.csv ├── avg_query_time_for_pareto_plot_all_datalakes.csv ├── avg_query_time_for_pareto_plot_retrieval.csv ├── compile_stats.py ├── formatted_retr_results.csv ├── index_sizes.csv ├── peak_ram.csv ├── results_indexing_time_cont.csv ├── stats_data_lakes.csv ├── stats_retrieval_others.csv ├── stats_retrieval_plot.csv ├── stats_retrieval_starmie.csv └── summarized_retrieval.csv ├── stats_data_lakes.csv └── uv.lock /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/README.md -------------------------------------------------------------------------------- /config/config-debug.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/config-debug.toml -------------------------------------------------------------------------------- /config/config-full_run.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/config-full_run.toml -------------------------------------------------------------------------------- /config/config-open_data-full.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/config-open_data-full.toml -------------------------------------------------------------------------------- /config/debug-open_data.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/debug-open_data.toml -------------------------------------------------------------------------------- /config/evaluation/aggregation/config-aggr-yadl.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/evaluation/aggregation/config-aggr-yadl.toml -------------------------------------------------------------------------------- /config/evaluation/aggregation/config-open_data-aggr.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/evaluation/aggregation/config-open_data-aggr.toml -------------------------------------------------------------------------------- /config/evaluation/aggregation/config-open_data-schools-aggr.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/evaluation/aggregation/config-open_data-schools-aggr.toml -------------------------------------------------------------------------------- /config/evaluation/general/config-base.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/evaluation/general/config-base.toml -------------------------------------------------------------------------------- /config/evaluation/general/config-binary.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/evaluation/general/config-binary.toml -------------------------------------------------------------------------------- /config/evaluation/general/config-open_data-regression.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/evaluation/general/config-open_data-regression.toml -------------------------------------------------------------------------------- /config/evaluation/general/config-open_data-schools.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/evaluation/general/config-open_data-schools.toml -------------------------------------------------------------------------------- /config/evaluation/general/config-starmie.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/evaluation/general/config-starmie.toml -------------------------------------------------------------------------------- /config/evaluation/general/config-vldb-new_tables-open_data.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/evaluation/general/config-vldb-new_tables-open_data.toml -------------------------------------------------------------------------------- /config/evaluation/general/config-vldb-new_tables-starmie.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/evaluation/general/config-vldb-new_tables-starmie.toml -------------------------------------------------------------------------------- /config/required_configurations/config-aggregation_comparison.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/required_configurations/config-aggregation_comparison.json -------------------------------------------------------------------------------- /config/required_configurations/config-generic_comparison.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/required_configurations/config-generic_comparison.json -------------------------------------------------------------------------------- /config/required_configurations/config-retrieval_comparison.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/required_configurations/config-retrieval_comparison.json -------------------------------------------------------------------------------- /config/required_configurations/open_data_us/required_aggregation.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/required_configurations/open_data_us/required_aggregation.json -------------------------------------------------------------------------------- /config/required_configurations/open_data_us/required_aggregation_nn.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/required_configurations/open_data_us/required_aggregation_nn.json -------------------------------------------------------------------------------- /config/required_configurations/open_data_us/required_general.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/required_configurations/open_data_us/required_general.json -------------------------------------------------------------------------------- /config/required_configurations/open_data_us/required_general_nn.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/required_configurations/open_data_us/required_general_nn.json -------------------------------------------------------------------------------- /config/required_configurations/yadl/required_aggregation.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/required_configurations/yadl/required_aggregation.json -------------------------------------------------------------------------------- /config/required_configurations/yadl/required_aggregation_nn.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/required_configurations/yadl/required_aggregation_nn.json -------------------------------------------------------------------------------- /config/required_configurations/yadl/required_general.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/required_configurations/yadl/required_general.json -------------------------------------------------------------------------------- /config/required_configurations/yadl/required_general_nn.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/required_configurations/yadl/required_general_nn.json -------------------------------------------------------------------------------- /config/required_configurations/yadl/required_starmie_cpu.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/required_configurations/yadl/required_starmie_cpu.json -------------------------------------------------------------------------------- /config/required_configurations/yadl/required_starmie_nn.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/required_configurations/yadl/required_starmie_nn.json -------------------------------------------------------------------------------- /config/retrieval/config-debug.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/config-debug.toml -------------------------------------------------------------------------------- /config/retrieval/config-open_data-depleted.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/config-open_data-depleted.toml -------------------------------------------------------------------------------- /config/retrieval/config-open_data-full.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/config-open_data-full.toml -------------------------------------------------------------------------------- /config/retrieval/debug-movies.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/debug-movies.toml -------------------------------------------------------------------------------- /config/retrieval/prepare/prepare-debug.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/prepare/prepare-debug.toml -------------------------------------------------------------------------------- /config/retrieval/prepare/prepare-exact_matching-binary_update.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/prepare/prepare-exact_matching-binary_update.toml -------------------------------------------------------------------------------- /config/retrieval/prepare/prepare-exact_matching-new_tables.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/prepare/prepare-exact_matching-new_tables.toml -------------------------------------------------------------------------------- /config/retrieval/prepare/prepare-exact_matching-open_data-new_tables.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/prepare/prepare-exact_matching-open_data-new_tables.toml -------------------------------------------------------------------------------- /config/retrieval/prepare/prepare-exact_matching-open_data-schools.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/prepare/prepare-exact_matching-open_data-schools.toml -------------------------------------------------------------------------------- /config/retrieval/prepare/prepare-exact_matching-open_data.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/prepare/prepare-exact_matching-open_data.toml -------------------------------------------------------------------------------- /config/retrieval/prepare/prepare-exact_matching-wordnet_10k.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/prepare/prepare-exact_matching-wordnet_10k.toml -------------------------------------------------------------------------------- /config/retrieval/prepare/prepare-exact_matching-wordnet_3k.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/prepare/prepare-exact_matching-wordnet_3k.toml -------------------------------------------------------------------------------- /config/retrieval/prepare/prepare-exact_matching-wordnet_50k.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/prepare/prepare-exact_matching-wordnet_50k.toml -------------------------------------------------------------------------------- /config/retrieval/prepare/prepare-exact_matching-wordnet_full-elections.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/prepare/prepare-exact_matching-wordnet_full-elections.toml -------------------------------------------------------------------------------- /config/retrieval/prepare/prepare-exact_matching-wordnet_full.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/prepare/prepare-exact_matching-wordnet_full.toml -------------------------------------------------------------------------------- /config/retrieval/prepare/prepare-exact_matching-wordnet_vldb_wide.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/prepare/prepare-exact_matching-wordnet_vldb_wide.toml -------------------------------------------------------------------------------- /config/retrieval/prepare/prepare-minhash-binary_update-multi.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/prepare/prepare-minhash-binary_update-multi.toml -------------------------------------------------------------------------------- /config/retrieval/prepare/prepare-minhash-binary_update.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/prepare/prepare-minhash-binary_update.toml -------------------------------------------------------------------------------- /config/retrieval/prepare/prepare-minhash-open_data.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/prepare/prepare-minhash-open_data.toml -------------------------------------------------------------------------------- /config/retrieval/prepare/prepare-minhash-wordnet_10k.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/prepare/prepare-minhash-wordnet_10k.toml -------------------------------------------------------------------------------- /config/retrieval/prepare/prepare-minhash-wordnet_3k.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/prepare/prepare-minhash-wordnet_3k.toml -------------------------------------------------------------------------------- /config/retrieval/prepare/prepare-minhash-wordnet_50k.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/prepare/prepare-minhash-wordnet_50k.toml -------------------------------------------------------------------------------- /config/retrieval/prepare/prepare-minhash-wordnet_full.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/prepare/prepare-minhash-wordnet_full.toml -------------------------------------------------------------------------------- /config/retrieval/prepare/prepare-minhash-wordnet_vldb_wide.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/prepare/prepare-minhash-wordnet_vldb_wide.toml -------------------------------------------------------------------------------- /config/retrieval/prepare/prepare-starmie-wordnet_10k.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/prepare/prepare-starmie-wordnet_10k.toml -------------------------------------------------------------------------------- /config/retrieval/query/query-exact_matching-binary_update.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/query/query-exact_matching-binary_update.toml -------------------------------------------------------------------------------- /config/retrieval/query/query-exact_matching-new_tables.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/query/query-exact_matching-new_tables.toml -------------------------------------------------------------------------------- /config/retrieval/query/query-exact_matching-open_data.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/query/query-exact_matching-open_data.toml -------------------------------------------------------------------------------- /config/retrieval/query/query-exact_matching-wordnet_10k.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/query/query-exact_matching-wordnet_10k.toml -------------------------------------------------------------------------------- /config/retrieval/query/query-exact_matching-wordnet_3k.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/query/query-exact_matching-wordnet_3k.toml -------------------------------------------------------------------------------- /config/retrieval/query/query-exact_matching-wordnet_50k.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/query/query-exact_matching-wordnet_50k.toml -------------------------------------------------------------------------------- /config/retrieval/query/query-exact_matching-wordnet_full.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/query/query-exact_matching-wordnet_full.toml -------------------------------------------------------------------------------- /config/retrieval/query/query-exact_matching.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/query/query-exact_matching.toml -------------------------------------------------------------------------------- /config/retrieval/query/query-minhash-binary_update.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/query/query-minhash-binary_update.toml -------------------------------------------------------------------------------- /config/retrieval/query/query-minhash-open_data.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/query/query-minhash-open_data.toml -------------------------------------------------------------------------------- /config/retrieval/query/query-minhash-wordnet_full.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/query/query-minhash-wordnet_full.toml -------------------------------------------------------------------------------- /config/retrieval/query/query-minhash-wordnet_vldb_wide.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/query/query-minhash-wordnet_vldb_wide.toml -------------------------------------------------------------------------------- /config/retrieval/query/query-minhash.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/query/query-minhash.toml -------------------------------------------------------------------------------- /config/retrieval/query/query-minhash_hybrid-binary_update.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/query/query-minhash_hybrid-binary_update.toml -------------------------------------------------------------------------------- /config/retrieval/query/query-open_data-schools.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/query/query-open_data-schools.toml -------------------------------------------------------------------------------- /config/retrieval/query/query-open_data_us-new_tables.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/query/query-open_data_us-new_tables.toml -------------------------------------------------------------------------------- /config/retrieval/query/query-starmie-wordnet_10k.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/query/query-starmie-wordnet_10k.toml -------------------------------------------------------------------------------- /config/retrieval/query/query-starmie.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/query/query-starmie.toml -------------------------------------------------------------------------------- /config/retrieval/query/query-wordnet_full-full_table.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/query/query-wordnet_full-full_table.toml -------------------------------------------------------------------------------- /config/retrieval/query/query-wordnet_full-us_elections.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/query/query-wordnet_full-us_elections.toml -------------------------------------------------------------------------------- /config/retrieval/query/query-wordnet_full.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/config/retrieval/query/query-wordnet_full.toml -------------------------------------------------------------------------------- /data/source_tables/batch/company-employees-yadl-depleted.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/batch/company-employees-yadl-depleted.parquet -------------------------------------------------------------------------------- /data/source_tables/batch/company-employees-yadl.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/batch/company-employees-yadl.parquet -------------------------------------------------------------------------------- /data/source_tables/batch/housing-prices-yadl-depleted.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/batch/housing-prices-yadl-depleted.parquet -------------------------------------------------------------------------------- /data/source_tables/batch/housing-prices-yadl.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/batch/housing-prices-yadl.parquet -------------------------------------------------------------------------------- /data/source_tables/batch/movies-vote-yadl-depleted.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/batch/movies-vote-yadl-depleted.parquet -------------------------------------------------------------------------------- /data/source_tables/batch/movies-vote-yadl.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/batch/movies-vote-yadl.parquet -------------------------------------------------------------------------------- /data/source_tables/batch/movies-yadl-depleted.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/batch/movies-yadl-depleted.parquet -------------------------------------------------------------------------------- /data/source_tables/batch/movies-yadl.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/batch/movies-yadl.parquet -------------------------------------------------------------------------------- /data/source_tables/batch/us-accidents-yadl-depleted.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/batch/us-accidents-yadl-depleted.parquet -------------------------------------------------------------------------------- /data/source_tables/batch/us-accidents-yadl.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/batch/us-accidents-yadl.parquet -------------------------------------------------------------------------------- /data/source_tables/batch/us-elections-yadl-depleted.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/batch/us-elections-yadl-depleted.parquet -------------------------------------------------------------------------------- /data/source_tables/batch/us-elections-yadl.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/batch/us-elections-yadl.parquet -------------------------------------------------------------------------------- /data/source_tables/movie_revenues.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/movie_revenues.parquet -------------------------------------------------------------------------------- /data/source_tables/open_data_us/company_employees-depleted_name-open_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/open_data_us/company_employees-depleted_name-open_data.parquet -------------------------------------------------------------------------------- /data/source_tables/open_data_us/company_employees-open_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/open_data_us/company_employees-open_data.parquet -------------------------------------------------------------------------------- /data/source_tables/open_data_us/housing_prices-depleted_County-open_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/open_data_us/housing_prices-depleted_County-open_data.parquet -------------------------------------------------------------------------------- /data/source_tables/open_data_us/housing_prices-depleted_Metro-open_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/open_data_us/housing_prices-depleted_Metro-open_data.parquet -------------------------------------------------------------------------------- /data/source_tables/open_data_us/housing_prices-open_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/open_data_us/housing_prices-open_data.parquet -------------------------------------------------------------------------------- /data/source_tables/open_data_us/movies-depleted_title-open_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/open_data_us/movies-depleted_title-open_data.parquet -------------------------------------------------------------------------------- /data/source_tables/open_data_us/movies-open_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/open_data_us/movies-open_data.parquet -------------------------------------------------------------------------------- /data/source_tables/open_data_us/movies_large-depleted-open_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/open_data_us/movies_large-depleted-open_data.parquet -------------------------------------------------------------------------------- /data/source_tables/open_data_us/movies_vote-depleted_title-open_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/open_data_us/movies_vote-depleted_title-open_data.parquet -------------------------------------------------------------------------------- /data/source_tables/open_data_us/movies_vote-open_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/open_data_us/movies_vote-open_data.parquet -------------------------------------------------------------------------------- /data/source_tables/open_data_us/schools-depleted-open_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/open_data_us/schools-depleted-open_data.parquet -------------------------------------------------------------------------------- /data/source_tables/open_data_us/schools-open_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/open_data_us/schools-open_data.parquet -------------------------------------------------------------------------------- /data/source_tables/open_data_us/schools.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/open_data_us/schools.csv -------------------------------------------------------------------------------- /data/source_tables/open_data_us/us_accidents-depleted_County-open_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/open_data_us/us_accidents-depleted_County-open_data.parquet -------------------------------------------------------------------------------- /data/source_tables/open_data_us/us_accidents-open_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/open_data_us/us_accidents-open_data.parquet -------------------------------------------------------------------------------- /data/source_tables/open_data_us/us_accidents_2021-depleted-open_data_County.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/open_data_us/us_accidents_2021-depleted-open_data_County.parquet -------------------------------------------------------------------------------- /data/source_tables/open_data_us/us_accidents_large-depleted-open_data_County.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/open_data_us/us_accidents_large-depleted-open_data_County.parquet -------------------------------------------------------------------------------- /data/source_tables/open_data_us/us_elections-depleted_county_name-open_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/open_data_us/us_elections-depleted_county_name-open_data.parquet -------------------------------------------------------------------------------- /data/source_tables/open_data_us/us_elections-open_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/open_data_us/us_elections-open_data.parquet -------------------------------------------------------------------------------- /data/source_tables/open_data_us/us_elections_dem-open_data-depleted.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/open_data_us/us_elections_dem-open_data-depleted.parquet -------------------------------------------------------------------------------- /data/source_tables/open_data_us/us_elections_dem-open_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/open_data_us/us_elections_dem-open_data.parquet -------------------------------------------------------------------------------- /data/source_tables/us_accidents.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/us_accidents.parquet -------------------------------------------------------------------------------- /data/source_tables/yadl/company_employees-yadl-depleted.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/yadl/company_employees-yadl-depleted.parquet -------------------------------------------------------------------------------- /data/source_tables/yadl/company_employees-yadl.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/yadl/company_employees-yadl.parquet -------------------------------------------------------------------------------- /data/source_tables/yadl/housing_prices-yadl-depleted.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/yadl/housing_prices-yadl-depleted.parquet -------------------------------------------------------------------------------- /data/source_tables/yadl/housing_prices-yadl.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/yadl/housing_prices-yadl.parquet -------------------------------------------------------------------------------- /data/source_tables/yadl/movies-yadl-depleted.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/yadl/movies-yadl-depleted.parquet -------------------------------------------------------------------------------- /data/source_tables/yadl/movies-yadl.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/yadl/movies-yadl.parquet -------------------------------------------------------------------------------- /data/source_tables/yadl/movies_large-yadl-depleted.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/yadl/movies_large-yadl-depleted.parquet -------------------------------------------------------------------------------- /data/source_tables/yadl/movies_large-yadl.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/yadl/movies_large-yadl.parquet -------------------------------------------------------------------------------- /data/source_tables/yadl/movies_vote-yadl-depleted.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/yadl/movies_vote-yadl-depleted.parquet -------------------------------------------------------------------------------- /data/source_tables/yadl/movies_vote-yadl.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/yadl/movies_vote-yadl.parquet -------------------------------------------------------------------------------- /data/source_tables/yadl/us_accidents-yadl-depleted.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/yadl/us_accidents-yadl-depleted.parquet -------------------------------------------------------------------------------- /data/source_tables/yadl/us_accidents-yadl.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/yadl/us_accidents-yadl.parquet -------------------------------------------------------------------------------- /data/source_tables/yadl/us_accidents_2021-yadl-depleted.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/yadl/us_accidents_2021-yadl-depleted.parquet -------------------------------------------------------------------------------- /data/source_tables/yadl/us_accidents_large-yadl-depleted.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/yadl/us_accidents_large-yadl-depleted.parquet -------------------------------------------------------------------------------- /data/source_tables/yadl/us_county_population-yadl-depleted.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/yadl/us_county_population-yadl-depleted.parquet -------------------------------------------------------------------------------- /data/source_tables/yadl/us_elections-yadl-depleted.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/yadl/us_elections-yadl-depleted.parquet -------------------------------------------------------------------------------- /data/source_tables/yadl/us_elections-yadl.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/yadl/us_elections-yadl.parquet -------------------------------------------------------------------------------- /data/source_tables/yadl/us_elections_dem-yadl-depleted.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/yadl/us_elections_dem-yadl-depleted.parquet -------------------------------------------------------------------------------- /data/source_tables/yadl/us_elections_dem-yadl.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/data/source_tables/yadl/us_elections_dem-yadl.parquet -------------------------------------------------------------------------------- /environment.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/environment.yaml -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/main.py -------------------------------------------------------------------------------- /main_slurm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/main_slurm.py -------------------------------------------------------------------------------- /notebooks/Additional plots.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/notebooks/Additional plots.ipynb -------------------------------------------------------------------------------- /notebooks/Analyzing query results.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/notebooks/Analyzing query results.ipynb -------------------------------------------------------------------------------- /notebooks/Prepare source table variants.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/notebooks/Prepare source table variants.ipynb -------------------------------------------------------------------------------- /notebooks/Preparing downsized tables.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/notebooks/Preparing downsized tables.ipynb -------------------------------------------------------------------------------- /notebooks/Run cleanup.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/notebooks/Run cleanup.ipynb -------------------------------------------------------------------------------- /notebooks/Stats on data lakes.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/notebooks/Stats on data lakes.ipynb -------------------------------------------------------------------------------- /profile_retrieval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/profile_retrieval.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/pyproject.toml -------------------------------------------------------------------------------- /pyrightconfig.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/pyrightconfig.json -------------------------------------------------------------------------------- /query_indices.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/query_indices.py -------------------------------------------------------------------------------- /recheck.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/recheck.py -------------------------------------------------------------------------------- /requirements-torch.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/requirements-torch.txt -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/requirements.txt -------------------------------------------------------------------------------- /results_pivot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/results_pivot.py -------------------------------------------------------------------------------- /run_profile_retrieval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/run_profile_retrieval.sh -------------------------------------------------------------------------------- /scripts/analysis_query_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/analysis_query_results.py -------------------------------------------------------------------------------- /scripts/archive_cand.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/archive_cand.py -------------------------------------------------------------------------------- /scripts/batch_convert_csv_parquet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/batch_convert_csv_parquet.py -------------------------------------------------------------------------------- /scripts/debug_plotting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/debug_plotting.py -------------------------------------------------------------------------------- /scripts/evaluate_minhash_indices.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/evaluate_minhash_indices.py -------------------------------------------------------------------------------- /scripts/evaluation/analysis_query_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/evaluation/analysis_query_results.py -------------------------------------------------------------------------------- /scripts/evaluation/build_master_list.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/evaluation/build_master_list.py -------------------------------------------------------------------------------- /scripts/evaluation/check_exp_content.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/evaluation/check_exp_content.py -------------------------------------------------------------------------------- /scripts/evaluation/handle_full_tables.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/evaluation/handle_full_tables.py -------------------------------------------------------------------------------- /scripts/evaluation/prepare_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/evaluation/prepare_results.py -------------------------------------------------------------------------------- /scripts/evaluation/prepare_topk_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/evaluation/prepare_topk_results.py -------------------------------------------------------------------------------- /scripts/evaluation/summary_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/evaluation/summary_results.py -------------------------------------------------------------------------------- /scripts/join_aggregator_metrics.py.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/join_aggregator_metrics.py.py -------------------------------------------------------------------------------- /scripts/manage_lazo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/manage_lazo.py -------------------------------------------------------------------------------- /scripts/plotting/plot_all_combinations_variables.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/plotting/plot_all_combinations_variables.py -------------------------------------------------------------------------------- /scripts/plotting/plot_boxplot_retrieval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/plotting/plot_boxplot_retrieval.py -------------------------------------------------------------------------------- /scripts/plotting/plot_comparison_large.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/plotting/plot_comparison_large.py -------------------------------------------------------------------------------- /scripts/plotting/plot_containment_ranking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/plotting/plot_containment_ranking.py -------------------------------------------------------------------------------- /scripts/plotting/plot_containment_reg_bar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/plotting/plot_containment_reg_bar.py -------------------------------------------------------------------------------- /scripts/plotting/plot_containment_top200.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/plotting/plot_containment_top200.py -------------------------------------------------------------------------------- /scripts/plotting/plot_crit_diff.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/plotting/plot_crit_diff.py -------------------------------------------------------------------------------- /scripts/plotting/plot_pareto_comparison_with_querying.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/plotting/plot_pareto_comparison_with_querying.py -------------------------------------------------------------------------------- /scripts/plotting/plot_pareto_topk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/plotting/plot_pareto_topk.py -------------------------------------------------------------------------------- /scripts/plotting/plot_pareto_topk_social.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/plotting/plot_pareto_topk_social.py -------------------------------------------------------------------------------- /scripts/plotting/plot_performance_data_lakes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/plotting/plot_performance_data_lakes.py -------------------------------------------------------------------------------- /scripts/plotting/plot_regplot_containment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/plotting/plot_regplot_containment.py -------------------------------------------------------------------------------- /scripts/plotting/plot_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/plotting/plot_results.py -------------------------------------------------------------------------------- /scripts/plotting/plot_retrieval_time.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/plotting/plot_retrieval_time.py -------------------------------------------------------------------------------- /scripts/plotting/plot_starmie_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/plotting/plot_starmie_results.py -------------------------------------------------------------------------------- /scripts/plotting/plot_time_breakdown.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/plotting/plot_time_breakdown.py -------------------------------------------------------------------------------- /scripts/plotting/plot_topk_effect.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/plotting/plot_topk_effect.py -------------------------------------------------------------------------------- /scripts/plotting/plot_topk_fulljoin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/plotting/plot_topk_fulljoin.py -------------------------------------------------------------------------------- /scripts/plotting/plot_tradeoffs.ipy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/plotting/plot_tradeoffs.ipy -------------------------------------------------------------------------------- /scripts/preparation/get_full_table_queries.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/preparation/get_full_table_queries.py -------------------------------------------------------------------------------- /scripts/preparation/import_from_starmie.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/preparation/import_from_starmie.py -------------------------------------------------------------------------------- /scripts/preparation/metadata_creation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/preparation/metadata_creation.py -------------------------------------------------------------------------------- /scripts/preparation/prepare_retrieval_methods.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/preparation/prepare_retrieval_methods.py -------------------------------------------------------------------------------- /scripts/preparation/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/preparation/readme.md -------------------------------------------------------------------------------- /scripts/prepare_legend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/prepare_legend.py -------------------------------------------------------------------------------- /scripts/prototype_countvectorizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/prototype_countvectorizer.py -------------------------------------------------------------------------------- /scripts/stats_base_tables.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/stats_base_tables.py -------------------------------------------------------------------------------- /scripts/stats_datalakes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/scripts/stats_datalakes.py -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/data_structures/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/data_structures/loggers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/src/data_structures/loggers.py -------------------------------------------------------------------------------- /src/data_structures/metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/src/data_structures/metadata.py -------------------------------------------------------------------------------- /src/data_structures/retrieval_methods.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/src/data_structures/retrieval_methods.py -------------------------------------------------------------------------------- /src/methods/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/methods/evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/src/methods/evaluation.py -------------------------------------------------------------------------------- /src/methods/join_selectors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/src/methods/join_selectors.py -------------------------------------------------------------------------------- /src/methods/profiling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/src/methods/profiling.py -------------------------------------------------------------------------------- /src/pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/src/pipeline.py -------------------------------------------------------------------------------- /src/utils/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/src/utils/constants.py -------------------------------------------------------------------------------- /src/utils/critical_difference_plot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/src/utils/critical_difference_plot.py -------------------------------------------------------------------------------- /src/utils/indexing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/src/utils/indexing.py -------------------------------------------------------------------------------- /src/utils/joining.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/src/utils/joining.py -------------------------------------------------------------------------------- /src/utils/logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/src/utils/logging.py -------------------------------------------------------------------------------- /src/utils/notifications.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/src/utils/notifications.py -------------------------------------------------------------------------------- /src/utils/plotting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/src/utils/plotting.py -------------------------------------------------------------------------------- /stats/analysis_query_results_binary_update_stats_all.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/stats/analysis_query_results_binary_update_stats_all.csv -------------------------------------------------------------------------------- /stats/analysis_query_results_open_data_us_stats_all.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/stats/analysis_query_results_open_data_us_stats_all.csv -------------------------------------------------------------------------------- /stats/analysis_query_results_wordnet_full_stats_all.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/stats/analysis_query_results_wordnet_full_stats_all.csv -------------------------------------------------------------------------------- /stats/analysis_query_results_wordnet_vldb_10_stats_all.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/stats/analysis_query_results_wordnet_vldb_10_stats_all.csv -------------------------------------------------------------------------------- /stats/analysis_query_results_wordnet_vldb_50_stats_all.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/stats/analysis_query_results_wordnet_vldb_50_stats_all.csv -------------------------------------------------------------------------------- /stats/avg_query_time_for_pareto_plot_all_datalakes.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/stats/avg_query_time_for_pareto_plot_all_datalakes.csv -------------------------------------------------------------------------------- /stats/avg_query_time_for_pareto_plot_retrieval.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/stats/avg_query_time_for_pareto_plot_retrieval.csv -------------------------------------------------------------------------------- /stats/compile_stats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/stats/compile_stats.py -------------------------------------------------------------------------------- /stats/formatted_retr_results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/stats/formatted_retr_results.csv -------------------------------------------------------------------------------- /stats/index_sizes.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/stats/index_sizes.csv -------------------------------------------------------------------------------- /stats/peak_ram.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/stats/peak_ram.csv -------------------------------------------------------------------------------- /stats/results_indexing_time_cont.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/stats/results_indexing_time_cont.csv -------------------------------------------------------------------------------- /stats/stats_data_lakes.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/stats/stats_data_lakes.csv -------------------------------------------------------------------------------- /stats/stats_retrieval_others.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/stats/stats_retrieval_others.csv -------------------------------------------------------------------------------- /stats/stats_retrieval_plot.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/stats/stats_retrieval_plot.csv -------------------------------------------------------------------------------- /stats/stats_retrieval_starmie.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/stats/stats_retrieval_starmie.csv -------------------------------------------------------------------------------- /stats/summarized_retrieval.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/stats/summarized_retrieval.csv -------------------------------------------------------------------------------- /stats_data_lakes.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/stats_data_lakes.csv -------------------------------------------------------------------------------- /uv.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soda-inria/retrieve-merge-predict/HEAD/uv.lock --------------------------------------------------------------------------------