├── .github └── workflows │ ├── test_examples.yml │ └── test_tutorials.yml ├── .gitignore ├── README.md ├── data ├── fake_1000.csv ├── fake_1000.parquet ├── fake_1000_combined.json ├── fake_1000_labels.csv ├── fake_20000.csv ├── fake_df_l.parquet ├── fake_df_l_link_dedupe_test.parquet ├── fake_df_r.parquet ├── fake_df_r_link_dedupe_test.parquet ├── febrl │ ├── dataset3.csv │ ├── dataset4a.csv │ ├── dataset4b.csv │ └── source.txt ├── historical_figures_with_errors_50k.parquet ├── pairwise_labels_to_estimate_m.csv ├── transactions_left.parquet └── transactions_right.parquet ├── demo_settings ├── real_time_settings.json └── saved_model_from_demo.json ├── examples ├── athena │ ├── dashboards │ │ └── 50k_cluster.html │ └── deduplicate_50k_synthetic.ipynb ├── duckdb │ ├── accuracy_analysis_from_labels_column.ipynb │ ├── dashboards │ │ ├── 50k_cluster.html │ │ ├── 50k_deterministic_cluster.html │ │ └── comparison_viewer_transactions.html │ ├── deduplicate_50k_synthetic.ipynb │ ├── deterministic_dedupe.ipynb │ ├── febrl3.ipynb │ ├── febrl4.ipynb │ ├── link_only.ipynb │ ├── pairwise_labels.ipynb │ ├── quick_and_dirty_persons.ipynb │ ├── real_time_record_linkage.ipynb │ └── transactions.ipynb ├── spark │ └── deduplicate_1k_synthetic.ipynb └── sqlite │ ├── dashboards │ └── 50k_cluster.html │ └── deduplicate_50k_synthetic.ipynb ├── recreate_venv.sh ├── requirements.txt ├── runtime.txt ├── scv.html └── tutorials ├── 00_Tutorial_Introduction.ipynb ├── 01_Prerequisites.ipynb ├── 02_Exploratory_analysis.ipynb ├── 03_Blocking.ipynb ├── 04_Estimating_model_parameters.ipynb ├── 05_Predicting_results.ipynb ├── 06_Visualising_predictions.ipynb ├── 07_Quality_assurance.ipynb ├── cluster_studio.html └── scv.html /.github/workflows/test_examples.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/.github/workflows/test_examples.yml -------------------------------------------------------------------------------- /.github/workflows/test_tutorials.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/.github/workflows/test_tutorials.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/README.md -------------------------------------------------------------------------------- /data/fake_1000.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/data/fake_1000.csv -------------------------------------------------------------------------------- /data/fake_1000.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/data/fake_1000.parquet -------------------------------------------------------------------------------- /data/fake_1000_combined.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/data/fake_1000_combined.json -------------------------------------------------------------------------------- /data/fake_1000_labels.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/data/fake_1000_labels.csv -------------------------------------------------------------------------------- /data/fake_20000.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/data/fake_20000.csv -------------------------------------------------------------------------------- /data/fake_df_l.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/data/fake_df_l.parquet -------------------------------------------------------------------------------- /data/fake_df_l_link_dedupe_test.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/data/fake_df_l_link_dedupe_test.parquet -------------------------------------------------------------------------------- /data/fake_df_r.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/data/fake_df_r.parquet -------------------------------------------------------------------------------- /data/fake_df_r_link_dedupe_test.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/data/fake_df_r_link_dedupe_test.parquet -------------------------------------------------------------------------------- /data/febrl/dataset3.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/data/febrl/dataset3.csv -------------------------------------------------------------------------------- /data/febrl/dataset4a.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/data/febrl/dataset4a.csv -------------------------------------------------------------------------------- /data/febrl/dataset4b.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/data/febrl/dataset4b.csv -------------------------------------------------------------------------------- /data/febrl/source.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/data/febrl/source.txt -------------------------------------------------------------------------------- /data/historical_figures_with_errors_50k.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/data/historical_figures_with_errors_50k.parquet -------------------------------------------------------------------------------- /data/pairwise_labels_to_estimate_m.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/data/pairwise_labels_to_estimate_m.csv -------------------------------------------------------------------------------- /data/transactions_left.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/data/transactions_left.parquet -------------------------------------------------------------------------------- /data/transactions_right.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/data/transactions_right.parquet -------------------------------------------------------------------------------- /demo_settings/real_time_settings.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/demo_settings/real_time_settings.json -------------------------------------------------------------------------------- /demo_settings/saved_model_from_demo.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/demo_settings/saved_model_from_demo.json -------------------------------------------------------------------------------- /examples/athena/dashboards/50k_cluster.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/examples/athena/dashboards/50k_cluster.html -------------------------------------------------------------------------------- /examples/athena/deduplicate_50k_synthetic.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/examples/athena/deduplicate_50k_synthetic.ipynb -------------------------------------------------------------------------------- /examples/duckdb/accuracy_analysis_from_labels_column.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/examples/duckdb/accuracy_analysis_from_labels_column.ipynb -------------------------------------------------------------------------------- /examples/duckdb/dashboards/50k_cluster.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/examples/duckdb/dashboards/50k_cluster.html -------------------------------------------------------------------------------- /examples/duckdb/dashboards/50k_deterministic_cluster.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/examples/duckdb/dashboards/50k_deterministic_cluster.html -------------------------------------------------------------------------------- /examples/duckdb/dashboards/comparison_viewer_transactions.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/examples/duckdb/dashboards/comparison_viewer_transactions.html -------------------------------------------------------------------------------- /examples/duckdb/deduplicate_50k_synthetic.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/examples/duckdb/deduplicate_50k_synthetic.ipynb -------------------------------------------------------------------------------- /examples/duckdb/deterministic_dedupe.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/examples/duckdb/deterministic_dedupe.ipynb -------------------------------------------------------------------------------- /examples/duckdb/febrl3.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/examples/duckdb/febrl3.ipynb -------------------------------------------------------------------------------- /examples/duckdb/febrl4.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/examples/duckdb/febrl4.ipynb -------------------------------------------------------------------------------- /examples/duckdb/link_only.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/examples/duckdb/link_only.ipynb -------------------------------------------------------------------------------- /examples/duckdb/pairwise_labels.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/examples/duckdb/pairwise_labels.ipynb -------------------------------------------------------------------------------- /examples/duckdb/quick_and_dirty_persons.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/examples/duckdb/quick_and_dirty_persons.ipynb -------------------------------------------------------------------------------- /examples/duckdb/real_time_record_linkage.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/examples/duckdb/real_time_record_linkage.ipynb -------------------------------------------------------------------------------- /examples/duckdb/transactions.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/examples/duckdb/transactions.ipynb -------------------------------------------------------------------------------- /examples/spark/deduplicate_1k_synthetic.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/examples/spark/deduplicate_1k_synthetic.ipynb -------------------------------------------------------------------------------- /examples/sqlite/dashboards/50k_cluster.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/examples/sqlite/dashboards/50k_cluster.html -------------------------------------------------------------------------------- /examples/sqlite/deduplicate_50k_synthetic.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/examples/sqlite/deduplicate_50k_synthetic.ipynb -------------------------------------------------------------------------------- /recreate_venv.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/recreate_venv.sh -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/requirements.txt -------------------------------------------------------------------------------- /runtime.txt: -------------------------------------------------------------------------------- 1 | python-3.10 -------------------------------------------------------------------------------- /scv.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/scv.html -------------------------------------------------------------------------------- /tutorials/00_Tutorial_Introduction.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/tutorials/00_Tutorial_Introduction.ipynb -------------------------------------------------------------------------------- /tutorials/01_Prerequisites.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/tutorials/01_Prerequisites.ipynb -------------------------------------------------------------------------------- /tutorials/02_Exploratory_analysis.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/tutorials/02_Exploratory_analysis.ipynb -------------------------------------------------------------------------------- /tutorials/03_Blocking.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/tutorials/03_Blocking.ipynb -------------------------------------------------------------------------------- /tutorials/04_Estimating_model_parameters.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/tutorials/04_Estimating_model_parameters.ipynb -------------------------------------------------------------------------------- /tutorials/05_Predicting_results.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/tutorials/05_Predicting_results.ipynb -------------------------------------------------------------------------------- /tutorials/06_Visualising_predictions.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/tutorials/06_Visualising_predictions.ipynb -------------------------------------------------------------------------------- /tutorials/07_Quality_assurance.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/tutorials/07_Quality_assurance.ipynb -------------------------------------------------------------------------------- /tutorials/cluster_studio.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/tutorials/cluster_studio.html -------------------------------------------------------------------------------- /tutorials/scv.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/splink_demos/HEAD/tutorials/scv.html --------------------------------------------------------------------------------