├── .adr-dir ├── .github └── workflows │ ├── accuracy_from_labels_dataset.yml │ ├── edge_cases_github_comment.yml │ ├── pypi-release.yaml │ ├── run_address_test.py │ └── run_pytest.yml ├── .gitignore ├── AGENTS.md ├── _LICENCE.md ├── benchmarking ├── .config.json ├── analysis │ ├── README.md │ ├── __init__.py │ ├── accuracy.py │ ├── mismatches.py │ └── reporting.py ├── datasets │ ├── README.md │ ├── __init__.py │ ├── hackney_council.py │ ├── lambeth_council.py │ ├── registry.py │ └── sources.py ├── exact_match_benchmarking.py ├── full_e2e_benchmarking.py ├── individual_stage_runner.py └── utils │ ├── io.py │ ├── pipelines.py │ └── timing.py ├── docs └── adr │ ├── 0001-duckdb.md │ ├── 0002-address-strings-instead-of-tokens.md │ ├── 0003-trie-matching.md │ └── README.md ├── example_data ├── companies_house_addresess_postcode_overlap.parquet ├── companies_house_data_licence.md ├── fhrs_addresses_sample.parquet ├── fhrs_data_licence.md ├── numeric_token_tf_table.parquet ├── open_streetmap_licence.md └── rel_tok_freq.parquet ├── examples ├── example_matching.py ├── fhrs │ ├── download_fhrs_data.py │ └── match_fhrs_to_os.py ├── match_epc_to_os.py ├── match_one.py └── preclean_full_os.py ├── general_context.md ├── interactive_comparison.ipynb ├── match_example_data.ipynb ├── pyproject.toml ├── readme.md ├── scripts ├── accuracy_from_labels.py ├── analyse_test_cases.py ├── epc_accuracy_from_labels.py ├── generate_better_tfs.py ├── generate_common_end_tokens.py ├── improve_parameters_using_gradient_descent.py ├── improve_parameters_using_gradient_descent_non_spsa.py └── run_training.py ├── shell ├── bump_release_version.sh └── test_release_locally.sh ├── tests ├── __init__.py ├── cleaning │ ├── test_abbreviation_replacement.py │ ├── test_cleaning_steps.py │ └── test_ukam_address_id.py ├── conftest.py ├── edge_case_addresses.yaml ├── post_linkage │ └── test_match_candidate_selection.py ├── sql_pipeline │ ├── test_chunking.py │ ├── test_multiple_inputs.py │ ├── test_runner_show_plan.py │ ├── test_sql_pipeline.py │ └── test_updated_stage_decorators.py ├── test_accuracy_from_labels.py ├── test_bigrams.py ├── test_data │ ├── address_to_match.csv │ ├── epc_fake.csv │ ├── fhrs_fake.csv │ ├── one_clean_exact_matching_row_downing_street.parquet │ ├── one_clean_row_downing_street.parquet │ └── os_fake.csv ├── test_edge_cases.py ├── test_exact_matching.py ├── test_full_examples.py ├── test_linker.py ├── test_post_linkage_metrics.py ├── test_source_dataset.py ├── utils.py └── validation │ └── test_pyrelation_validation.py ├── uk_address_matcher ├── __init__.py ├── cleaning │ ├── README.md │ ├── chunking_strategies.py │ ├── pipelines.py │ └── steps │ │ ├── __init__.py │ │ ├── normalisation.py │ │ ├── regexes.py │ │ ├── term_frequencies.py │ │ ├── token_parsing.py │ │ └── tokenisation.py ├── data │ ├── address_abbreviations.json │ ├── address_token_frequencies.parquet │ ├── common_end_tokens.csv │ ├── numeric_token_frequencies.parquet │ └── splink_model.json ├── linking_model │ ├── blocking.py │ ├── exact_matching │ │ ├── __init__.py │ │ ├── annotate_exact_matches.py │ │ ├── input_filters.py │ │ ├── matching_stages.py │ │ └── resolve_with_trigrams.py │ ├── splink_model.py │ └── training.py ├── post_linkage │ ├── accuracy_from_labels.py │ ├── analyse_results.py │ ├── identify_distinguishing_tokens.py │ └── match_candidate_selection.py └── sql_pipeline │ ├── helpers.py │ ├── match_reasons.py │ ├── runner.py │ ├── steps.py │ └── validation.py └── uv.lock /.adr-dir: -------------------------------------------------------------------------------- 1 | docs/adr -------------------------------------------------------------------------------- /.github/workflows/accuracy_from_labels_dataset.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/.github/workflows/accuracy_from_labels_dataset.yml -------------------------------------------------------------------------------- /.github/workflows/edge_cases_github_comment.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/.github/workflows/edge_cases_github_comment.yml -------------------------------------------------------------------------------- /.github/workflows/pypi-release.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/.github/workflows/pypi-release.yaml -------------------------------------------------------------------------------- /.github/workflows/run_address_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/.github/workflows/run_address_test.py -------------------------------------------------------------------------------- /.github/workflows/run_pytest.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/.github/workflows/run_pytest.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/.gitignore -------------------------------------------------------------------------------- /AGENTS.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/AGENTS.md -------------------------------------------------------------------------------- /_LICENCE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/_LICENCE.md -------------------------------------------------------------------------------- /benchmarking/.config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/benchmarking/.config.json -------------------------------------------------------------------------------- /benchmarking/analysis/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/benchmarking/analysis/README.md -------------------------------------------------------------------------------- /benchmarking/analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/benchmarking/analysis/__init__.py -------------------------------------------------------------------------------- /benchmarking/analysis/accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/benchmarking/analysis/accuracy.py -------------------------------------------------------------------------------- /benchmarking/analysis/mismatches.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/benchmarking/analysis/mismatches.py -------------------------------------------------------------------------------- /benchmarking/analysis/reporting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/benchmarking/analysis/reporting.py -------------------------------------------------------------------------------- /benchmarking/datasets/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/benchmarking/datasets/README.md -------------------------------------------------------------------------------- /benchmarking/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/benchmarking/datasets/__init__.py -------------------------------------------------------------------------------- /benchmarking/datasets/hackney_council.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/benchmarking/datasets/hackney_council.py -------------------------------------------------------------------------------- /benchmarking/datasets/lambeth_council.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/benchmarking/datasets/lambeth_council.py -------------------------------------------------------------------------------- /benchmarking/datasets/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/benchmarking/datasets/registry.py -------------------------------------------------------------------------------- /benchmarking/datasets/sources.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/benchmarking/datasets/sources.py -------------------------------------------------------------------------------- /benchmarking/exact_match_benchmarking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/benchmarking/exact_match_benchmarking.py -------------------------------------------------------------------------------- /benchmarking/full_e2e_benchmarking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/benchmarking/full_e2e_benchmarking.py -------------------------------------------------------------------------------- /benchmarking/individual_stage_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/benchmarking/individual_stage_runner.py -------------------------------------------------------------------------------- /benchmarking/utils/io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/benchmarking/utils/io.py -------------------------------------------------------------------------------- /benchmarking/utils/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/benchmarking/utils/pipelines.py -------------------------------------------------------------------------------- /benchmarking/utils/timing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/benchmarking/utils/timing.py -------------------------------------------------------------------------------- /docs/adr/0001-duckdb.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/docs/adr/0001-duckdb.md -------------------------------------------------------------------------------- /docs/adr/0002-address-strings-instead-of-tokens.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/docs/adr/0002-address-strings-instead-of-tokens.md -------------------------------------------------------------------------------- /docs/adr/0003-trie-matching.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/docs/adr/0003-trie-matching.md -------------------------------------------------------------------------------- /docs/adr/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/docs/adr/README.md -------------------------------------------------------------------------------- /example_data/companies_house_addresess_postcode_overlap.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/example_data/companies_house_addresess_postcode_overlap.parquet -------------------------------------------------------------------------------- /example_data/companies_house_data_licence.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/example_data/companies_house_data_licence.md -------------------------------------------------------------------------------- /example_data/fhrs_addresses_sample.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/example_data/fhrs_addresses_sample.parquet -------------------------------------------------------------------------------- /example_data/fhrs_data_licence.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/example_data/fhrs_data_licence.md -------------------------------------------------------------------------------- /example_data/numeric_token_tf_table.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/example_data/numeric_token_tf_table.parquet -------------------------------------------------------------------------------- /example_data/open_streetmap_licence.md: -------------------------------------------------------------------------------- 1 | See https://www.openstreetmap.org/copyright -------------------------------------------------------------------------------- /example_data/rel_tok_freq.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/example_data/rel_tok_freq.parquet -------------------------------------------------------------------------------- /examples/example_matching.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/examples/example_matching.py -------------------------------------------------------------------------------- /examples/fhrs/download_fhrs_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/examples/fhrs/download_fhrs_data.py -------------------------------------------------------------------------------- /examples/fhrs/match_fhrs_to_os.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/examples/fhrs/match_fhrs_to_os.py -------------------------------------------------------------------------------- /examples/match_epc_to_os.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/examples/match_epc_to_os.py -------------------------------------------------------------------------------- /examples/match_one.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/examples/match_one.py -------------------------------------------------------------------------------- /examples/preclean_full_os.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/examples/preclean_full_os.py -------------------------------------------------------------------------------- /general_context.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/general_context.md -------------------------------------------------------------------------------- /interactive_comparison.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/interactive_comparison.ipynb -------------------------------------------------------------------------------- /match_example_data.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/match_example_data.ipynb -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/pyproject.toml -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/readme.md -------------------------------------------------------------------------------- /scripts/accuracy_from_labels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/scripts/accuracy_from_labels.py -------------------------------------------------------------------------------- /scripts/analyse_test_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/scripts/analyse_test_cases.py -------------------------------------------------------------------------------- /scripts/epc_accuracy_from_labels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/scripts/epc_accuracy_from_labels.py -------------------------------------------------------------------------------- /scripts/generate_better_tfs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/scripts/generate_better_tfs.py -------------------------------------------------------------------------------- /scripts/generate_common_end_tokens.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/scripts/generate_common_end_tokens.py -------------------------------------------------------------------------------- /scripts/improve_parameters_using_gradient_descent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/scripts/improve_parameters_using_gradient_descent.py -------------------------------------------------------------------------------- /scripts/improve_parameters_using_gradient_descent_non_spsa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/scripts/improve_parameters_using_gradient_descent_non_spsa.py -------------------------------------------------------------------------------- /scripts/run_training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/scripts/run_training.py -------------------------------------------------------------------------------- /shell/bump_release_version.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/shell/bump_release_version.sh -------------------------------------------------------------------------------- /shell/test_release_locally.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/shell/test_release_locally.sh -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/cleaning/test_abbreviation_replacement.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/cleaning/test_abbreviation_replacement.py -------------------------------------------------------------------------------- /tests/cleaning/test_cleaning_steps.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/cleaning/test_cleaning_steps.py -------------------------------------------------------------------------------- /tests/cleaning/test_ukam_address_id.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/cleaning/test_ukam_address_id.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/edge_case_addresses.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/edge_case_addresses.yaml -------------------------------------------------------------------------------- /tests/post_linkage/test_match_candidate_selection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/post_linkage/test_match_candidate_selection.py -------------------------------------------------------------------------------- /tests/sql_pipeline/test_chunking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/sql_pipeline/test_chunking.py -------------------------------------------------------------------------------- /tests/sql_pipeline/test_multiple_inputs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/sql_pipeline/test_multiple_inputs.py -------------------------------------------------------------------------------- /tests/sql_pipeline/test_runner_show_plan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/sql_pipeline/test_runner_show_plan.py -------------------------------------------------------------------------------- /tests/sql_pipeline/test_sql_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/sql_pipeline/test_sql_pipeline.py -------------------------------------------------------------------------------- /tests/sql_pipeline/test_updated_stage_decorators.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/sql_pipeline/test_updated_stage_decorators.py -------------------------------------------------------------------------------- /tests/test_accuracy_from_labels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/test_accuracy_from_labels.py -------------------------------------------------------------------------------- /tests/test_bigrams.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/test_bigrams.py -------------------------------------------------------------------------------- /tests/test_data/address_to_match.csv: -------------------------------------------------------------------------------- 1 | unique_id,address_concat,postcode 2 | 1,10 DOWNING STREET WESTMINSTER LONDON,SW1A 2AA -------------------------------------------------------------------------------- /tests/test_data/epc_fake.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/test_data/epc_fake.csv -------------------------------------------------------------------------------- /tests/test_data/fhrs_fake.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/test_data/fhrs_fake.csv -------------------------------------------------------------------------------- /tests/test_data/one_clean_exact_matching_row_downing_street.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/test_data/one_clean_exact_matching_row_downing_street.parquet -------------------------------------------------------------------------------- /tests/test_data/one_clean_row_downing_street.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/test_data/one_clean_row_downing_street.parquet -------------------------------------------------------------------------------- /tests/test_data/os_fake.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/test_data/os_fake.csv -------------------------------------------------------------------------------- /tests/test_edge_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/test_edge_cases.py -------------------------------------------------------------------------------- /tests/test_exact_matching.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/test_exact_matching.py -------------------------------------------------------------------------------- /tests/test_full_examples.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/test_full_examples.py -------------------------------------------------------------------------------- /tests/test_linker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/test_linker.py -------------------------------------------------------------------------------- /tests/test_post_linkage_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/test_post_linkage_metrics.py -------------------------------------------------------------------------------- /tests/test_source_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/test_source_dataset.py -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/utils.py -------------------------------------------------------------------------------- /tests/validation/test_pyrelation_validation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/tests/validation/test_pyrelation_validation.py -------------------------------------------------------------------------------- /uk_address_matcher/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/__init__.py -------------------------------------------------------------------------------- /uk_address_matcher/cleaning/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/cleaning/README.md -------------------------------------------------------------------------------- /uk_address_matcher/cleaning/chunking_strategies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/cleaning/chunking_strategies.py -------------------------------------------------------------------------------- /uk_address_matcher/cleaning/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/cleaning/pipelines.py -------------------------------------------------------------------------------- /uk_address_matcher/cleaning/steps/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/cleaning/steps/__init__.py -------------------------------------------------------------------------------- /uk_address_matcher/cleaning/steps/normalisation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/cleaning/steps/normalisation.py -------------------------------------------------------------------------------- /uk_address_matcher/cleaning/steps/regexes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/cleaning/steps/regexes.py -------------------------------------------------------------------------------- /uk_address_matcher/cleaning/steps/term_frequencies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/cleaning/steps/term_frequencies.py -------------------------------------------------------------------------------- /uk_address_matcher/cleaning/steps/token_parsing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/cleaning/steps/token_parsing.py -------------------------------------------------------------------------------- /uk_address_matcher/cleaning/steps/tokenisation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/cleaning/steps/tokenisation.py -------------------------------------------------------------------------------- /uk_address_matcher/data/address_abbreviations.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/data/address_abbreviations.json -------------------------------------------------------------------------------- /uk_address_matcher/data/address_token_frequencies.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/data/address_token_frequencies.parquet -------------------------------------------------------------------------------- /uk_address_matcher/data/common_end_tokens.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/data/common_end_tokens.csv -------------------------------------------------------------------------------- /uk_address_matcher/data/numeric_token_frequencies.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/data/numeric_token_frequencies.parquet -------------------------------------------------------------------------------- /uk_address_matcher/data/splink_model.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/data/splink_model.json -------------------------------------------------------------------------------- /uk_address_matcher/linking_model/blocking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/linking_model/blocking.py -------------------------------------------------------------------------------- /uk_address_matcher/linking_model/exact_matching/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/linking_model/exact_matching/__init__.py -------------------------------------------------------------------------------- /uk_address_matcher/linking_model/exact_matching/annotate_exact_matches.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/linking_model/exact_matching/annotate_exact_matches.py -------------------------------------------------------------------------------- /uk_address_matcher/linking_model/exact_matching/input_filters.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/linking_model/exact_matching/input_filters.py -------------------------------------------------------------------------------- /uk_address_matcher/linking_model/exact_matching/matching_stages.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/linking_model/exact_matching/matching_stages.py -------------------------------------------------------------------------------- /uk_address_matcher/linking_model/exact_matching/resolve_with_trigrams.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/linking_model/exact_matching/resolve_with_trigrams.py -------------------------------------------------------------------------------- /uk_address_matcher/linking_model/splink_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/linking_model/splink_model.py -------------------------------------------------------------------------------- /uk_address_matcher/linking_model/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/linking_model/training.py -------------------------------------------------------------------------------- /uk_address_matcher/post_linkage/accuracy_from_labels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/post_linkage/accuracy_from_labels.py -------------------------------------------------------------------------------- /uk_address_matcher/post_linkage/analyse_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/post_linkage/analyse_results.py -------------------------------------------------------------------------------- /uk_address_matcher/post_linkage/identify_distinguishing_tokens.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/post_linkage/identify_distinguishing_tokens.py -------------------------------------------------------------------------------- /uk_address_matcher/post_linkage/match_candidate_selection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/post_linkage/match_candidate_selection.py -------------------------------------------------------------------------------- /uk_address_matcher/sql_pipeline/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/sql_pipeline/helpers.py -------------------------------------------------------------------------------- /uk_address_matcher/sql_pipeline/match_reasons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/sql_pipeline/match_reasons.py -------------------------------------------------------------------------------- /uk_address_matcher/sql_pipeline/runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/sql_pipeline/runner.py -------------------------------------------------------------------------------- /uk_address_matcher/sql_pipeline/steps.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/sql_pipeline/steps.py -------------------------------------------------------------------------------- /uk_address_matcher/sql_pipeline/validation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uk_address_matcher/sql_pipeline/validation.py -------------------------------------------------------------------------------- /uv.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moj-analytical-services/uk_address_matcher/HEAD/uv.lock --------------------------------------------------------------------------------