├── LICENSE ├── README.md ├── chapter01 ├── 1.batch.py ├── 2.real_time_streaming.py ├── 3.semi_real_time.py ├── 4.work_with_queue.py ├── 5.sql_databases.py ├── 6.no_sql_databases.py └── 7.api.py ├── chapter02 ├── accuracy.py ├── average_timeliness.py ├── completeness.py ├── consistency.py ├── data_compliance.py ├── data_usage.py ├── duplication.py ├── timeliness.py └── uniqueness.py ├── chapter03 ├── great_expectations │ ├── code │ │ ├── 1.data_set_up.py │ │ ├── 2.mock_test_dataset.py │ │ └── 3.with_pandas_profiler.py │ └── great_expectations │ │ ├── checkpoints │ │ └── expect_iris_ckpnt.yml │ │ ├── expectations │ │ └── expect_iris.json │ │ ├── great_expectations.yml │ │ ├── plugins │ │ └── custom_data_docs │ │ │ └── styles │ │ │ └── data_docs_custom_styles.css │ │ └── uncommitted │ │ └── data_docs │ │ └── local_site │ │ ├── expectations │ │ └── expect_iris.html │ │ ├── index.html │ │ └── static │ │ ├── fonts │ │ └── HKGrotesk │ │ │ ├── HKGrotesk-Italic.otf │ │ │ ├── HKGrotesk-LightItalic.otf │ │ │ ├── HKGrotesk-MediumItalic.otf │ │ │ └── HKGrotesk-SemiBoldItalic.otf │ │ ├── images │ │ ├── favicon.ico │ │ ├── glossary_scroller.gif │ │ ├── iterative-dev-loop.png │ │ ├── logo-long-vector.svg │ │ ├── logo-long.png │ │ ├── short-logo-vector.svg │ │ ├── short-logo.png │ │ └── validation_failed_unexpected_values.gif │ │ └── styles │ │ ├── data_docs_custom_styles_template.css │ │ └── data_docs_default_styles.css ├── intoduction │ └── identify_trends.py └── pandas_profiling │ ├── data_profile_report.html │ ├── pandas_profiler.ipynb │ └── pandas_profiler.json ├── chapter04 ├── 1.descriptive_stats.py ├── 2.rename_columns.py ├── 3.dropping_columns.py ├── 4.data_types.py ├── 5.date_time.py ├── 6.format_date.py ├── 7.extract_datetime_components.py ├── 8.time_deltas.py └── 9.time_zones.py ├── chapter05 ├── 1.use_case.py ├── 2.inner_join.py ├── 3.outer_merge.py ├── 4.right_merge.py ├── 5.left_merge.py ├── 6a.manage_duplicates.py ├── 6b.manage_duplicates_validate.py ├── 6c.merge_and_aggregate.py ├── 6d.dmanage_duplicates_concatenation.py ├── 7a.managed_duplicated_columns.py ├── 7b.drop_columns_merge.py ├── 7c.use_keys_merge.py ├── 8a.perfomance_benchmark_set_index.py ├── 8b.performance_benchmark_sort_indexes.py ├── 8c.performance_benchmark_memory.py ├── 9a.concatenate_row_wise.py ├── 9b.reset_index.py └── 9c.concatenate_column_wise.py ├── chapter06 ├── 1.use_case.py ├── 2.groupby_full_example.py ├── 3.apply_axis0.py ├── 4.apply_axis1.py ├── 5.simple_filtering.py └── 6.advanced_filtering.py ├── chapter07 ├── 1.postgressql.py ├── 2.pymongo.py ├── 3.pymongo_expand.py ├── 4a.kafka_producer.py ├── 4b.kafka_consumer.py ├── 5.time_based_partitioning.py ├── 6.geo_partitioning.py ├── 7.hybrid_partitioning.py ├── __pycache__ │ └── pymongo.cpython-312.pyc ├── setup │ ├── cleanup_script.sh │ ├── docker-compose.yml │ └── setup_postgres.sh ├── template_aws_s3.py └── template_bigquery.py ├── chapter08 ├── 1.detect_missing_data.py ├── 10.winsorizing.py ├── 11.data_transformation.py ├── 12.mahalanobis_distance.py ├── 13.clustering.py ├── 14.multivariate_trimming.py ├── 2.delete_missing_data.py ├── 3.mean_imputation.py ├── 4.median_imputation.py ├── 5.indicator_imputation.py ├── 6.outliers_visualisation.py ├── 7.identify_univariate_outliers.py ├── 8.handle_univariate_outliers_deletions.py └── 9.trimming.py ├── chapter09 ├── min_max_scaling.py ├── robust_scaler.py └── zscaler.py ├── chapter10 ├── 1a.label_encoding.py ├── 1b.label_encoding_forced.py ├── 2.one_hot_encoding.py ├── 3.target_encoding.py ├── 4.frequency_encoding.py └── 5.binary_encoding.py ├── chapter11 ├── 1.decomposing_time_series │ ├── noise.py │ ├── seasonality.py │ └── trend.py ├── 2.types │ ├── multivariate.py │ └── univariate.py ├── 3.missing_values │ ├── 1.identify_missing_values.py │ ├── 2.remove_missing_values.py │ ├── 3.back_forward_fill.py │ └── 4.interpolation.py ├── 4.analisis │ └── autocorrelation.py ├── 5.outliers │ ├── 1.seasonal_decomposition.py │ ├── 2.autocorrelation.py │ ├── 3.arima.py │ └── 4.moving_average.py └── 6.feature_engineering │ ├── 1.lags.py │ └── 2.seasonal_differencing.py ├── chapter12 ├── 1.text_cleaning.py ├── 10.word_tokenisation.py ├── 11.bpe_tokeniser.py ├── 12.tokenisation_wordpiece.py ├── 13.specialised_tokenisers.py ├── 14.embedding_bert.py ├── 15.embedding_bge.py ├── 16.embedding_gte.py ├── 2.punctuation.py ├── 3.pii_detection.py ├── 4.rare_words.py ├── 5.spelling_checker.py ├── 6.fuzzy_matching.py ├── 7.fixed_chunking.py ├── 8.paragraph_chunking.py ├── 9.semantic_chunking.py └── 9.semantic_similarity.py └── chapter13 ├── 1.image_prerpocessing.py ├── 2.ocr.py ├── 3.ocr_with_llms.py ├── 4.image_captioning.py ├── 5.whisper.py ├── 6.emotion_detection.py ├── 7.write_highlights.py ├── audio └── 3.chain orchestrator.mp3 └── images ├── 1.png ├── 10.png ├── 11.png ├── 12.png ├── 13.png ├── 14.png ├── 15.png ├── 16.png ├── 17.png ├── 18.png ├── 19.png ├── 2.png ├── 20.png ├── 21.png ├── 22.png ├── 23.png ├── 24.png ├── 25.png ├── 3.png ├── 4.png ├── 5.png ├── 6.png ├── 7.png ├── 8.png └── 9.png /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/README.md -------------------------------------------------------------------------------- /chapter01/1.batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter01/1.batch.py -------------------------------------------------------------------------------- /chapter01/2.real_time_streaming.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter01/2.real_time_streaming.py -------------------------------------------------------------------------------- /chapter01/3.semi_real_time.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter01/3.semi_real_time.py -------------------------------------------------------------------------------- /chapter01/4.work_with_queue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter01/4.work_with_queue.py -------------------------------------------------------------------------------- /chapter01/5.sql_databases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter01/5.sql_databases.py -------------------------------------------------------------------------------- /chapter01/6.no_sql_databases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter01/6.no_sql_databases.py -------------------------------------------------------------------------------- /chapter01/7.api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter01/7.api.py -------------------------------------------------------------------------------- /chapter02/accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter02/accuracy.py -------------------------------------------------------------------------------- /chapter02/average_timeliness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter02/average_timeliness.py -------------------------------------------------------------------------------- /chapter02/completeness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter02/completeness.py -------------------------------------------------------------------------------- /chapter02/consistency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter02/consistency.py -------------------------------------------------------------------------------- /chapter02/data_compliance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter02/data_compliance.py -------------------------------------------------------------------------------- /chapter02/data_usage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter02/data_usage.py -------------------------------------------------------------------------------- /chapter02/duplication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter02/duplication.py -------------------------------------------------------------------------------- /chapter02/timeliness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter02/timeliness.py -------------------------------------------------------------------------------- /chapter02/uniqueness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter02/uniqueness.py -------------------------------------------------------------------------------- /chapter03/great_expectations/code/1.data_set_up.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/great_expectations/code/1.data_set_up.py -------------------------------------------------------------------------------- /chapter03/great_expectations/code/2.mock_test_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/great_expectations/code/2.mock_test_dataset.py -------------------------------------------------------------------------------- /chapter03/great_expectations/code/3.with_pandas_profiler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/great_expectations/code/3.with_pandas_profiler.py -------------------------------------------------------------------------------- /chapter03/great_expectations/great_expectations/checkpoints/expect_iris_ckpnt.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/great_expectations/great_expectations/checkpoints/expect_iris_ckpnt.yml -------------------------------------------------------------------------------- /chapter03/great_expectations/great_expectations/expectations/expect_iris.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/great_expectations/great_expectations/expectations/expect_iris.json -------------------------------------------------------------------------------- /chapter03/great_expectations/great_expectations/great_expectations.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/great_expectations/great_expectations/great_expectations.yml -------------------------------------------------------------------------------- /chapter03/great_expectations/great_expectations/plugins/custom_data_docs/styles/data_docs_custom_styles.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/great_expectations/great_expectations/plugins/custom_data_docs/styles/data_docs_custom_styles.css -------------------------------------------------------------------------------- /chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/expectations/expect_iris.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/expectations/expect_iris.html -------------------------------------------------------------------------------- /chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/index.html -------------------------------------------------------------------------------- /chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/fonts/HKGrotesk/HKGrotesk-Italic.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/fonts/HKGrotesk/HKGrotesk-Italic.otf -------------------------------------------------------------------------------- /chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/fonts/HKGrotesk/HKGrotesk-LightItalic.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/fonts/HKGrotesk/HKGrotesk-LightItalic.otf -------------------------------------------------------------------------------- /chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/fonts/HKGrotesk/HKGrotesk-MediumItalic.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/fonts/HKGrotesk/HKGrotesk-MediumItalic.otf -------------------------------------------------------------------------------- /chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/fonts/HKGrotesk/HKGrotesk-SemiBoldItalic.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/fonts/HKGrotesk/HKGrotesk-SemiBoldItalic.otf -------------------------------------------------------------------------------- /chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/images/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/images/favicon.ico -------------------------------------------------------------------------------- /chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/images/glossary_scroller.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/images/glossary_scroller.gif -------------------------------------------------------------------------------- /chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/images/iterative-dev-loop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/images/iterative-dev-loop.png -------------------------------------------------------------------------------- /chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/images/logo-long-vector.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/images/logo-long-vector.svg -------------------------------------------------------------------------------- /chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/images/logo-long.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/images/logo-long.png -------------------------------------------------------------------------------- /chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/images/short-logo-vector.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/images/short-logo-vector.svg -------------------------------------------------------------------------------- /chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/images/short-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/images/short-logo.png -------------------------------------------------------------------------------- /chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/images/validation_failed_unexpected_values.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/images/validation_failed_unexpected_values.gif -------------------------------------------------------------------------------- /chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/styles/data_docs_custom_styles_template.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/styles/data_docs_custom_styles_template.css -------------------------------------------------------------------------------- /chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/styles/data_docs_default_styles.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/great_expectations/great_expectations/uncommitted/data_docs/local_site/static/styles/data_docs_default_styles.css -------------------------------------------------------------------------------- /chapter03/intoduction/identify_trends.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/intoduction/identify_trends.py -------------------------------------------------------------------------------- /chapter03/pandas_profiling/data_profile_report.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/pandas_profiling/data_profile_report.html -------------------------------------------------------------------------------- /chapter03/pandas_profiling/pandas_profiler.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/pandas_profiling/pandas_profiler.ipynb -------------------------------------------------------------------------------- /chapter03/pandas_profiling/pandas_profiler.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter03/pandas_profiling/pandas_profiler.json -------------------------------------------------------------------------------- /chapter04/1.descriptive_stats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter04/1.descriptive_stats.py -------------------------------------------------------------------------------- /chapter04/2.rename_columns.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter04/2.rename_columns.py -------------------------------------------------------------------------------- /chapter04/3.dropping_columns.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter04/3.dropping_columns.py -------------------------------------------------------------------------------- /chapter04/4.data_types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter04/4.data_types.py -------------------------------------------------------------------------------- /chapter04/5.date_time.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter04/5.date_time.py -------------------------------------------------------------------------------- /chapter04/6.format_date.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter04/6.format_date.py -------------------------------------------------------------------------------- /chapter04/7.extract_datetime_components.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter04/7.extract_datetime_components.py -------------------------------------------------------------------------------- /chapter04/8.time_deltas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter04/8.time_deltas.py -------------------------------------------------------------------------------- /chapter04/9.time_zones.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter04/9.time_zones.py -------------------------------------------------------------------------------- /chapter05/1.use_case.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter05/1.use_case.py -------------------------------------------------------------------------------- /chapter05/2.inner_join.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter05/2.inner_join.py -------------------------------------------------------------------------------- /chapter05/3.outer_merge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter05/3.outer_merge.py -------------------------------------------------------------------------------- /chapter05/4.right_merge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter05/4.right_merge.py -------------------------------------------------------------------------------- /chapter05/5.left_merge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter05/5.left_merge.py -------------------------------------------------------------------------------- /chapter05/6a.manage_duplicates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter05/6a.manage_duplicates.py -------------------------------------------------------------------------------- /chapter05/6b.manage_duplicates_validate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter05/6b.manage_duplicates_validate.py -------------------------------------------------------------------------------- /chapter05/6c.merge_and_aggregate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter05/6c.merge_and_aggregate.py -------------------------------------------------------------------------------- /chapter05/6d.dmanage_duplicates_concatenation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter05/6d.dmanage_duplicates_concatenation.py -------------------------------------------------------------------------------- /chapter05/7a.managed_duplicated_columns.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter05/7a.managed_duplicated_columns.py -------------------------------------------------------------------------------- /chapter05/7b.drop_columns_merge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter05/7b.drop_columns_merge.py -------------------------------------------------------------------------------- /chapter05/7c.use_keys_merge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter05/7c.use_keys_merge.py -------------------------------------------------------------------------------- /chapter05/8a.perfomance_benchmark_set_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter05/8a.perfomance_benchmark_set_index.py -------------------------------------------------------------------------------- /chapter05/8b.performance_benchmark_sort_indexes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter05/8b.performance_benchmark_sort_indexes.py -------------------------------------------------------------------------------- /chapter05/8c.performance_benchmark_memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter05/8c.performance_benchmark_memory.py -------------------------------------------------------------------------------- /chapter05/9a.concatenate_row_wise.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter05/9a.concatenate_row_wise.py -------------------------------------------------------------------------------- /chapter05/9b.reset_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter05/9b.reset_index.py -------------------------------------------------------------------------------- /chapter05/9c.concatenate_column_wise.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter05/9c.concatenate_column_wise.py -------------------------------------------------------------------------------- /chapter06/1.use_case.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter06/1.use_case.py -------------------------------------------------------------------------------- /chapter06/2.groupby_full_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter06/2.groupby_full_example.py -------------------------------------------------------------------------------- /chapter06/3.apply_axis0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter06/3.apply_axis0.py -------------------------------------------------------------------------------- /chapter06/4.apply_axis1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter06/4.apply_axis1.py -------------------------------------------------------------------------------- /chapter06/5.simple_filtering.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter06/5.simple_filtering.py -------------------------------------------------------------------------------- /chapter06/6.advanced_filtering.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter06/6.advanced_filtering.py -------------------------------------------------------------------------------- /chapter07/1.postgressql.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter07/1.postgressql.py -------------------------------------------------------------------------------- /chapter07/2.pymongo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter07/2.pymongo.py -------------------------------------------------------------------------------- /chapter07/3.pymongo_expand.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter07/3.pymongo_expand.py -------------------------------------------------------------------------------- /chapter07/4a.kafka_producer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter07/4a.kafka_producer.py -------------------------------------------------------------------------------- /chapter07/4b.kafka_consumer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter07/4b.kafka_consumer.py -------------------------------------------------------------------------------- /chapter07/5.time_based_partitioning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter07/5.time_based_partitioning.py -------------------------------------------------------------------------------- /chapter07/6.geo_partitioning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter07/6.geo_partitioning.py -------------------------------------------------------------------------------- /chapter07/7.hybrid_partitioning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter07/7.hybrid_partitioning.py -------------------------------------------------------------------------------- /chapter07/__pycache__/pymongo.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter07/__pycache__/pymongo.cpython-312.pyc -------------------------------------------------------------------------------- /chapter07/setup/cleanup_script.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter07/setup/cleanup_script.sh -------------------------------------------------------------------------------- /chapter07/setup/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter07/setup/docker-compose.yml -------------------------------------------------------------------------------- /chapter07/setup/setup_postgres.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter07/setup/setup_postgres.sh -------------------------------------------------------------------------------- /chapter07/template_aws_s3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter07/template_aws_s3.py -------------------------------------------------------------------------------- /chapter07/template_bigquery.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter07/template_bigquery.py -------------------------------------------------------------------------------- /chapter08/1.detect_missing_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter08/1.detect_missing_data.py -------------------------------------------------------------------------------- /chapter08/10.winsorizing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter08/10.winsorizing.py -------------------------------------------------------------------------------- /chapter08/11.data_transformation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter08/11.data_transformation.py -------------------------------------------------------------------------------- /chapter08/12.mahalanobis_distance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter08/12.mahalanobis_distance.py -------------------------------------------------------------------------------- /chapter08/13.clustering.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter08/13.clustering.py -------------------------------------------------------------------------------- /chapter08/14.multivariate_trimming.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter08/14.multivariate_trimming.py -------------------------------------------------------------------------------- /chapter08/2.delete_missing_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter08/2.delete_missing_data.py -------------------------------------------------------------------------------- /chapter08/3.mean_imputation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter08/3.mean_imputation.py -------------------------------------------------------------------------------- /chapter08/4.median_imputation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter08/4.median_imputation.py -------------------------------------------------------------------------------- /chapter08/5.indicator_imputation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter08/5.indicator_imputation.py -------------------------------------------------------------------------------- /chapter08/6.outliers_visualisation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter08/6.outliers_visualisation.py -------------------------------------------------------------------------------- /chapter08/7.identify_univariate_outliers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter08/7.identify_univariate_outliers.py -------------------------------------------------------------------------------- /chapter08/8.handle_univariate_outliers_deletions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter08/8.handle_univariate_outliers_deletions.py -------------------------------------------------------------------------------- /chapter08/9.trimming.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter08/9.trimming.py -------------------------------------------------------------------------------- /chapter09/min_max_scaling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter09/min_max_scaling.py -------------------------------------------------------------------------------- /chapter09/robust_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter09/robust_scaler.py -------------------------------------------------------------------------------- /chapter09/zscaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter09/zscaler.py -------------------------------------------------------------------------------- /chapter10/1a.label_encoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter10/1a.label_encoding.py -------------------------------------------------------------------------------- /chapter10/1b.label_encoding_forced.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter10/1b.label_encoding_forced.py -------------------------------------------------------------------------------- /chapter10/2.one_hot_encoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter10/2.one_hot_encoding.py -------------------------------------------------------------------------------- /chapter10/3.target_encoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter10/3.target_encoding.py -------------------------------------------------------------------------------- /chapter10/4.frequency_encoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter10/4.frequency_encoding.py -------------------------------------------------------------------------------- /chapter10/5.binary_encoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter10/5.binary_encoding.py -------------------------------------------------------------------------------- /chapter11/1.decomposing_time_series/noise.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter11/1.decomposing_time_series/noise.py -------------------------------------------------------------------------------- /chapter11/1.decomposing_time_series/seasonality.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter11/1.decomposing_time_series/seasonality.py -------------------------------------------------------------------------------- /chapter11/1.decomposing_time_series/trend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter11/1.decomposing_time_series/trend.py -------------------------------------------------------------------------------- /chapter11/2.types/multivariate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter11/2.types/multivariate.py -------------------------------------------------------------------------------- /chapter11/2.types/univariate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter11/2.types/univariate.py -------------------------------------------------------------------------------- /chapter11/3.missing_values/1.identify_missing_values.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter11/3.missing_values/1.identify_missing_values.py -------------------------------------------------------------------------------- /chapter11/3.missing_values/2.remove_missing_values.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter11/3.missing_values/2.remove_missing_values.py -------------------------------------------------------------------------------- /chapter11/3.missing_values/3.back_forward_fill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter11/3.missing_values/3.back_forward_fill.py -------------------------------------------------------------------------------- /chapter11/3.missing_values/4.interpolation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter11/3.missing_values/4.interpolation.py -------------------------------------------------------------------------------- /chapter11/4.analisis/autocorrelation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter11/4.analisis/autocorrelation.py -------------------------------------------------------------------------------- /chapter11/5.outliers/1.seasonal_decomposition.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter11/5.outliers/1.seasonal_decomposition.py -------------------------------------------------------------------------------- /chapter11/5.outliers/2.autocorrelation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter11/5.outliers/2.autocorrelation.py -------------------------------------------------------------------------------- /chapter11/5.outliers/3.arima.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter11/5.outliers/3.arima.py -------------------------------------------------------------------------------- /chapter11/5.outliers/4.moving_average.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter11/5.outliers/4.moving_average.py -------------------------------------------------------------------------------- /chapter11/6.feature_engineering/1.lags.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter11/6.feature_engineering/1.lags.py -------------------------------------------------------------------------------- /chapter11/6.feature_engineering/2.seasonal_differencing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter11/6.feature_engineering/2.seasonal_differencing.py -------------------------------------------------------------------------------- /chapter12/1.text_cleaning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter12/1.text_cleaning.py -------------------------------------------------------------------------------- /chapter12/10.word_tokenisation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter12/10.word_tokenisation.py -------------------------------------------------------------------------------- /chapter12/11.bpe_tokeniser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter12/11.bpe_tokeniser.py -------------------------------------------------------------------------------- /chapter12/12.tokenisation_wordpiece.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter12/12.tokenisation_wordpiece.py -------------------------------------------------------------------------------- /chapter12/13.specialised_tokenisers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter12/13.specialised_tokenisers.py -------------------------------------------------------------------------------- /chapter12/14.embedding_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter12/14.embedding_bert.py -------------------------------------------------------------------------------- /chapter12/15.embedding_bge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter12/15.embedding_bge.py -------------------------------------------------------------------------------- /chapter12/16.embedding_gte.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter12/16.embedding_gte.py -------------------------------------------------------------------------------- /chapter12/2.punctuation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter12/2.punctuation.py -------------------------------------------------------------------------------- /chapter12/3.pii_detection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter12/3.pii_detection.py -------------------------------------------------------------------------------- /chapter12/4.rare_words.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter12/4.rare_words.py -------------------------------------------------------------------------------- /chapter12/5.spelling_checker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter12/5.spelling_checker.py -------------------------------------------------------------------------------- /chapter12/6.fuzzy_matching.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter12/6.fuzzy_matching.py -------------------------------------------------------------------------------- /chapter12/7.fixed_chunking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter12/7.fixed_chunking.py -------------------------------------------------------------------------------- /chapter12/8.paragraph_chunking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter12/8.paragraph_chunking.py -------------------------------------------------------------------------------- /chapter12/9.semantic_chunking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter12/9.semantic_chunking.py -------------------------------------------------------------------------------- /chapter12/9.semantic_similarity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter12/9.semantic_similarity.py -------------------------------------------------------------------------------- /chapter13/1.image_prerpocessing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/1.image_prerpocessing.py -------------------------------------------------------------------------------- /chapter13/2.ocr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/2.ocr.py -------------------------------------------------------------------------------- /chapter13/3.ocr_with_llms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/3.ocr_with_llms.py -------------------------------------------------------------------------------- /chapter13/4.image_captioning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/4.image_captioning.py -------------------------------------------------------------------------------- /chapter13/5.whisper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/5.whisper.py -------------------------------------------------------------------------------- /chapter13/6.emotion_detection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/6.emotion_detection.py -------------------------------------------------------------------------------- /chapter13/7.write_highlights.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/7.write_highlights.py -------------------------------------------------------------------------------- /chapter13/audio/3.chain orchestrator.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/audio/3.chain orchestrator.mp3 -------------------------------------------------------------------------------- /chapter13/images/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/images/1.png -------------------------------------------------------------------------------- /chapter13/images/10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/images/10.png -------------------------------------------------------------------------------- /chapter13/images/11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/images/11.png -------------------------------------------------------------------------------- /chapter13/images/12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/images/12.png -------------------------------------------------------------------------------- /chapter13/images/13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/images/13.png -------------------------------------------------------------------------------- /chapter13/images/14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/images/14.png -------------------------------------------------------------------------------- /chapter13/images/15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/images/15.png -------------------------------------------------------------------------------- /chapter13/images/16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/images/16.png -------------------------------------------------------------------------------- /chapter13/images/17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/images/17.png -------------------------------------------------------------------------------- /chapter13/images/18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/images/18.png -------------------------------------------------------------------------------- /chapter13/images/19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/images/19.png -------------------------------------------------------------------------------- /chapter13/images/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/images/2.png -------------------------------------------------------------------------------- /chapter13/images/20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/images/20.png -------------------------------------------------------------------------------- /chapter13/images/21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/images/21.png -------------------------------------------------------------------------------- /chapter13/images/22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/images/22.png -------------------------------------------------------------------------------- /chapter13/images/23.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/images/23.png -------------------------------------------------------------------------------- /chapter13/images/24.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/images/24.png -------------------------------------------------------------------------------- /chapter13/images/25.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/images/25.png -------------------------------------------------------------------------------- /chapter13/images/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/images/3.png -------------------------------------------------------------------------------- /chapter13/images/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/images/4.png -------------------------------------------------------------------------------- /chapter13/images/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/images/5.png -------------------------------------------------------------------------------- /chapter13/images/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/images/6.png -------------------------------------------------------------------------------- /chapter13/images/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/images/7.png -------------------------------------------------------------------------------- /chapter13/images/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/images/8.png -------------------------------------------------------------------------------- /chapter13/images/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Data-Cleaning-and-Preparation-Best-Practices/HEAD/chapter13/images/9.png --------------------------------------------------------------------------------