├── .circleci └── config.yml ├── .coveragerc ├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── docs.md │ ├── feature_request.md │ └── jupyter-notebook-examples.md └── workflow │ └── workflow.yml ├── .gitignore ├── .readthedocs.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE.md ├── MANIFEST.in ├── README.md ├── docs ├── Makefile ├── _static │ ├── css │ │ └── feature-engine.css │ └── js │ │ └── copybutton.js ├── _templates │ ├── class.rst │ ├── layout.html │ └── numpydoc_docstring.rst ├── about │ ├── about.rst │ ├── authors.rst │ ├── former_authors.rst │ ├── governance.rst │ ├── index.rst │ └── roadmap.rst ├── api_doc │ ├── creation │ │ ├── CyclicalFeatures.rst │ │ ├── DecisionTreeFeatures.rst │ │ ├── MathFeatures.rst │ │ ├── RelativeFeatures.rst │ │ └── index.rst │ ├── datasets │ │ ├── index.rst │ │ └── titanic.rst │ ├── datetime │ │ ├── DatetimeFeatures.rst │ │ ├── DatetimeSubtraction.rst │ │ └── index.rst │ ├── discretisation │ │ ├── ArbitraryDiscretiser.rst │ │ ├── DecisionTreeDiscretiser.rst │ │ ├── EqualFrequencyDiscretiser.rst │ │ ├── EqualWidthDiscretiser.rst │ │ ├── GeometricWidthDiscretiser.rst │ │ └── index.rst │ ├── encoding │ │ ├── CountFrequencyEncoder.rst │ │ ├── DecisionTreeEncoder.rst │ │ ├── MeanEncoder.rst │ │ ├── OneHotEncoder.rst │ │ ├── OrdinalEncoder.rst │ │ ├── RareLabelEncoder.rst │ │ ├── StringSimilarityEncoder.rst │ │ ├── WoEEncoder.rst │ │ └── index.rst │ ├── imputation │ │ ├── AddMissingIndicator.rst │ │ ├── ArbitraryNumberImputer.rst │ │ ├── CategoricalImputer.rst │ │ ├── DropMissingData.rst │ │ ├── EndTailImputer.rst │ │ ├── MeanMedianImputer.rst │ │ ├── RandomSampleImputer.rst │ │ └── index.rst │ ├── index.rst │ ├── outliers │ │ ├── ArbitraryOutlierCapper.rst │ │ ├── OutlierTrimmer.rst │ │ ├── Winsorizer.rst │ │ └── index.rst │ ├── pipeline │ │ ├── Pipeline.rst │ │ ├── index.rst │ │ └── make_pipeline.rst │ ├── preprocessing │ │ ├── MatchCategories.rst │ │ ├── MatchVariables.rst │ │ └── index.rst │ ├── scaling │ │ ├── MeanNormalizationScaler.rst │ │ └── index.rst │ ├── selection │ │ ├── DropConstantFeatures.rst │ │ ├── DropCorrelatedFeatures.rst │ │ ├── DropDuplicateFeatures.rst │ │ ├── DropFeatures.rst │ │ ├── DropHighPSIFeatures.rst │ │ ├── MRMR.rst │ │ ├── ProbeFeatureSelection.rst │ │ ├── RecursiveFeatureAddition.rst │ │ ├── RecursiveFeatureElimination.rst │ │ ├── SelectByInformationValue.rst │ │ ├── SelectByShuffling.rst │ │ ├── SelectBySingleFeaturePerformance.rst │ │ ├── SelectByTargetMeanPerformance.rst │ │ ├── SmartCorrelatedSelection.rst │ │ └── index.rst │ ├── timeseries │ │ ├── forecasting │ │ │ ├── ExpandingWindowFeatures.rst │ │ │ ├── LagFeatures.rst │ │ │ ├── WindowFeatures.rst │ │ │ └── index.rst │ │ └── index.rst │ ├── transformation │ │ ├── ArcsinTransformer.rst │ │ ├── BoxCoxTransformer.rst │ │ ├── LogCpTransformer.rst │ │ ├── LogTransformer.rst │ │ ├── PowerTransformer.rst │ │ ├── ReciprocalTransformer.rst │ │ ├── YeoJohnsonTransformer.rst │ │ └── index.rst │ ├── variable_handling │ │ ├── check_all_variables.rst │ │ ├── check_categorical_variables.rst │ │ ├── check_datetime_variables.rst │ │ ├── check_numerical_variables.rst │ │ ├── find_all_variables.rst │ │ ├── find_categorical_and_numerical_variables.rst │ │ ├── find_categorical_variables.rst │ │ ├── find_datetime_variables.rst │ │ ├── find_numerical_variables.rst │ │ ├── index.rst │ │ └── retain_variables_if_in_df.rst │ └── wrappers │ │ ├── Wrapper.rst │ │ └── index.rst ├── conf.py ├── contribute │ ├── code_of_conduct.rst │ ├── contribute_code.rst │ ├── 
contribute_docs.rst │ ├── contribute_jup.rst │ ├── contribute_other.rst │ └── index.rst ├── donate.rst ├── images │ ├── 1024px-Relationship_between_mean_and_median_under_different_skewness.png │ ├── Discretisation.png │ ├── FeatureEnginePackageStructure.png │ ├── FeatureEnginePackageStructureCrossSectional.png │ ├── FeatureEnginePackageStructureDatetimeText.png │ ├── FeatureEnginePackageStructureTimeseries.png │ ├── PSI_distribution_case1.png │ ├── PSI_distribution_case3.png │ ├── PSI_distribution_case4.png │ ├── PSI_distribution_case5.png │ ├── Variable_Transformation.png │ ├── arbitraryvalueimputation.png │ ├── bmilogcp.png │ ├── bmiraw.png │ ├── boxplot-age-percentiles.png │ ├── boxplot-age.png │ ├── boxplot-fare-mad.png │ ├── boxplot-fare.png │ ├── boxplot-sibsp-fare-iqr.png │ ├── boxplot-sibsp.png │ ├── boxplot-titanic.png │ ├── breast_cancer_arcsin.png │ ├── breast_cancer_raw.png │ ├── cookbook.png │ ├── dmlm.png │ ├── endtailimputer.png │ ├── equalfrequencydiscretisation.png │ ├── equalfrequencydiscretisation_gaussian.png │ ├── equalfrequencydiscretisation_skewed.png │ ├── equalwidthdiscretisation.png │ ├── f_statistic.png │ ├── feml.png │ ├── fetsf.png │ ├── fork.png │ ├── frequentcategoryimputer.png │ ├── fsml.png │ ├── fsmlbook.png │ ├── fwml.png │ ├── hour_sin.png │ ├── hour_sin2.png │ ├── hour_sin3.png │ ├── hour_sin4.png │ ├── increasingwidthdisc.png │ ├── ivml_logo.png │ ├── logcpraw.png │ ├── logcptransform.png │ ├── logo │ │ ├── FeatureEngine.png │ │ ├── Logo.png │ │ ├── Logo_name.png │ │ ├── favicon.png │ │ └── logo.svg │ ├── lotarea_pt.png │ ├── lotarea_pt_custom_exp.png │ ├── lotarea_raw.png │ ├── lotareaboxcox.png │ ├── lotarealog.png │ ├── lotareapower.png │ ├── lotarearaw.png │ ├── lotareareciprocal.png │ ├── lotareayeojohnson.png │ ├── lotshape-price-per-cat-enc.png │ ├── lotshape-price-per-cat.png │ ├── meanmedianimputater_distributions.png │ ├── medianimputation.png │ ├── medinc_disc_arbitrarily.png │ ├── medinc_disc_arbitrarily2.png │ ├── medinc_hist.png │ ├── missingcategoryimputer.png │ ├── missingindicator.png │ ├── mli_logo.png │ ├── monotonic.png │ ├── mzoning-price-per-cat-enc.png │ ├── mzoning-price-per-cat.png │ ├── nonnormalvars2.png │ ├── nonnormalvars2logtransformed.png │ ├── nonnormalvars2transformed.png │ ├── ordinal_encoding_monotonic.png │ ├── pipelineprediction.png │ ├── probe-importance-std.png │ ├── probe_feature_normal.png │ ├── probe_features.png │ ├── quasiconstant.png │ ├── randomsampleimputation.png │ ├── reciprocal_transformer │ │ ├── reciprocal_transfomer_inverse.png │ │ ├── reciprocal_transfomer_new.png │ │ ├── reciprocal_transfomer_original.png │ │ ├── reciprocal_transformer_3plots_new.png │ │ └── reciprocal_transformer_3plots_original.png │ ├── rfa_linreg_imp.png │ ├── rfa_perf_drifts.png │ ├── rfe_perf_drift.png │ ├── rfimportancemrmr.png │ ├── selectionChart.png │ ├── shuffle-features-std.png │ ├── single-feature-perf-std.png │ ├── single_feature_probes_imp.png │ ├── sponsors │ │ ├── call_for_sponsors.png │ │ ├── how-did-you-discover.png │ │ └── trainindata.png │ ├── summary │ │ ├── imputersSummary.png │ │ └── selectionSummary.png │ ├── target-mean-sel-std.png │ ├── toydata_pt_raw.png │ ├── toydata_pt_transformed.png │ ├── toydata_pt_transformed_custom_exp.png │ ├── transformedcoupleYJ.png │ ├── treediscretisation.png │ ├── treemonotonicprediction.png │ ├── treepredictionrounded.png │ ├── untransformedcoupleYJ.png │ ├── woe_encoding.png │ ├── woe_prediction.png │ └── yeojohnsonformula.png ├── index.rst ├── quickstart │ ├── 
datasets.rst │ └── index.rst ├── requirements.txt ├── resources │ ├── blogs.rst │ ├── books.rst │ ├── courses.rst │ ├── index.rst │ └── tutorials.rst ├── sphinxext │ ├── LICENSE.txt │ ├── README.txt │ └── github_link.py ├── user_guide │ ├── creation │ │ ├── CyclicalFeatures.rst │ │ ├── DecisionTreeFeatures.rst │ │ ├── MathFeatures.rst │ │ ├── RelativeFeatures.rst │ │ └── index.rst │ ├── datetime │ │ ├── DatetimeFeatures.rst │ │ ├── DatetimeSubtraction.rst │ │ └── index.rst │ ├── discretisation │ │ ├── ArbitraryDiscretiser.rst │ │ ├── DecisionTreeDiscretiser.rst │ │ ├── EqualFrequencyDiscretiser.rst │ │ ├── EqualWidthDiscretiser.rst │ │ ├── GeometricWidthDiscretiser.rst │ │ └── index.rst │ ├── encoding │ │ ├── CountFrequencyEncoder.rst │ │ ├── DecisionTreeEncoder.rst │ │ ├── MeanEncoder.rst │ │ ├── OneHotEncoder.rst │ │ ├── OrdinalEncoder.rst │ │ ├── RareLabelEncoder.rst │ │ ├── StringSimilarityEncoder.rst │ │ ├── WoEEncoder.rst │ │ └── index.rst │ ├── imputation │ │ ├── AddMissingIndicator.rst │ │ ├── ArbitraryNumberImputer.rst │ │ ├── CategoricalImputer.rst │ │ ├── DropMissingData.rst │ │ ├── EndTailImputer.rst │ │ ├── MeanMedianImputer.rst │ │ ├── RandomSampleImputer.rst │ │ └── index.rst │ ├── index.rst │ ├── outliers │ │ ├── ArbitraryOutlierCapper.rst │ │ ├── OutlierTrimmer.rst │ │ ├── Winsorizer.rst │ │ └── index.rst │ ├── pipeline │ │ ├── Pipeline.rst │ │ ├── index.rst │ │ └── make_pipeline.rst │ ├── preprocessing │ │ ├── MatchCategories.rst │ │ ├── MatchVariables.rst │ │ └── index.rst │ ├── scaling │ │ ├── MeanNormalizationScaler.rst │ │ └── index.rst │ ├── selection │ │ ├── DropConstantFeatures.rst │ │ ├── DropCorrelatedFeatures.rst │ │ ├── DropDuplicateFeatures.rst │ │ ├── DropFeatures.rst │ │ ├── DropHighPSIFeatures.rst │ │ ├── MRMR.rst │ │ ├── ProbeFeatureSelection.rst │ │ ├── RecursiveFeatureAddition.rst │ │ ├── RecursiveFeatureElimination.rst │ │ ├── SelectByInformationValue.rst │ │ ├── SelectByShuffling.rst │ │ ├── SelectBySingleFeaturePerformance.rst │ │ ├── SelectByTargetMeanPerformance.rst │ │ ├── SmartCorrelatedSelection.rst │ │ └── index.rst │ ├── timeseries │ │ ├── forecasting │ │ │ ├── ExpandingWindowFeatures.rst │ │ │ ├── LagFeatures.rst │ │ │ ├── WindowFeatures.rst │ │ │ └── index.rst │ │ └── index.rst │ ├── transformation │ │ ├── ArcsinTransformer.rst │ │ ├── BoxCoxTransformer.rst │ │ ├── LogCpTransformer.rst │ │ ├── LogTransformer.rst │ │ ├── PowerTransformer.rst │ │ ├── ReciprocalTransformer.rst │ │ ├── YeoJohnsonTransformer.rst │ │ └── index.rst │ ├── variable_handling │ │ ├── check_all_variables.rst │ │ ├── check_categorical_variables.rst │ │ ├── check_datetime_variables.rst │ │ ├── check_numerical_variables.rst │ │ ├── find_all_variables.rst │ │ ├── find_categorical_and_numerical_variables.rst │ │ ├── find_categorical_variables.rst │ │ ├── find_datetime_variables.rst │ │ ├── find_numerical_variables.rst │ │ ├── index.rst │ │ └── retain_variables_if_in_df.rst │ └── wrappers │ │ ├── Wrapper.rst │ │ └── index.rst ├── versions │ └── index.rst └── whats_new │ ├── index.rst │ ├── v_06.rst │ ├── v_1.rst │ ├── v_120.rst │ ├── v_130.rst │ ├── v_140.rst │ ├── v_150.rst │ ├── v_160.rst │ ├── v_170.rst │ └── v_180.rst ├── feature_engine ├── VERSION ├── __init__.py ├── _base_transformers │ ├── __init__.py │ ├── base_numerical.py │ └── mixins.py ├── _check_init_parameters │ ├── __init__.py │ ├── check_init_input_params.py │ ├── check_input_dictionary.py │ └── check_variables.py ├── _docstrings │ ├── __init__.py │ ├── fit_attributes.py │ ├── init_parameters │ │ ├── 
__init__.py │ │ ├── all_trasnformers.py │ │ ├── creation.py │ │ ├── discretisers.py │ │ ├── encoders.py │ │ ├── outliers.py │ │ └── selection.py │ ├── methods.py │ ├── selection │ │ ├── __init__.py │ │ └── _docstring.py │ └── substitute.py ├── _prediction │ ├── __init__.py │ ├── base_predictor.py │ ├── target_mean_classifier.py │ └── target_mean_regressor.py ├── creation │ ├── __init__.py │ ├── base_creation.py │ ├── cyclical_features.py │ ├── decision_tree_features.py │ ├── math_features.py │ └── relative_features.py ├── dataframe_checks.py ├── datasets │ ├── __init__.py │ └── titanic.py ├── datetime │ ├── __init__.py │ ├── _datetime_constants.py │ ├── datetime.py │ └── datetime_subtraction.py ├── discretisation │ ├── __init__.py │ ├── arbitrary.py │ ├── base_discretiser.py │ ├── decision_tree.py │ ├── equal_frequency.py │ ├── equal_width.py │ └── geometric_width.py ├── encoding │ ├── __init__.py │ ├── _helper_functions.py │ ├── base_encoder.py │ ├── count_frequency.py │ ├── decision_tree.py │ ├── mean_encoding.py │ ├── one_hot.py │ ├── ordinal.py │ ├── rare_label.py │ ├── similarity_encoder.py │ └── woe.py ├── imputation │ ├── __init__.py │ ├── arbitrary_number.py │ ├── base_imputer.py │ ├── categorical.py │ ├── drop_missing_data.py │ ├── end_tail.py │ ├── mean_median.py │ ├── missing_indicator.py │ └── random_sample.py ├── outliers │ ├── __init__.py │ ├── artbitrary.py │ ├── base_outlier.py │ ├── trimmer.py │ └── winsorizer.py ├── pipeline │ ├── __init__.py │ └── pipeline.py ├── preprocessing │ ├── __init__.py │ ├── match_categories.py │ └── match_columns.py ├── py.typed ├── scaling │ ├── __init__.py │ └── mean_normalization.py ├── selection │ ├── __init__.py │ ├── _selection_constants.py │ ├── base_recursive_selector.py │ ├── base_selection_functions.py │ ├── base_selector.py │ ├── drop_constant_features.py │ ├── drop_correlated_features.py │ ├── drop_duplicate_features.py │ ├── drop_features.py │ ├── drop_psi_features.py │ ├── information_value.py │ ├── mrmr.py │ ├── probe_feature_selection.py │ ├── recursive_feature_addition.py │ ├── recursive_feature_elimination.py │ ├── shuffle_features.py │ ├── single_feature_performance.py │ ├── smart_correlation_selection.py │ └── target_mean_selection.py ├── tags.py ├── timeseries │ ├── __init__.py │ └── forecasting │ │ ├── __init__.py │ │ ├── base_forecast_transformers.py │ │ ├── expanding_window_features.py │ │ ├── lag_features.py │ │ └── window_features.py ├── transformation │ ├── __init__.py │ ├── arcsin.py │ ├── boxcox.py │ ├── log.py │ ├── power.py │ ├── reciprocal.py │ └── yeojohnson.py ├── variable_handling │ ├── __init__.py │ ├── _variable_type_checks.py │ ├── check_variables.py │ ├── dtypes.py │ ├── find_variables.py │ └── retain_variables.py └── wrappers │ ├── __init__.py │ └── wrappers.py ├── mypy.ini ├── paper ├── paper.bib └── paper.md ├── pytest.ini ├── requirements.txt ├── setup.py ├── test_requirements.txt ├── tests ├── __init__.py ├── check_estimators_with_parametrize_tests.py ├── conftest.py ├── estimator_checks │ ├── __init__.py │ ├── dataframe_for_checks.py │ ├── estimator_checks.py │ ├── fit_functionality_checks.py │ ├── get_feature_names_out_checks.py │ ├── init_params_allowed_values_checks.py │ ├── init_params_triggered_functionality_checks.py │ ├── non_fitted_error_checks.py │ └── variable_selection_checks.py ├── parametrize_with_checks_creation_v16.py ├── parametrize_with_checks_discretization_v16.py ├── parametrize_with_checks_encoders_v16.py ├── parametrize_with_checks_outliers_v16.py ├── 
parametrize_with_checks_prediction_v16.py ├── parametrize_with_checks_selection_v16.py ├── test_base_transformers │ ├── test_base_numerical_transformer.py │ ├── test_get_feature_names_out_mixin.py │ └── test_transform_xy_mixin.py ├── test_check_init_parameters │ ├── __init__.py │ ├── test_check_init_input_params.py │ ├── test_check_input_dictionary.py │ └── test_check_variables.py ├── test_creation │ ├── __init__.py │ ├── test_check_estimator_creation.py │ ├── test_cyclical_features.py │ ├── test_decision_tree_features.py │ ├── test_math_features.py │ └── test_relative_features.py ├── test_dataframe_checks.py ├── test_datasets │ ├── __init__().py │ └── datasets.py ├── test_datetime │ ├── __init__.py │ ├── conftest.py │ ├── test_check_estimator_datetime.py │ ├── test_datetime_features.py │ └── test_datetime_subtraction.py ├── test_discretisation │ ├── __init__.py │ ├── test_arbitrary_discretiser.py │ ├── test_base_discretizer.py │ ├── test_check_estimator_discretisers.py │ ├── test_decision_tree_discretiser.py │ ├── test_equal_frequency_discretiser.py │ ├── test_equal_width_discretiser.py │ └── test_geometric_width_discretiser.py ├── test_encoding │ ├── __init__.py │ ├── test_base_encoders │ │ ├── __init__.py │ │ ├── test_categorical_init_mixin.py │ │ ├── test_categorical_init_mixin_na.py │ │ └── test_categorical_method_mixin.py │ ├── test_check_estimator_encoders.py │ ├── test_count_frequency_encoder.py │ ├── test_decision_tree_encoder.py │ ├── test_helper_functions.py │ ├── test_mean_encoder.py │ ├── test_onehot_encoder.py │ ├── test_ordinal_encoder.py │ ├── test_rare_label_encoder.py │ ├── test_similarity_encoder.py │ └── test_woe │ │ ├── __init__.py │ │ ├── test_woe_class.py │ │ └── test_woe_encoder.py ├── test_imputation │ ├── __init__.py │ ├── test_arbitrary_number_imputer.py │ ├── test_categorical_imputer.py │ ├── test_check_estimator_imputers.py │ ├── test_drop_missing_data.py │ ├── test_end_tail_imputer.py │ ├── test_mean_mdian_imputer.py │ ├── test_missing_indicator.py │ └── test_random_sample_imputer.py ├── test_outliers │ ├── __init__.py │ ├── test_arbitrary_capper.py │ ├── test_check_estimator_outliers.py │ ├── test_outlier_trimmer.py │ └── test_winsorizer.py ├── test_pipeline │ ├── test_pipeline.py │ └── test_pipeline_sklearn.py ├── test_prediction │ ├── __init__.py │ ├── conftest.py │ ├── test_check_estimator_prediction.py │ ├── test_target_mean_classifier.py │ └── test_target_mean_regressor.py ├── test_preprocessing │ ├── __init__.py │ ├── test_check_estimator_preprocessing.py │ ├── test_match_categories.py │ └── test_match_columns.py ├── test_scaling │ ├── __init__.py │ └── test_mean_normalization.py ├── test_selection │ ├── __init__.py │ ├── conftest.py │ ├── test_base_selection_functions.py │ ├── test_base_selector.py │ ├── test_check_estimator_selectors.py │ ├── test_drop_constant_features.py │ ├── test_drop_correlated_features.py │ ├── test_drop_duplicate_features.py │ ├── test_drop_features.py │ ├── test_drop_high_psi_features.py │ ├── test_information_value.py │ ├── test_mrmr.py │ ├── test_probe_feature_selection.py │ ├── test_recursive_feature_addition.py │ ├── test_recursive_feature_elimination.py │ ├── test_recursive_feature_selectors.py │ ├── test_shuffle_features.py │ ├── test_single_feature_performance.py │ ├── test_smart_correlation_selection.py │ └── test_target_mean_selection.py ├── test_sklearn_compatible │ └── test_set_output.py ├── test_time_series │ ├── __init__.py │ └── test_forecasting │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── 
test_check_estimator_forecasting.py │ │ ├── test_expanding_window_features.py │ │ ├── test_lag_features.py │ │ └── test_window_features.py ├── test_transformation │ ├── __init__.py │ ├── test_arcsin_transformer.py │ ├── test_boxcox_transformer.py │ ├── test_check_estimator_transformers.py │ ├── test_log_transformer.py │ ├── test_logcp_transformer.py │ ├── test_power_transformer.py │ ├── test_reciprocal_transformer.py │ └── test_yeojohnson_transformer.py ├── test_variable_handling │ ├── __init__.py │ ├── conftest.py │ ├── test_check_variables.py │ ├── test_find_variables.py │ └── test_remove_variables.py └── test_wrappers │ ├── __init__.py │ ├── test_check_estimator_wrappers.py │ └── test_sklearn_wrapper.py └── tox.ini /.coveragerc: -------------------------------------------------------------------------------- 1 | # configuration for coverage.py 2 | 3 | [run] 4 | branch = True 5 | source = feature_engine 6 | include = */feature_engine/* 7 | omit = 8 | */setup.py 9 | 10 | 11 | [report] 12 | exclude_lines = 13 | pragma: no cover 14 | 15 | show_missing = True -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [solegalli] 4 | buy_me_a_coffee: solegalliy 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Additional context** 32 | Add any other context about the problem here. 33 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/docs.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Docs 3 | about: What documentation is missing? 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | Please let us know if you think there is information missing, or how else we can improve the documentation from Feature-engine. 11 | 12 | If you are referring to an existing page, please paste the url. 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 
15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/jupyter-notebook-examples.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Jupyter notebook examples 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | Please let us know what is missing from existing Jupyter notebook demos, or suggest a new demo that you think would be useful for the community. 11 | -------------------------------------------------------------------------------- /.github/workflow/workflow.yml: -------------------------------------------------------------------------------- 1 | name: CodeCov 2 | on: [push, pull_request] 3 | jobs: 4 | run: 5 | runs-on: ubuntu-latest 6 | env: 7 | OS: ubuntu-latest 8 | PYTHON: '3.9' 9 | steps: 10 | - uses: actions/checkout@v3 11 | with: 12 | fetch-depth: '2' 13 | 14 | - name: Setup Python 15 | uses: actions/setup-python@master 16 | with: 17 | python-version: 3.9 18 | - name: Generate Report 19 | run: | 20 | pip install coverage 21 | coverage run -m pytest 22 | - name: Upload Coverage to Codecov 23 | uses: codecov/codecov-action@v3.1.1 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | docs/build/ 69 | build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | 108 | # Miscelaneous 109 | .idea 110 | .vscode 111 | *.csv 112 | *.DS_Store 113 | *.db 114 | *.pptx -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the OS, Python version and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.11" 13 | 14 | # Build documentation in the docs/ directory with Sphinx 15 | sphinx: 16 | configuration: docs/conf.py 17 | 18 | # Build documentation with MkDocs 19 | #mkdocs: 20 | # configuration: mkdocs.yml 21 | 22 | # Optionally build your docs in additional formats such as PDF and ePub 23 | formats: all 24 | 25 | # Optionally set the version of Python and requirements required to build your docs 26 | python: 27 | install: 28 | - requirements: docs/requirements.txt -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Contributing to Feature-engine 2 | ============================== 3 | 4 | Feature-engine is a community-driven open-source project that relies on contributions from 5 | people like you. Every contribution, no matter how big or small, can make a significant 6 | impact on the project. If you've never contributed to an open-source project before, don't 7 | worry! Feature-engine is a great place to start. Your help will be appreciated and welcomed 8 | with gratitude. 9 | 10 | The latest contributing guide is available online at: 11 | 12 | https://feature-engine.trainindata.com/en/latest/contribute/index.html 13 | 14 | There are many ways to contribute to Feature-engine, with the most common ones 15 | being contribution of code or documentation to the project. Improving the 16 | documentation is no less important than improving the library itself. If you 17 | find a typo in the documentation, or have made improvements, do not hesitate to 18 | submit a GitHub pull request. 19 | 20 | Documentation can be found under the 21 | [doc/](https://github.com/feature-engine/feature_engine/tree/main/docs) directory. 
22 | 23 | You can check out requested enhancements and current bugs on the 24 | [issue tracker](https://github.com/feature-engine/feature_engine/issues), 25 | and suggest a PR with the fix. Every contribution is valuable and decreases the burden 26 | on the project maintainer. 27 | 28 | Another way to contribute is to report issues you're facing, and give a "thumbs 29 | up" on issues that others reported and that are relevant to you. It also helps 30 | us if you spread the word: reference the project from your blog and articles, 31 | link to it from your website, or simply star it in GitHub to say "I use it". 32 | 33 | Quick links 34 | ----------- 35 | 36 | * [Submitting a bug report or feature request](https://github.com/feature-engine/feature_engine/issues) 37 | * [Contributing code](https://feature-engine.trainindata.com/en/latest/contribute/contribute_code.html) 38 | * [Contributing docs](https://feature-engine.trainindata.com/en/latest/contribute/contribute_docs.html) 39 | * [Other ways to contribute](https://feature-engine.trainindata.com/en/latest/contribute/contribute_other.html) 40 | 41 | Code of Conduct 42 | --------------- 43 | 44 | We abide by the principles of openness, respect, and consideration of others 45 | of the Python Software Foundation: https://www.python.org/psf/codeofconduct/. -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2018-2024 The Feature-engine developers. 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.txt 2 | include *.md 3 | include *.pkl 4 | recursive-include ./feature_engine/* 5 | 6 | include feature_engine/VERSION 7 | 8 | include ./requirements.txt 9 | include ./LICENSE 10 | exclude *.log 11 | exclude *.cfg 12 | 13 | recursive-exclude * __pycache__ 14 | recursive-exclude * *.py[co] -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = feature_engine 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/_static/css/feature-engine.css: -------------------------------------------------------------------------------- 1 | @import url("theme.css"); 2 | 3 | 4 | /* Css template from sklearn: 5 | https://github.com/scikit-learn/scikit-learn/blob/f71c0313142c4e5f2f35a0021c36075cf8dba611/doc/themes/scikit-learn-modern/static/css/theme.css 6 | */ 7 | 8 | /* authors */ 9 | .authors-container { 10 | display: flex; 11 | flex-wrap: wrap; 12 | justify-content: center; 13 | } 14 | 15 | 16 | /* sponsors and testimonials */ 17 | 18 | div.sk-sponsor-div, div.sk-testimonial-div { 19 | display: flex; 20 | flex-wrap: wrap; 21 | -webkit-flex-align: center; 22 | -ms-flex-align: center; 23 | -webkit-align-items: center; 24 | align-items: center; 25 | } 26 | 27 | div.sk-sponsor-div-box, div.sk-testimonial-div-box { 28 | width: 100%; 29 | } 30 | 31 | @media screen and (min-width: 500px) { 32 | div.sk-sponsor-div-box, div.sk-testimonial-div-box { 33 | width: 50%; 34 | } 35 | } 36 | 37 | .caption { 38 | text-align: center 39 | } -------------------------------------------------------------------------------- /docs/_templates/class.rst: -------------------------------------------------------------------------------- 1 | {{objname}} 2 | {{ underline }}============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | {% block methods %} 9 | 10 | {% if methods %} 11 | .. rubric:: Methods 12 | 13 | .. autosummary:: 14 | {% for item in methods %} 15 | {% if '__init__' not in item %} 16 | ~{{ name }}.{{ item }} 17 | {% endif %} 18 | {%- endfor %} 19 | {% endif %} 20 | {% endblock %} 21 | 22 | .. include:: {{module}}.{{objname}}.examples 23 | 24 | .. raw:: html 25 | 26 |
27 | -------------------------------------------------------------------------------- /docs/_templates/numpydoc_docstring.rst: -------------------------------------------------------------------------------- 1 | {{index}} 2 | {{summary}} 3 | {{extended_summary}} 4 | {{parameters}} 5 | {{returns}} 6 | {{yields}} 7 | {{other_parameters}} 8 | {{attributes}} 9 | {{raises}} 10 | {{warns}} 11 | {{warnings}} 12 | {{see_also}} 13 | {{notes}} 14 | {{references}} 15 | {{examples}} 16 | {{methods}} -------------------------------------------------------------------------------- /docs/about/authors.rst: -------------------------------------------------------------------------------- 1 | .. raw :: html 2 | 3 | 4 |
5 | [raw HTML author cards; markup and images were stripped during extraction. Recoverable names: Soledad Galli, Morgan Sell]
-------------------------------------------------------------------------------- /docs/about/former_authors.rst: -------------------------------------------------------------------------------- 1 | .. raw :: html 2 | 3 | [raw HTML author cards; markup and images were stripped during extraction. Recoverable names: Chris Samiullah, Nicolas Galli]
16 |
17 |
-------------------------------------------------------------------------------- /docs/about/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | .. _about: 3 | 4 | About 5 | ===== 6 | 7 | In this section you will find information about the Feature-engine's origin, main 8 | developers, roadmap and overall vision for the package. You will also find information 9 | about how to cite Feature-engine and our main sponsors. 10 | 11 | .. toctree:: 12 | :maxdepth: 1 13 | 14 | about 15 | governance 16 | roadmap -------------------------------------------------------------------------------- /docs/api_doc/creation/CyclicalFeatures.rst: -------------------------------------------------------------------------------- 1 | CyclicalFeatures 2 | ================ 3 | 4 | .. autoclass:: feature_engine.creation.CyclicalFeatures 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/creation/DecisionTreeFeatures.rst: -------------------------------------------------------------------------------- 1 | DecisionTreeFeatures 2 | ==================== 3 | 4 | .. autoclass:: feature_engine.creation.DecisionTreeFeatures 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/api_doc/creation/MathFeatures.rst: -------------------------------------------------------------------------------- 1 | MathFeatures 2 | ============ 3 | 4 | .. autoclass:: feature_engine.creation.MathFeatures 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/creation/RelativeFeatures.rst: -------------------------------------------------------------------------------- 1 | RelativeFeatures 2 | ================ 3 | 4 | .. autoclass:: feature_engine.creation.RelativeFeatures 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/creation/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | Feature Creation 4 | ================ 5 | 6 | Feature-engine's creation transformers create and add new features to the dataframe 7 | by either combining or transforming existing features. 8 | 9 | .. toctree:: 10 | :maxdepth: 1 11 | 12 | MathFeatures 13 | RelativeFeatures 14 | CyclicalFeatures 15 | DecisionTreeFeatures 16 | 17 | 18 | Transformers in other Libraries 19 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 20 | 21 | Check also the following transformer from Scikit-learn: 22 | 23 | * `PolynomialFeatures `_ 24 | * `SplineTransformer `_ 25 | -------------------------------------------------------------------------------- /docs/api_doc/datasets/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | Datasets 4 | ======== 5 | 6 | We are starting to build a library of functions that allow you and us to quickly load 7 | datasets to demonstrate and test the functionality of Feature-engine (and, why not, 8 | other Python libraries). 9 | 10 | At the moment, we support the following functions: 11 | 12 | .. toctree:: 13 | :maxdepth: 1 14 | 15 | titanic 16 | -------------------------------------------------------------------------------- /docs/api_doc/datasets/titanic.rst: -------------------------------------------------------------------------------- 1 | load__titanic 2 | ============= 3 | 4 | .. 
currentmodule:: feature_engine.datasets 5 | 6 | .. autofunction:: load_titanic -------------------------------------------------------------------------------- /docs/api_doc/datetime/DatetimeFeatures.rst: -------------------------------------------------------------------------------- 1 | DatetimeFeatures 2 | ================ 3 | 4 | .. autoclass:: feature_engine.datetime.DatetimeFeatures 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/datetime/DatetimeSubtraction.rst: -------------------------------------------------------------------------------- 1 | DatetimeSubtraction 2 | =================== 3 | 4 | .. autoclass:: feature_engine.datetime.DatetimeSubtraction 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/datetime/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | Datetime Features 4 | ================= 5 | 6 | Feature-engine's datetime transformers are able to extract a wide variety of datetime 7 | features from existing datetime or object-like data. 8 | 9 | .. toctree:: 10 | :maxdepth: 1 11 | 12 | DatetimeFeatures 13 | DatetimeSubtraction 14 | 15 | -------------------------------------------------------------------------------- /docs/api_doc/discretisation/ArbitraryDiscretiser.rst: -------------------------------------------------------------------------------- 1 | ArbitraryDiscretiser 2 | ==================== 3 | 4 | .. autoclass:: feature_engine.discretisation.ArbitraryDiscretiser 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/discretisation/DecisionTreeDiscretiser.rst: -------------------------------------------------------------------------------- 1 | DecisionTreeDiscretiser 2 | ======================= 3 | 4 | .. autoclass:: feature_engine.discretisation.DecisionTreeDiscretiser 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/api_doc/discretisation/EqualFrequencyDiscretiser.rst: -------------------------------------------------------------------------------- 1 | EqualFrequencyDiscretiser 2 | ========================= 3 | 4 | .. autoclass:: feature_engine.discretisation.EqualFrequencyDiscretiser 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/discretisation/EqualWidthDiscretiser.rst: -------------------------------------------------------------------------------- 1 | EqualWidthDiscretiser 2 | ===================== 3 | 4 | .. autoclass:: feature_engine.discretisation.EqualWidthDiscretiser 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/api_doc/discretisation/GeometricWidthDiscretiser.rst: -------------------------------------------------------------------------------- 1 | GeometricWidthDiscretiser 2 | ========================= 3 | 4 | .. autoclass:: feature_engine.discretisation.GeometricWidthDiscretiser 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/api_doc/discretisation/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | .. 
currentmodule:: feature_engine.discretisation 3 | 4 | Discretisation 5 | ============== 6 | 7 | Feature-engine's discretisation transformers transform continuous variables into 8 | discrete features. This is accomplished, in general, by sorting the variable values 9 | into continuous intervals. 10 | 11 | **Summary** 12 | 13 | ===================================== ======================================================================== 14 | Transformer Functionality 15 | ===================================== ======================================================================== 16 | :class:`EqualFrequencyDiscretiser()` Sorts values into intervals with similar number of observations. 17 | :class:`EqualWidthDiscretiser()` Sorts values into intervals of equal size. 18 | :class:`ArbitraryDiscretiser()` Sorts values into intervals predefined by the user. 19 | :class:`DecisionTreeDiscretiser()` Replaces values by predictions of a decision tree, which are discrete. 20 | :class:`GeometricWidthDiscretiser()` Sorts variable into geometrical intervals. 21 | ===================================== ======================================================================== 22 | 23 | 24 | .. toctree:: 25 | :maxdepth: 1 26 | :hidden: 27 | 28 | EqualFrequencyDiscretiser 29 | EqualWidthDiscretiser 30 | ArbitraryDiscretiser 31 | DecisionTreeDiscretiser 32 | GeometricWidthDiscretiser 33 | 34 | Additional transformers for discretisation 35 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 36 | 37 | For discretisation using K-means, check Scikit-learn's 38 | `KBinsDiscretizer `_. 39 | -------------------------------------------------------------------------------- /docs/api_doc/encoding/CountFrequencyEncoder.rst: -------------------------------------------------------------------------------- 1 | CountFrequencyEncoder 2 | ===================== 3 | 4 | .. autoclass:: feature_engine.encoding.CountFrequencyEncoder 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/api_doc/encoding/DecisionTreeEncoder.rst: -------------------------------------------------------------------------------- 1 | DecisionTreeEncoder 2 | =================== 3 | 4 | .. autoclass:: feature_engine.encoding.DecisionTreeEncoder 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/encoding/MeanEncoder.rst: -------------------------------------------------------------------------------- 1 | MeanEncoder 2 | =========== 3 | 4 | .. autoclass:: feature_engine.encoding.MeanEncoder 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/encoding/OneHotEncoder.rst: -------------------------------------------------------------------------------- 1 | OneHotEncoder 2 | ============= 3 | 4 | .. autoclass:: feature_engine.encoding.OneHotEncoder 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/encoding/OrdinalEncoder.rst: -------------------------------------------------------------------------------- 1 | OrdinalEncoder 2 | ============== 3 | 4 | .. autoclass:: feature_engine.encoding.OrdinalEncoder 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/encoding/RareLabelEncoder.rst: -------------------------------------------------------------------------------- 1 | RareLabelEncoder 2 | ================ 3 | 4 | 5 | .. 
autoclass:: feature_engine.encoding.RareLabelEncoder 6 | :members: 7 | 8 | -------------------------------------------------------------------------------- /docs/api_doc/encoding/StringSimilarityEncoder.rst: -------------------------------------------------------------------------------- 1 | StringSimilarityEncoder 2 | ======================= 3 | 4 | .. autoclass:: feature_engine.encoding.StringSimilarityEncoder 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/encoding/WoEEncoder.rst: -------------------------------------------------------------------------------- 1 | WoEEncoder 2 | ========== 3 | 4 | .. autoclass:: feature_engine.encoding.WoEEncoder 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/encoding/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | Categorical Encoding 4 | ==================== 5 | 6 | Feature-engine's categorical encoders replace the categories of the variable with 7 | estimated or arbitrary numbers. 8 | 9 | **Summary of Feature-engine's encoders characteristics** 10 | 11 | ================================= ============ ================= ============== =============================================================== 12 | Transformer Regression Classification Multi-class Description 13 | ================================= ============ ================= ============== =============================================================== 14 | :class:`OneHotEncoder()` √ √ √ Adds dummy variables to represent each category 15 | :class:`OrdinalEncoder()` √ √ √ Replaces categories with an integer 16 | :class:`CountFreuencyEncoder()` √ √ √ Replaces categories with their count or frequency 17 | :class:`MeanEncoder()` √ √ x Replaces categories with the targe mean value 18 | :class:`WoEEncoder()` x √ x Replaces categories with the weight of the evidence 19 | :class:`DecisionTreeEncoder()` √ √ √ Replaces categories with the predictions of a decision tree 20 | :class:`RareLabelEncoder()` √ √ √ Groups infrequent categories into a single one 21 | ================================= ============ ================= ============== =============================================================== 22 | 23 | Feature-engine's categorical encoders encode only variables of type categorical or 24 | object by default. From version 1.1.0, you have the option to set the parameter 25 | `ignore_format` to True to make the transformers also accept numerical variables as 26 | input. 27 | 28 | 29 | .. toctree:: 30 | :maxdepth: 1 31 | 32 | OneHotEncoder 33 | CountFrequencyEncoder 34 | OrdinalEncoder 35 | MeanEncoder 36 | WoEEncoder 37 | DecisionTreeEncoder 38 | RareLabelEncoder 39 | StringSimilarityEncoder 40 | 41 | Other categorical encoding libraries 42 | ------------------------------------ 43 | 44 | For additional categorical encoding transformations, visit the open-source package 45 | `Category encoders `_. 46 | -------------------------------------------------------------------------------- /docs/api_doc/imputation/AddMissingIndicator.rst: -------------------------------------------------------------------------------- 1 | AddMissingIndicator 2 | =================== 3 | 4 | .. 
autoclass:: feature_engine.imputation.AddMissingIndicator 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/imputation/ArbitraryNumberImputer.rst: -------------------------------------------------------------------------------- 1 | ArbitraryNumberImputer 2 | ====================== 3 | 4 | .. autoclass:: feature_engine.imputation.ArbitraryNumberImputer 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/imputation/CategoricalImputer.rst: -------------------------------------------------------------------------------- 1 | CategoricalImputer 2 | ================== 3 | 4 | .. autoclass:: feature_engine.imputation.CategoricalImputer 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/imputation/DropMissingData.rst: -------------------------------------------------------------------------------- 1 | DropMissingData 2 | =============== 3 | 4 | .. autoclass:: feature_engine.imputation.DropMissingData 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/imputation/EndTailImputer.rst: -------------------------------------------------------------------------------- 1 | EndTailImputer 2 | ============== 3 | 4 | .. autoclass:: feature_engine.imputation.EndTailImputer 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/imputation/MeanMedianImputer.rst: -------------------------------------------------------------------------------- 1 | MeanMedianImputer 2 | ================= 3 | 4 | .. autoclass:: feature_engine.imputation.MeanMedianImputer 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/imputation/RandomSampleImputer.rst: -------------------------------------------------------------------------------- 1 | RandomSampleImputer 2 | =================== 3 | 4 | .. autoclass:: feature_engine.imputation.RandomSampleImputer 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/index.rst: -------------------------------------------------------------------------------- 1 | .. _api: 2 | 3 | API 4 | === 5 | 6 | Full API documentation for Feature-engine transformers. 7 | 8 | Transformation 9 | -------------- 10 | 11 | .. toctree:: 12 | :maxdepth: 1 13 | 14 | imputation/index 15 | encoding/index 16 | discretisation/index 17 | outliers/index 18 | transformation/index 19 | 20 | Creation 21 | -------- 22 | 23 | .. toctree:: 24 | :maxdepth: 1 25 | 26 | creation/index 27 | datetime/index 28 | 29 | 30 | Selection 31 | --------- 32 | .. toctree:: 33 | :maxdepth: 1 34 | 35 | selection/index 36 | 37 | Time series 38 | ----------- 39 | 40 | .. toctree:: 41 | :maxdepth: 1 42 | 43 | timeseries/index 44 | 45 | Other 46 | ----- 47 | .. toctree:: 48 | :maxdepth: 1 49 | 50 | preprocessing/index 51 | scaling/index 52 | wrappers/index 53 | 54 | Pipeline 55 | -------- 56 | .. toctree:: 57 | :maxdepth: 1 58 | 59 | pipeline/index 60 | 61 | Datasets 62 | -------- 63 | .. toctree:: 64 | :maxdepth: 1 65 | 66 | datasets/index 67 | 68 | Tools 69 | ----- 70 | .. 
toctree:: 71 | :maxdepth: 1 72 | 73 | variable_handling/index -------------------------------------------------------------------------------- /docs/api_doc/outliers/ArbitraryOutlierCapper.rst: -------------------------------------------------------------------------------- 1 | ArbitraryOutlierCapper 2 | ====================== 3 | 4 | .. autoclass:: feature_engine.outliers.ArbitraryOutlierCapper 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/api_doc/outliers/OutlierTrimmer.rst: -------------------------------------------------------------------------------- 1 | OutlierTrimmer 2 | ============== 3 | 4 | .. autoclass:: feature_engine.outliers.OutlierTrimmer 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/outliers/Winsorizer.rst: -------------------------------------------------------------------------------- 1 | Winsorizer 2 | ========== 3 | 4 | .. autoclass:: feature_engine.outliers.Winsorizer 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/outliers/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | .. currentmodule:: feature_engine.outliers 4 | 5 | Outlier Handling 6 | ================ 7 | 8 | Feature-engine's outlier transformers cap maximum or minimum values of a variable at an 9 | arbitrary or derived value. The OutlierTrimmer removes outliers from the dataset. 10 | 11 | =================================== ============================================================== 12 | Transformer Description 13 | =================================== ============================================================== 14 | :class:`Winsorizer()` Caps variables at automatically determined extreme values 15 | :class:`ArbitraryOutlierCapper()` Caps variables at values determined by the user 16 | :class:`OutlierTrimmer()` Removes outliers from the dataframe 17 | =================================== ============================================================== 18 | 19 | .. toctree:: 20 | :maxdepth: 1 21 | :hidden: 22 | 23 | Winsorizer 24 | ArbitraryOutlierCapper 25 | OutlierTrimmer -------------------------------------------------------------------------------- /docs/api_doc/pipeline/Pipeline.rst: -------------------------------------------------------------------------------- 1 | Pipeline 2 | ======== 3 | 4 | .. autoclass:: feature_engine.pipeline.Pipeline 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/api_doc/pipeline/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | .. currentmodule:: feature_engine.pipeline 4 | 5 | Pipeline 6 | ======== 7 | 8 | Feature-engine's Pipeline is equivalent to Scikit-learn's pipeline, and in addition, 9 | it accepts the method `transform_x_y`, to adjust both X and y, in those cases where 10 | rows are removed from X. 11 | 12 | .. toctree:: 13 | :maxdepth: 1 14 | 15 | Pipeline 16 | make_pipeline 17 | -------------------------------------------------------------------------------- /docs/api_doc/pipeline/make_pipeline.rst: -------------------------------------------------------------------------------- 1 | make_pipeline 2 | ============= 3 | 4 | .. currentmodule:: feature_engine.pipeline 5 | 6 | .. 
autofunction:: make_pipeline -------------------------------------------------------------------------------- /docs/api_doc/preprocessing/MatchCategories.rst: -------------------------------------------------------------------------------- 1 | MatchCategories 2 | =============== 3 | 4 | .. autoclass:: feature_engine.preprocessing.MatchCategories 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/preprocessing/MatchVariables.rst: -------------------------------------------------------------------------------- 1 | MatchVariables 2 | ============== 3 | 4 | .. autoclass:: feature_engine.preprocessing.MatchVariables 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/preprocessing/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | Preprocessing 4 | ============= 5 | 6 | Feature-engine's preprocessing transformers apply general data pre-processing 7 | and transformation procedures. 8 | 9 | .. toctree:: 10 | :maxdepth: 1 11 | 12 | MatchCategories 13 | MatchVariables 14 | -------------------------------------------------------------------------------- /docs/api_doc/scaling/MeanNormalizationScaler.rst: -------------------------------------------------------------------------------- 1 | MeanNormalizationScaler 2 | ======================= 3 | 4 | .. autoclass:: feature_engine.scaling.MeanNormalizationScaler 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/scaling/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | Scaling 4 | ======= 5 | 6 | Feature-engine's scaling transformers apply various scaling techniques to 7 | given columns 8 | 9 | .. toctree:: 10 | :maxdepth: 1 11 | 12 | MeanNormalizationScaler 13 | -------------------------------------------------------------------------------- /docs/api_doc/selection/DropConstantFeatures.rst: -------------------------------------------------------------------------------- 1 | DropConstantFeatures 2 | ==================== 3 | 4 | .. autoclass:: feature_engine.selection.DropConstantFeatures 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/selection/DropCorrelatedFeatures.rst: -------------------------------------------------------------------------------- 1 | DropCorrelatedFeatures 2 | ====================== 3 | 4 | .. autoclass:: feature_engine.selection.DropCorrelatedFeatures 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/api_doc/selection/DropDuplicateFeatures.rst: -------------------------------------------------------------------------------- 1 | DropDuplicateFeatures 2 | ===================== 3 | 4 | 5 | .. autoclass:: feature_engine.selection.DropDuplicateFeatures 6 | :members: 7 | 8 | -------------------------------------------------------------------------------- /docs/api_doc/selection/DropFeatures.rst: -------------------------------------------------------------------------------- 1 | DropFeatures 2 | ============= 3 | 4 | .. 
autoclass:: feature_engine.selection.DropFeatures 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/selection/DropHighPSIFeatures.rst: -------------------------------------------------------------------------------- 1 | DropHighPSIFeatures 2 | =================== 3 | 4 | 5 | .. autoclass:: feature_engine.selection.DropHighPSIFeatures 6 | :members: -------------------------------------------------------------------------------- /docs/api_doc/selection/MRMR.rst: -------------------------------------------------------------------------------- 1 | MRMR 2 | ==== 3 | 4 | 5 | .. autoclass:: feature_engine.selection.MRMR 6 | :members: -------------------------------------------------------------------------------- /docs/api_doc/selection/ProbeFeatureSelection.rst: -------------------------------------------------------------------------------- 1 | ProbeFeatureSelection 2 | ===================== 3 | 4 | .. autoclass:: feature_engine.selection.ProbeFeatureSelection 5 | :members: -------------------------------------------------------------------------------- /docs/api_doc/selection/RecursiveFeatureAddition.rst: -------------------------------------------------------------------------------- 1 | RecursiveFeatureAddition 2 | ======================== 3 | 4 | 5 | .. autoclass:: feature_engine.selection.RecursiveFeatureAddition 6 | :members: 7 | 8 | -------------------------------------------------------------------------------- /docs/api_doc/selection/RecursiveFeatureElimination.rst: -------------------------------------------------------------------------------- 1 | RecursiveFeatureElimination 2 | ============================ 3 | 4 | 5 | .. autoclass:: feature_engine.selection.RecursiveFeatureElimination 6 | :members: 7 | 8 | -------------------------------------------------------------------------------- /docs/api_doc/selection/SelectByInformationValue.rst: -------------------------------------------------------------------------------- 1 | SelectByInformationValue 2 | ======================== 3 | 4 | .. autoclass:: feature_engine.selection.SelectByInformationValue 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/api_doc/selection/SelectByShuffling.rst: -------------------------------------------------------------------------------- 1 | SelectByShuffling 2 | ================= 3 | 4 | .. autoclass:: feature_engine.selection.SelectByShuffling 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/api_doc/selection/SelectBySingleFeaturePerformance.rst: -------------------------------------------------------------------------------- 1 | SelectBySingleFeaturePerformance 2 | ================================ 3 | 4 | 5 | .. autoclass:: feature_engine.selection.SelectBySingleFeaturePerformance 6 | :members: 7 | -------------------------------------------------------------------------------- /docs/api_doc/selection/SelectByTargetMeanPerformance.rst: -------------------------------------------------------------------------------- 1 | SelectByTargetMeanPerformance 2 | ============================= 3 | 4 | 5 | .. 
autoclass:: feature_engine.selection.SelectByTargetMeanPerformance 6 | :members: 7 | 8 | -------------------------------------------------------------------------------- /docs/api_doc/selection/SmartCorrelatedSelection.rst: -------------------------------------------------------------------------------- 1 | SmartCorrelatedSelection 2 | ======================== 3 | 4 | 5 | .. autoclass:: feature_engine.selection.SmartCorrelatedSelection 6 | :members: 7 | 8 | -------------------------------------------------------------------------------- /docs/api_doc/timeseries/forecasting/ExpandingWindowFeatures.rst: -------------------------------------------------------------------------------- 1 | ExpandingWindowFeatures 2 | ======================= 3 | 4 | .. autoclass:: feature_engine.timeseries.forecasting.ExpandingWindowFeatures 5 | :members: -------------------------------------------------------------------------------- /docs/api_doc/timeseries/forecasting/LagFeatures.rst: -------------------------------------------------------------------------------- 1 | LagFeatures 2 | =========== 3 | 4 | .. autoclass:: feature_engine.timeseries.forecasting.LagFeatures 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/timeseries/forecasting/WindowFeatures.rst: -------------------------------------------------------------------------------- 1 | WindowFeatures 2 | ============== 3 | 4 | .. autoclass:: feature_engine.timeseries.forecasting.WindowFeatures 5 | :members: -------------------------------------------------------------------------------- /docs/api_doc/timeseries/forecasting/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | Forecasting Features 4 | ==================== 5 | 6 | Feature-engine's time series forecasting transformers create and add new features to the 7 | dataframe by lagging features or calculating statistics over windows of time in the 8 | past. 9 | 10 | .. toctree:: 11 | :maxdepth: 1 12 | 13 | LagFeatures 14 | WindowFeatures 15 | ExpandingWindowFeatures 16 | 17 | 18 | -------------------------------------------------------------------------------- /docs/api_doc/timeseries/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | Time Series Features 4 | ==================== 5 | 6 | Feature-engine's time series transformers derive features from time series data. 7 | 8 | .. toctree:: 9 | :maxdepth: 1 10 | 11 | forecasting/index 12 | 13 | 14 | -------------------------------------------------------------------------------- /docs/api_doc/transformation/ArcsinTransformer.rst: -------------------------------------------------------------------------------- 1 | ArcsinTransformer 2 | ================= 3 | 4 | 5 | .. autoclass:: feature_engine.transformation.ArcsinTransformer 6 | :members: 7 | 8 | -------------------------------------------------------------------------------- /docs/api_doc/transformation/BoxCoxTransformer.rst: -------------------------------------------------------------------------------- 1 | BoxCoxTransformer 2 | ================= 3 | 4 | .. 
autoclass:: feature_engine.transformation.BoxCoxTransformer 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/transformation/LogCpTransformer.rst: -------------------------------------------------------------------------------- 1 | LogCpTransformer 2 | ================ 3 | 4 | .. autoclass:: feature_engine.transformation.LogCpTransformer 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/api_doc/transformation/LogTransformer.rst: -------------------------------------------------------------------------------- 1 | LogTransformer 2 | ============== 3 | 4 | 5 | .. autoclass:: feature_engine.transformation.LogTransformer 6 | :members: 7 | 8 | -------------------------------------------------------------------------------- /docs/api_doc/transformation/PowerTransformer.rst: -------------------------------------------------------------------------------- 1 | PowerTransformer 2 | ================ 3 | 4 | 5 | .. autoclass:: feature_engine.transformation.PowerTransformer 6 | :members: 7 | 8 | -------------------------------------------------------------------------------- /docs/api_doc/transformation/ReciprocalTransformer.rst: -------------------------------------------------------------------------------- 1 | ReciprocalTransformer 2 | ===================== 3 | 4 | 5 | .. autoclass:: feature_engine.transformation.ReciprocalTransformer 6 | :members: 7 | 8 | -------------------------------------------------------------------------------- /docs/api_doc/transformation/YeoJohnsonTransformer.rst: -------------------------------------------------------------------------------- 1 | YeoJohnsonTransformer 2 | ===================== 3 | 4 | .. autoclass:: feature_engine.transformation.YeoJohnsonTransformer 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/transformation/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | Variance Stabilizing Transformations 4 | ==================================== 5 | 6 | Feature-engine's variable transformers apply various mathematical transformations to 7 | numerical variables. 8 | 9 | .. toctree:: 10 | :maxdepth: 1 11 | 12 | LogTransformer 13 | LogCpTransformer 14 | ReciprocalTransformer 15 | ArcsinTransformer 16 | PowerTransformer 17 | BoxCoxTransformer 18 | YeoJohnsonTransformer 19 | 20 |
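As a quick illustration, these transformers follow the usual fit/transform API; below is a minimal sketch with the Yeo-Johnson transformation (the toy data is made up, and the `lambda_dict_` attribute shown is an assumption about the fitted transformer rather than a documented guarantee here):

.. code:: python

    import numpy as np
    import pandas as pd

    from feature_engine.transformation import YeoJohnsonTransformer

    # toy right-skewed variable (illustrative data only)
    X = pd.DataFrame({"lot_area": np.random.exponential(scale=1000, size=100)})

    yjt = YeoJohnsonTransformer(variables=["lot_area"])
    X_t = yjt.fit_transform(X)

    # learned transformation parameter per variable (assumed attribute name)
    print(yjt.lambda_dict_)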
21 | Transformers in other Libraries 22 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 23 | 24 | These and additional transformations can be obtained with the following Scikit-learn 25 | classes: 26 | 27 | * `FunctionTransformer `_ 28 | * `PowerTransformer `_ 29 | 30 | Note that Scikit-learn classes return Numpy arrays and are applied to the entire dataset. 31 | -------------------------------------------------------------------------------- /docs/api_doc/variable_handling/check_all_variables.rst: -------------------------------------------------------------------------------- 1 | check_all_variables 2 | =================== 3 | 4 | .. currentmodule:: feature_engine.variable_handling 5 | 6 | .. autofunction:: check_all_variables -------------------------------------------------------------------------------- /docs/api_doc/variable_handling/check_categorical_variables.rst: -------------------------------------------------------------------------------- 1 | check_categorical_variables 2 | =========================== 3 | 4 | .. currentmodule:: feature_engine.variable_handling 5 | 6 | .. autofunction:: check_categorical_variables -------------------------------------------------------------------------------- /docs/api_doc/variable_handling/check_datetime_variables.rst: -------------------------------------------------------------------------------- 1 | check_datetime_variables 2 | ======================== 3 | 4 | .. currentmodule:: feature_engine.variable_handling 5 | 6 | .. autofunction:: check_datetime_variables -------------------------------------------------------------------------------- /docs/api_doc/variable_handling/check_numerical_variables.rst: -------------------------------------------------------------------------------- 1 | check_numerical_variables 2 | ========================= 3 | 4 | .. currentmodule:: feature_engine.variable_handling 5 | 6 | .. autofunction:: check_numerical_variables -------------------------------------------------------------------------------- /docs/api_doc/variable_handling/find_all_variables.rst: -------------------------------------------------------------------------------- 1 | find_all_variables 2 | ================== 3 | 4 | .. currentmodule:: feature_engine.variable_handling 5 | 6 | .. autofunction:: find_all_variables -------------------------------------------------------------------------------- /docs/api_doc/variable_handling/find_categorical_and_numerical_variables.rst: -------------------------------------------------------------------------------- 1 | find_categorical_and_numerical_variables 2 | ======================================== 3 | 4 | .. currentmodule:: feature_engine.variable_handling 5 | 6 | .. autofunction:: find_categorical_and_numerical_variables -------------------------------------------------------------------------------- /docs/api_doc/variable_handling/find_categorical_variables.rst: -------------------------------------------------------------------------------- 1 | find_categorical_variables 2 | ========================== 3 | 4 | .. currentmodule:: feature_engine.variable_handling 5 | 6 | .. autofunction:: find_categorical_variables -------------------------------------------------------------------------------- /docs/api_doc/variable_handling/find_datetime_variables.rst: -------------------------------------------------------------------------------- 1 | find_datetime_variables 2 | ======================= 3 | 4 | .. currentmodule:: feature_engine.variable_handling 5 | 6 | .. autofunction:: find_datetime_variables -------------------------------------------------------------------------------- /docs/api_doc/variable_handling/find_numerical_variables.rst: -------------------------------------------------------------------------------- 1 | find_numerical_variables 2 | ======================== 3 | 4 | .. currentmodule:: feature_engine.variable_handling 5 | 6 | .. autofunction:: find_numerical_variables -------------------------------------------------------------------------------- /docs/api_doc/variable_handling/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | Variable handling functions 4 | =========================== 5 | 6 | This set of functions finds variables of a specific type in a dataframe, or checks that a 7 | list of variables is of a specified data type. 8 | 9 | The `find` functions take a dataframe as an argument and return a list with the names 10 | of the variables of the desired type. 
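For example, a minimal sketch of how the `find` functions are typically used (the toy dataframe and the expected outputs shown in the comments are only for illustration):

.. code:: python

    import pandas as pd

    from feature_engine.variable_handling import (
        find_categorical_variables,
        find_numerical_variables,
    )

    df = pd.DataFrame({
        "age": [20, 31, 42],
        "fare": [7.25, 71.28, 8.05],
        "embarked": ["S", "C", "S"],
    })

    find_numerical_variables(df)    # ['age', 'fare']
    find_categorical_variables(df)  # ['embarked']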
11 | 12 | The `check` functions check that the variables in a list are all of the desired data type. 13 | 14 | The `retain` functions select the variables in a list if they fulfill a condition. 15 | 16 | These functions are used under the hood by all Feature-engine transformers to select the 17 | variables that they will modify. 18 | 19 | .. toctree:: 20 | :maxdepth: 1 21 | 22 | find_all_variables 23 | find_categorical_variables 24 | find_datetime_variables 25 | find_numerical_variables 26 | find_categorical_and_numerical_variables 27 | check_all_variables 28 | check_categorical_variables 29 | check_datetime_variables 30 | check_numerical_variables 31 | retain_variables_if_in_df 32 | -------------------------------------------------------------------------------- /docs/api_doc/variable_handling/retain_variables_if_in_df.rst: -------------------------------------------------------------------------------- 1 | retain_variables_if_in_df 2 | ========================= 3 | 4 | .. currentmodule:: feature_engine.variable_handling 5 | 6 | .. autofunction:: retain_variables_if_in_df -------------------------------------------------------------------------------- /docs/api_doc/wrappers/Wrapper.rst: -------------------------------------------------------------------------------- 1 | SklearnTransformerWrapper 2 | ========================= 3 | 4 | .. autoclass:: feature_engine.wrappers.SklearnTransformerWrapper 5 | :members: 6 | 7 | -------------------------------------------------------------------------------- /docs/api_doc/wrappers/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | .. currentmodule:: feature_engine.wrappers 4 | 5 | Scikit-learn Wrapper 6 | ==================== 7 | 8 | Feature-engine's Scikit-learn wrappers wrap Scikit-learn transformers, allowing you to 9 | apply them only to a selected subset of features. 10 | 11 | .. toctree:: 12 | :maxdepth: 1 13 | 14 | Wrapper 15 | 16 | Other wrappers 17 | ~~~~~~~~~~~~~~ 18 | 19 | The :class:`SklearnTransformerWrapper()` offers similar functionality to the 20 | `ColumnTransformer `_ 21 | class available in Scikit-learn. They differ in how the variables to transform are 22 | selected. -------------------------------------------------------------------------------- /docs/contribute/code_of_conduct.rst: -------------------------------------------------------------------------------- 1 | Code of Conduct 2 | =============== 3 | 4 | Feature-engine is an open source Python project. We follow the 5 | `Python Software Foundation Code of Conduct `_. 6 | All interactions among members of the Feature-engine community must meet those 7 | guidelines. This includes (but is not limited to) interactions through the mailing 8 | list, GitHub and StackOverflow. 9 | 10 | Everyone is expected to be open, considerate, and respectful of others no matter what 11 | their position is within the project. We show gratitude for any contribution, big or 12 | small. We welcome feedback and participation. We want to make Feature-engine a nice, 13 | welcoming and safe place for you to make your first contribution to open source, and why 14 | not the second, the third and so on :). 15 | -------------------------------------------------------------------------------- /docs/contribute/contribute_jup.rst: -------------------------------------------------------------------------------- 1 | .. 
-*- mode: rst -*- 2 | 3 | Contribute Jupyter notebooks 4 | ============================ 5 | 6 | We created a collection of Jupyter notebooks that showcase the main functionality of 7 | Feature-engine's transformers. We link these notebooks throughout the main documentation 8 | to offer users more examples and details about transformers and how to use them. 9 | 10 | **Note** that the Jupyter notebooks are hosted in a separate 11 | `Github repository `_. 12 | 13 | Here are some guidelines on how to add a new notebook or update an existing one. The 14 | contribution workflow is the same one we use for the main source code base. 15 | 16 | Jupyter contribution workflow 17 | ----------------------------- 18 | 19 | 1. Fork the `Github repository `_. 20 | 2. Clone your fork to your local computer: `git clone https://github.com/<your-github-username>/feature-engine-examples.git`. 21 | 3. Navigate into the project directory: `cd feature-engine-examples`. 22 | 4. If you haven't done so yet, install feature-engine: `pip install feature_engine`. 23 | 5. Create a feature branch with a meaningful name: `git checkout -b mynotebookbranch`. 24 | 6. Develop your notebook. 25 | 7. Add and commit the changes to your copy of the fork, then push the branch: `git add .`, `git commit -m "a meaningful commit message"`, `git push origin mynotebookbranch`. 26 | 8. Go to your fork on Github and make a PR to this repo. 27 | 9. Done! 28 | 29 | The review process for notebooks is usually much faster than for the main source code base. 30 | 31 | Jupyter creation guidelines 32 | --------------------------- 33 | 34 | If you want to add a new Jupyter notebook, there are a few things to note: 35 | 36 | - Make sure that the dataset you use is publicly available and has a clear license stating that it is free to use 37 | - Do not upload datasets to the repository 38 | - Add instructions on how to obtain and prepare the data for the demo 39 | - Throughout the notebook, add explanations of what you are going to do next and what conclusions you can draw from the output 40 | 41 | That's it! Fairly straightforward. 42 | 43 | We look forward to your contribution :) -------------------------------------------------------------------------------- /docs/donate.rst: -------------------------------------------------------------------------------- 1 | Sponsor us 2 | ---------- 3 | 4 | | 5 | 6 | .. image:: images/sponsors/call_for_sponsors.png 7 | :align: center 8 | :target: https://github.com/sponsors/feature-engine 9 | 10 | | 11 | 12 | Support Feature-engine financially through 13 | `Github Sponsors `_ and help further our 14 | mission to democratize machine learning and programming tools through open-source. 15 | 16 | You can find more details about how we use donations on the 17 | `sponsors page `_. 18 | 19 | | 20 | 21 | Sponsors 22 | -------- 23 | 24 | Feature-engine is a community-driven project; however, institutional, private and 25 | individual support helps to ensure its sustainability. The project would like to thank 26 | the following sponsors: 27 | 28 | | 29 | 30 | .. 
image:: images/sponsors/trainindata.png 31 | :width: 200pt 32 | :align: center 33 | :target: https://www.trainindata.com/ 34 | 35 | -------------------------------------------------------------------------------- /docs/images/1024px-Relationship_between_mean_and_median_under_different_skewness.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/1024px-Relationship_between_mean_and_median_under_different_skewness.png -------------------------------------------------------------------------------- /docs/images/Discretisation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/Discretisation.png -------------------------------------------------------------------------------- /docs/images/FeatureEnginePackageStructure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/FeatureEnginePackageStructure.png -------------------------------------------------------------------------------- /docs/images/FeatureEnginePackageStructureCrossSectional.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/FeatureEnginePackageStructureCrossSectional.png -------------------------------------------------------------------------------- /docs/images/FeatureEnginePackageStructureDatetimeText.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/FeatureEnginePackageStructureDatetimeText.png -------------------------------------------------------------------------------- /docs/images/FeatureEnginePackageStructureTimeseries.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/FeatureEnginePackageStructureTimeseries.png -------------------------------------------------------------------------------- /docs/images/PSI_distribution_case1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/PSI_distribution_case1.png -------------------------------------------------------------------------------- /docs/images/PSI_distribution_case3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/PSI_distribution_case3.png -------------------------------------------------------------------------------- /docs/images/PSI_distribution_case4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/PSI_distribution_case4.png -------------------------------------------------------------------------------- 
/docs/images/PSI_distribution_case5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/PSI_distribution_case5.png -------------------------------------------------------------------------------- /docs/images/Variable_Transformation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/Variable_Transformation.png -------------------------------------------------------------------------------- /docs/images/arbitraryvalueimputation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/arbitraryvalueimputation.png -------------------------------------------------------------------------------- /docs/images/bmilogcp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/bmilogcp.png -------------------------------------------------------------------------------- /docs/images/bmiraw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/bmiraw.png -------------------------------------------------------------------------------- /docs/images/boxplot-age-percentiles.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/boxplot-age-percentiles.png -------------------------------------------------------------------------------- /docs/images/boxplot-age.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/boxplot-age.png -------------------------------------------------------------------------------- /docs/images/boxplot-fare-mad.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/boxplot-fare-mad.png -------------------------------------------------------------------------------- /docs/images/boxplot-fare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/boxplot-fare.png -------------------------------------------------------------------------------- /docs/images/boxplot-sibsp-fare-iqr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/boxplot-sibsp-fare-iqr.png -------------------------------------------------------------------------------- /docs/images/boxplot-sibsp.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/boxplot-sibsp.png -------------------------------------------------------------------------------- /docs/images/boxplot-titanic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/boxplot-titanic.png -------------------------------------------------------------------------------- /docs/images/breast_cancer_arcsin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/breast_cancer_arcsin.png -------------------------------------------------------------------------------- /docs/images/breast_cancer_raw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/breast_cancer_raw.png -------------------------------------------------------------------------------- /docs/images/cookbook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/cookbook.png -------------------------------------------------------------------------------- /docs/images/dmlm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/dmlm.png -------------------------------------------------------------------------------- /docs/images/endtailimputer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/endtailimputer.png -------------------------------------------------------------------------------- /docs/images/equalfrequencydiscretisation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/equalfrequencydiscretisation.png -------------------------------------------------------------------------------- /docs/images/equalfrequencydiscretisation_gaussian.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/equalfrequencydiscretisation_gaussian.png -------------------------------------------------------------------------------- /docs/images/equalfrequencydiscretisation_skewed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/equalfrequencydiscretisation_skewed.png -------------------------------------------------------------------------------- /docs/images/equalwidthdiscretisation.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/equalwidthdiscretisation.png -------------------------------------------------------------------------------- /docs/images/f_statistic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/f_statistic.png -------------------------------------------------------------------------------- /docs/images/feml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/feml.png -------------------------------------------------------------------------------- /docs/images/fetsf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/fetsf.png -------------------------------------------------------------------------------- /docs/images/fork.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/fork.png -------------------------------------------------------------------------------- /docs/images/frequentcategoryimputer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/frequentcategoryimputer.png -------------------------------------------------------------------------------- /docs/images/fsml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/fsml.png -------------------------------------------------------------------------------- /docs/images/fsmlbook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/fsmlbook.png -------------------------------------------------------------------------------- /docs/images/fwml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/fwml.png -------------------------------------------------------------------------------- /docs/images/hour_sin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/hour_sin.png -------------------------------------------------------------------------------- /docs/images/hour_sin2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/hour_sin2.png -------------------------------------------------------------------------------- /docs/images/hour_sin3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/hour_sin3.png -------------------------------------------------------------------------------- /docs/images/hour_sin4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/hour_sin4.png -------------------------------------------------------------------------------- /docs/images/increasingwidthdisc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/increasingwidthdisc.png -------------------------------------------------------------------------------- /docs/images/ivml_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/ivml_logo.png -------------------------------------------------------------------------------- /docs/images/logcpraw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/logcpraw.png -------------------------------------------------------------------------------- /docs/images/logcptransform.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/logcptransform.png -------------------------------------------------------------------------------- /docs/images/logo/FeatureEngine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/logo/FeatureEngine.png -------------------------------------------------------------------------------- /docs/images/logo/Logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/logo/Logo.png -------------------------------------------------------------------------------- /docs/images/logo/Logo_name.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/logo/Logo_name.png -------------------------------------------------------------------------------- /docs/images/logo/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/logo/favicon.png -------------------------------------------------------------------------------- /docs/images/lotarea_pt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/lotarea_pt.png -------------------------------------------------------------------------------- /docs/images/lotarea_pt_custom_exp.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/lotarea_pt_custom_exp.png -------------------------------------------------------------------------------- /docs/images/lotarea_raw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/lotarea_raw.png -------------------------------------------------------------------------------- /docs/images/lotareaboxcox.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/lotareaboxcox.png -------------------------------------------------------------------------------- /docs/images/lotarealog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/lotarealog.png -------------------------------------------------------------------------------- /docs/images/lotareapower.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/lotareapower.png -------------------------------------------------------------------------------- /docs/images/lotarearaw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/lotarearaw.png -------------------------------------------------------------------------------- /docs/images/lotareareciprocal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/lotareareciprocal.png -------------------------------------------------------------------------------- /docs/images/lotareayeojohnson.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/lotareayeojohnson.png -------------------------------------------------------------------------------- /docs/images/lotshape-price-per-cat-enc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/lotshape-price-per-cat-enc.png -------------------------------------------------------------------------------- /docs/images/lotshape-price-per-cat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/lotshape-price-per-cat.png -------------------------------------------------------------------------------- /docs/images/meanmedianimputater_distributions.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/meanmedianimputater_distributions.png -------------------------------------------------------------------------------- /docs/images/medianimputation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/medianimputation.png -------------------------------------------------------------------------------- /docs/images/medinc_disc_arbitrarily.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/medinc_disc_arbitrarily.png -------------------------------------------------------------------------------- /docs/images/medinc_disc_arbitrarily2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/medinc_disc_arbitrarily2.png -------------------------------------------------------------------------------- /docs/images/medinc_hist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/medinc_hist.png -------------------------------------------------------------------------------- /docs/images/missingcategoryimputer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/missingcategoryimputer.png -------------------------------------------------------------------------------- /docs/images/missingindicator.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/missingindicator.png -------------------------------------------------------------------------------- /docs/images/mli_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/mli_logo.png -------------------------------------------------------------------------------- /docs/images/monotonic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/monotonic.png -------------------------------------------------------------------------------- /docs/images/mzoning-price-per-cat-enc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/mzoning-price-per-cat-enc.png -------------------------------------------------------------------------------- /docs/images/mzoning-price-per-cat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/mzoning-price-per-cat.png 
-------------------------------------------------------------------------------- /docs/images/nonnormalvars2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/nonnormalvars2.png -------------------------------------------------------------------------------- /docs/images/nonnormalvars2logtransformed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/nonnormalvars2logtransformed.png -------------------------------------------------------------------------------- /docs/images/nonnormalvars2transformed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/nonnormalvars2transformed.png -------------------------------------------------------------------------------- /docs/images/ordinal_encoding_monotonic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/ordinal_encoding_monotonic.png -------------------------------------------------------------------------------- /docs/images/pipelineprediction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/pipelineprediction.png -------------------------------------------------------------------------------- /docs/images/probe-importance-std.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/probe-importance-std.png -------------------------------------------------------------------------------- /docs/images/probe_feature_normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/probe_feature_normal.png -------------------------------------------------------------------------------- /docs/images/probe_features.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/probe_features.png -------------------------------------------------------------------------------- /docs/images/quasiconstant.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/quasiconstant.png -------------------------------------------------------------------------------- /docs/images/randomsampleimputation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/randomsampleimputation.png -------------------------------------------------------------------------------- 
/docs/images/reciprocal_transformer/reciprocal_transfomer_inverse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/reciprocal_transformer/reciprocal_transfomer_inverse.png -------------------------------------------------------------------------------- /docs/images/reciprocal_transformer/reciprocal_transfomer_new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/reciprocal_transformer/reciprocal_transfomer_new.png -------------------------------------------------------------------------------- /docs/images/reciprocal_transformer/reciprocal_transfomer_original.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/reciprocal_transformer/reciprocal_transfomer_original.png -------------------------------------------------------------------------------- /docs/images/reciprocal_transformer/reciprocal_transformer_3plots_new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/reciprocal_transformer/reciprocal_transformer_3plots_new.png -------------------------------------------------------------------------------- /docs/images/reciprocal_transformer/reciprocal_transformer_3plots_original.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/reciprocal_transformer/reciprocal_transformer_3plots_original.png -------------------------------------------------------------------------------- /docs/images/rfa_linreg_imp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/rfa_linreg_imp.png -------------------------------------------------------------------------------- /docs/images/rfa_perf_drifts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/rfa_perf_drifts.png -------------------------------------------------------------------------------- /docs/images/rfe_perf_drift.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/rfe_perf_drift.png -------------------------------------------------------------------------------- /docs/images/rfimportancemrmr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/rfimportancemrmr.png -------------------------------------------------------------------------------- /docs/images/selectionChart.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/selectionChart.png -------------------------------------------------------------------------------- /docs/images/shuffle-features-std.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/shuffle-features-std.png -------------------------------------------------------------------------------- /docs/images/single-feature-perf-std.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/single-feature-perf-std.png -------------------------------------------------------------------------------- /docs/images/single_feature_probes_imp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/single_feature_probes_imp.png -------------------------------------------------------------------------------- /docs/images/sponsors/call_for_sponsors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/sponsors/call_for_sponsors.png -------------------------------------------------------------------------------- /docs/images/sponsors/how-did-you-discover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/sponsors/how-did-you-discover.png -------------------------------------------------------------------------------- /docs/images/sponsors/trainindata.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/sponsors/trainindata.png -------------------------------------------------------------------------------- /docs/images/summary/imputersSummary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/summary/imputersSummary.png -------------------------------------------------------------------------------- /docs/images/summary/selectionSummary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/summary/selectionSummary.png -------------------------------------------------------------------------------- /docs/images/target-mean-sel-std.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/target-mean-sel-std.png -------------------------------------------------------------------------------- /docs/images/toydata_pt_raw.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/toydata_pt_raw.png -------------------------------------------------------------------------------- /docs/images/toydata_pt_transformed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/toydata_pt_transformed.png -------------------------------------------------------------------------------- /docs/images/toydata_pt_transformed_custom_exp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/toydata_pt_transformed_custom_exp.png -------------------------------------------------------------------------------- /docs/images/transformedcoupleYJ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/transformedcoupleYJ.png -------------------------------------------------------------------------------- /docs/images/treediscretisation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/treediscretisation.png -------------------------------------------------------------------------------- /docs/images/treemonotonicprediction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/treemonotonicprediction.png -------------------------------------------------------------------------------- /docs/images/treepredictionrounded.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/treepredictionrounded.png -------------------------------------------------------------------------------- /docs/images/untransformedcoupleYJ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/untransformedcoupleYJ.png -------------------------------------------------------------------------------- /docs/images/woe_encoding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/woe_encoding.png -------------------------------------------------------------------------------- /docs/images/woe_prediction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/woe_prediction.png -------------------------------------------------------------------------------- /docs/images/yeojohnsonformula.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/docs/images/yeojohnsonformula.png -------------------------------------------------------------------------------- /docs/quickstart/datasets.rst: -------------------------------------------------------------------------------- 1 | .. _datasets: 2 | 3 | Datasets 4 | ======== 5 | 6 | The user guide and examples included in Feature-engine's documentation are based on 7 | these 3 datasets: 8 | 9 | Titanic dataset 10 | ~~~~~~~~~~~~~~~ 11 | 12 | We use the dataset available in `openML `_ which can be 13 | downloaded from `here `_. 14 | 15 | Ames House Prices dataset 16 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 17 | 18 | We use the data set created by Professor Dean De Cock: 19 | * Dean De Cock (2011) Ames, Iowa: Alternative to the Boston Housing 20 | * Data as an End of Semester Regression Project, Journal of Statistics Education, Vol.19, No. 3. 21 | 22 | The examples are based on a copy of the dataset available on 23 | `Kaggle `_. 24 | 25 | The original data and documentation can be found here: 26 | 27 | * `Documentation `_ 28 | 29 | * `Data `_ 30 | 31 | Credit Approval dataset 32 | ~~~~~~~~~~~~~~~~~~~~~~~ 33 | 34 | We use the Credit Approval dataset from the UCI Machine Learning Repository: 35 | 36 | Dua, D. and Graff, C. (2019). `UCI Machine Learning Repository `_. 37 | Irvine, CA: University of California, School of Information and Computer Science. 38 | 39 | To download the dataset visit this 40 | `website `_ 41 | and click on "crx.data" to download the data set. 42 | 43 | To prepare the data for the examples: 44 | 45 | .. code:: python 46 | 47 | import random 48 | import pandas as pd 49 | import numpy as np 50 | 51 | # load data 52 | data = pd.read_csv('crx.data', header=None) 53 | 54 | # create variable names according to UCI Machine Learning information 55 | varnames = ['A'+str(s) for s in range(1,17)] 56 | data.columns = varnames 57 | 58 | # replace ? by np.nan 59 | data = data.replace('?', np.nan) 60 | 61 | # re-cast some variables to the correct types 62 | data['A2'] = data['A2'].astype('float') 63 | data['A14'] = data['A14'].astype('float') 64 | 65 | # encode target to binary 66 | data['A16'] = data['A16'].map({'+':1, '-':0}) 67 | 68 | # save the data 69 | data.to_csv('creditApprovalUCI.csv', index=False) -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | # Library Dependencies 2 | numpy>=1.18.2 3 | pandas>=1.0.3 4 | scikit-learn>=1.0.0 5 | scipy>=1.4.1 6 | statsmodels>=0.11.1 7 | 8 | # Documentation Dependencies 9 | docutils==0.16 10 | Sphinx>=4.3.2 11 | pydata_sphinx_theme>=0.7.2 12 | sphinx_autodoc_typehints>=1.11.1,<=1.21.3 13 | numpydoc>=0.9.2 14 | -------------------------------------------------------------------------------- /docs/resources/books.rst: -------------------------------------------------------------------------------- 1 | Books 2 | ===== 3 | 4 | You can learn more about how to use Feature-engine and feature engineering in general 5 | in the following books: 6 | 7 | .. figure:: ../images/cookbook.png 8 | :width: 200 9 | :figclass: align-center 10 | :align: left 11 | :target: https://www.packtpub.com/en-us/product/python-feature-engineering-cookbook-9781835883587 12 | 13 | Python Feature Engineering Cookbook 14 | 15 | 16 | .. 
figure:: ../images/fsmlbook.png 17 | :width: 200 18 | :figclass: align-center 19 | :align: left 20 | :target: https://www.trainindata.com/p/feature-selection-in-machine-learning-book 21 | 22 | Feature Selection in Machine Learning -------------------------------------------------------------------------------- /docs/resources/courses.rst: -------------------------------------------------------------------------------- 1 | Courses 2 | ======= 3 | 4 | You can learn more about how to use Feature-engine, and about feature engineering and feature 5 | selection in general, in the following online courses: 6 | 7 | .. figure:: ../images/feml.png 8 | :width: 300 9 | :figclass: align-center 10 | :align: left 11 | :target: https://www.trainindata.com/p/feature-engineering-for-machine-learning 12 | 13 | Feature Engineering for Machine Learning 14 | 15 | .. figure:: ../images/fsml.png 16 | :width: 300 17 | :figclass: align-center 18 | :align: right 19 | :target: https://www.trainindata.com/p/feature-selection-for-machine-learning 20 | 21 | Feature Selection for Machine Learning 22 | 23 | .. figure:: ../images/fwml.png 24 | :width: 300 25 | :figclass: align-center 26 | :align: left 27 | :target: https://www.courses.trainindata.com/p/forecasting-with-machine-learning 28 | 29 | Forecasting with Machine Learning 30 | 31 | .. figure:: ../images/fetsf.png 32 | :width: 300 33 | :figclass: align-center 34 | :align: right 35 | :target: https://www.trainindata.com/p/feature-engineering-for-forecasting 36 | 37 | Feature Engineering for Time Series Forecasting 38 | 39 | .. figure:: ../images/mli_logo.png 40 | :width: 300 41 | :figclass: align-center 42 | :align: left 43 | :target: https://www.courses.trainindata.com/p/machine-learning-interpretability 44 | 45 | Interpreting Machine Learning Models 46 | 47 | 48 | | 49 | | 50 | -------------------------------------------------------------------------------- /docs/resources/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | .. _learning_resources: 3 | 4 | Resources 5 | ========= 6 | 7 | Here you will find resources to learn more about Feature-engine, and about feature 8 | engineering and selection in general. 9 | 10 | We have gathered online courses, books, blogs, videos, podcasts, Jupyter notebooks and 11 | Kaggle kernels, so you can choose the resources that best suit the way you like 12 | to learn. 13 | 14 | .. toctree:: 15 | :maxdepth: 1 16 | 17 | courses 18 | books 19 | blogs 20 | tutorials -------------------------------------------------------------------------------- /docs/resources/tutorials.rst: -------------------------------------------------------------------------------- 1 | Tutorials 2 | ========= 3 | 4 | How To 5 | ------ 6 | 7 | Check our `Jupyter notebooks `_ 8 | showcasing the functionality of each Feature-engine transformer. 9 | 10 | Kaggle Kernels 11 | -------------- 12 | 13 | We also prepared Kaggle kernels with demos mixing data exploration, feature engineering, 14 | feature creation, feature selection and hyperparameter optimization of entire pipelines.
15 | 16 | - `Feature selection for bank customer satisfaction prediction `_ 17 | - `Feature engineering and selection for house price prediction `_ 18 | - `Feature creation for wine quality prediction `_ 19 | - `Feature engineering and model stacking for house price modelling `_ 20 | - `Feature engineering with Feature-engine and Randomized search `_ 21 | - `Feature engineering with Feature-engine and Grid search `_ 22 | 23 | 24 | 25 | Video tutorials 26 | --------------- 27 | 28 | You can find some videos on how to use Feature-engine in the 29 | `Feature-engine playlist `_ 30 | on Train in Data's YouTube channel. The list is a bit short at the moment, apologies. -------------------------------------------------------------------------------- /docs/sphinxext/README.txt: -------------------------------------------------------------------------------- 1 | ===================================== 2 | numpydoc -- Numpy's Sphinx extensions 3 | ===================================== 4 | 5 | Numpy's documentation uses several custom extensions to Sphinx. These 6 | are shipped in this ``numpydoc`` package, in case you want to make use 7 | of them in third-party projects. 8 | 9 | The following extensions are available: 10 | 11 | - ``numpydoc``: support for the Numpy docstring format in Sphinx, and add 12 | the code description directives ``np-function``, ``np-cfunction``, etc. 13 | that support the Numpy docstring syntax. 14 | 15 | - ``numpydoc.traitsdoc``: For gathering documentation about Traits attributes. 16 | 17 | - ``numpydoc.plot_directives``: Adaptation of Matplotlib's ``plot::`` 18 | directive. Note that this implementation may still undergo severe 19 | changes or eventually be deprecated. 20 | 21 | - ``numpydoc.only_directives``: (DEPRECATED) 22 | 23 | - ``numpydoc.autosummary``: (DEPRECATED) An ``autosummary::`` directive. 24 | Available in Sphinx 0.6.2 and (to-be) 1.0 as ``sphinx.ext.autosummary``; 25 | the Sphinx 1.0 version is recommended over the one included in 26 | Numpydoc. 27 | 28 | 29 | numpydoc 30 | ======== 31 | 32 | Numpydoc inserts a hook into Sphinx's autodoc that converts docstrings 33 | following the Numpy/Scipy format to a form palatable to Sphinx. 34 | 35 | Options 36 | ------- 37 | 38 | The following options can be set in conf.py: 39 | 40 | - numpydoc_use_plots: bool 41 | 42 | Whether to produce ``plot::`` directives for Examples sections that 43 | contain ``import matplotlib``. 44 | 45 | - numpydoc_show_class_members: bool 46 | 47 | Whether to show all members of a class in the Methods and Attributes 48 | sections automatically. 49 | 50 | - numpydoc_edit_link: bool (DEPRECATED -- edit your HTML template instead) 51 | 52 | Whether to insert an edit link after docstrings. 53 | -------------------------------------------------------------------------------- /docs/user_guide/datetime/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | Datetime Features 4 | ================= 5 | 6 | Feature-engine's datetime transformers are able to extract a wide variety of datetime 7 | features from existing datetime or object-like data. 8 | 9 | .. toctree:: 10 | :maxdepth: 1 11 | 12 | DatetimeFeatures 13 | DatetimeSubtraction -------------------------------------------------------------------------------- /docs/user_guide/discretisation/index.rst: -------------------------------------------------------------------------------- 1 | .. _discretization_transformers: 2 | 3 | ..
-*- mode: rst -*- 4 | 5 | Discretisation 6 | ============== 7 | 8 | Feature-engine's variable discretisation transformers transform continuous numerical 9 | variables into discrete variables. The discrete variables will contain contiguous 10 | intervals in the case of the equal frequency and equal width transformers. The 11 | Decision Tree discretiser will return a discrete variable, in the sense that the 12 | new feature takes a finite number of values. 13 | 14 | The following illustration shows the process of discretisation: 15 | 16 | .. figure:: ../../images/Discretisation.png 17 | :align: center 18 | :width: 500 19 | 20 | 21 | With discretisation, sometimes we can obtain a more homogeneous value spread from an 22 | originally skewed variable. But this is not always possible. 23 | 24 | **Discretisation plus encoding** 25 | 26 | Very often, after we discretise continuous numerical variables into discrete intervals, 27 | we want to continue engineering them as if they were categorical. This is common practice. 28 | Throughout the user guide, we point to Jupyter notebooks that showcase this functionality. 29 | 30 | **Discretisers** 31 | 32 | .. toctree:: 33 | :maxdepth: 1 34 | 35 | EqualFrequencyDiscretiser 36 | EqualWidthDiscretiser 37 | ArbitraryDiscretiser 38 | DecisionTreeDiscretiser 39 | GeometricWidthDiscretiser 40 | -------------------------------------------------------------------------------- /docs/user_guide/imputation/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | Missing Data Imputation 4 | ======================= 5 | 6 | Feature-engine's missing data imputers replace missing data with parameters estimated 7 | from the data, or with arbitrary values pre-defined by the user. The following image summarizes 8 | the imputers' main functionality. 9 | 10 | .. figure:: ../../images/summary/imputersSummary.png 11 | :align: center 12 | 13 | | 14 | 15 | In this guide, you will find code snippets to quickly apply the imputers 16 | to your datasets, as well as general knowledge and guidance on the imputation 17 | techniques. 18 | 19 | 20 | Imputers 21 | ~~~~~~~~ 22 | 23 | .. toctree:: 24 | :maxdepth: 1 25 | 26 | MeanMedianImputer 27 | ArbitraryNumberImputer 28 | EndTailImputer 29 | CategoricalImputer 30 | RandomSampleImputer 31 | AddMissingIndicator 32 | DropMissingData -------------------------------------------------------------------------------- /docs/user_guide/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | .. _user_guide: 3 | 4 | User Guide 5 | ========== 6 | 7 | In this section you will find additional information about Feature-engine's transformers 8 | and feature engineering transformations in general, as well as additional examples. 9 | 10 | Transformation 11 | -------------- 12 | 13 | .. toctree:: 14 | :maxdepth: 1 15 | 16 | imputation/index 17 | encoding/index 18 | discretisation/index 19 | outliers/index 20 | transformation/index 21 | scaling/index 22 | 23 | Creation 24 | -------- 25 | 26 | .. toctree:: 27 | :maxdepth: 1 28 | 29 | creation/index 30 | datetime/index 31 | 32 | 33 | Selection 34 | --------- 35 | .. toctree:: 36 | :maxdepth: 1 37 | 38 | selection/index 39 | 40 | 41 | Time series 42 | ----------- 43 | 44 | .. toctree:: 45 | :maxdepth: 1 46 | 47 | timeseries/index 48 | 49 | 50 | Other 51 | ----- 52 | ..
toctree:: 53 | :maxdepth: 1 54 | 55 | preprocessing/index 56 | wrappers/index 57 | 58 | Pipeline 59 | -------- 60 | .. toctree:: 61 | :maxdepth: 1 62 | 63 | pipeline/index 64 | 65 | Tools 66 | ----- 67 | .. toctree:: 68 | :maxdepth: 1 69 | 70 | variable_handling/index -------------------------------------------------------------------------------- /docs/user_guide/outliers/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | Outlier Handling 4 | ================ 5 | 6 | Feature-engine's outlier cappers cap maximum or minimum values of a variable at an 7 | arbitrary or derived value. The OutlierTrimmer removes outliers from the dataset. 8 | 9 | .. toctree:: 10 | :maxdepth: 1 11 | 12 | Winsorizer 13 | ArbitraryOutlierCapper 14 | OutlierTrimmer -------------------------------------------------------------------------------- /docs/user_guide/pipeline/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | Pipeline 4 | ======== 5 | 6 | Feature-engine's Pipeline is equivalent to Scikit-learn's pipeline, and in addition, 7 | it accepts the method `transform_x_y`, to adjust both X and y, in those cases where 8 | rows are removed from X. 9 | 10 | .. toctree:: 11 | :maxdepth: 1 12 | 13 | Pipeline 14 | make_pipeline 15 | -------------------------------------------------------------------------------- /docs/user_guide/preprocessing/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | Preprocessing 4 | ============= 5 | 6 | Feature-engine's preprocessing transformers apply general data pre-processing 7 | and transformation procedures. 8 | 9 | .. toctree:: 10 | :maxdepth: 1 11 | 12 | MatchCategories 13 | MatchVariables 14 | -------------------------------------------------------------------------------- /docs/user_guide/scaling/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | .. _scaling_user_guide: 3 | 4 | .. currentmodule:: feature_engine.scaling 5 | 6 | Scaling 7 | ======= 8 | 9 | `Feature scaling `_ 10 | is the process of transforming the range of numerical features so that they fit within a 11 | specific scale, usually to improve the performance and training stability of machine learning 12 | models. 13 | 14 | Scaling helps to normalize the input data, ensuring that each feature contributes proportionately 15 | to the final result, particularly in algorithms that are sensitive to the range of the data, 16 | such as gradient descent-based models (e.g., linear regression, logistic regression, neural networks) 17 | and distance-based models (e.g., K-nearest neighbors, clustering). 18 | 19 | Feature-engine's scalers replace the variables' values by the scaled ones. In this page, we 20 | discuss the importance of scaling numerical features, and then introduce the various 21 | scaling techniques supported by Feature-engine. 22 | 23 | Importance of scaling 24 | --------------------- 25 | 26 | Scaling is crucial in machine learning as it ensures that features contribute equally to model 27 | training, preventing bias toward variables with larger ranges. Properly scaled data enhances the 28 | performance of algorithms sensitive to the magnitude of input values, such as gradient descent 29 | and distance-based methods. 
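For example, the short sketch below applies mean normalization to two variables with very
different ranges. It assumes the standard Feature-engine fit/transform API and the
`variables` parameter of :class:`MeanNormalizationScaler` (listed under Scalers further down
this page); the toy data is made up for illustration:

.. code:: python

    import pandas as pd

    from feature_engine.scaling import MeanNormalizationScaler

    df = pd.DataFrame({
        "Age": [20, 21, 19, 18],
        "Income": [25000, 32000, 28000, 27000],
    })

    # scale the selected numerical variables with mean normalization
    scaler = MeanNormalizationScaler(variables=["Age", "Income"])
    df_scaled = scaler.fit_transform(df)

    print(df_scaled)

After mean normalization, each variable is centred around zero and bounded between -1 and 1,
so no single variable dominates distance or gradient-based computations.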
Additionally, scaling can improve convergence speed and overall model 30 | accuracy, leading to more reliable predictions. 31 | 32 | 33 | When to apply scaling 34 | --------------------- 35 | 36 | - **Training:** Most machine learning algorithms require data to be scaled before training, 37 | especially linear models, neural networks, and distance-based models. 38 | 39 | - **Feature Engineering:** Scaling can be essential for certain feature engineering techniques, 40 | like polynomial features. 41 | 42 | - **Resampling:** Some oversampling methods like SMOTE and many of the undersampling methods 43 | clean data based on KNN algorithms, which are distance-based models. 44 | 45 | 46 | When Scaling Is Not Necessary 47 | ----------------------------- 48 | 49 | Not all algorithms require scaling. For example, tree-based algorithms (like Decision Trees, 50 | Random Forests, Gradient Boosting) are generally invariant to scaling because they split data 51 | based on the order of values, not the magnitude. 52 | 53 | Scalers 54 | ------- 55 | 56 | .. toctree:: 57 | :maxdepth: 1 58 | 59 | MeanNormalizationScaler 60 | -------------------------------------------------------------------------------- /docs/user_guide/timeseries/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | .. _timeseries: 3 | 4 | .. currentmodule:: feature_engine.timeseries 5 | 6 | 7 | Time Series Features 8 | ==================== 9 | 10 | Feature-engine's time series transformers create features from time series data. 11 | 12 | .. toctree:: 13 | :maxdepth: 1 14 | 15 | forecasting/index 16 | 17 | | 18 | | 19 | | 20 | -------------------------------------------------------------------------------- /docs/user_guide/transformation/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | Variance Stabilizing Transformations 4 | ==================================== 5 | 6 | Feature-engine's variable transformers transform numerical variables with various 7 | mathematical transformations. 8 | 9 | Variable transformations are commonly used to spread the values of the original variables 10 | over a wider value range. See the following illustration: 11 | 12 | .. figure:: ../../images/Variable_Transformation.png 13 | :align: center 14 | 15 | 16 | Article 17 | ------- 18 | 19 | We added a lot of information about **variance stabilizing transformations** in this 20 | `article `_. 21 | 22 | **Note** 23 | 24 | Note, however, that improving the value spread is not always possible; it depends 25 | on the nature of the variable. 26 | 27 | **Transformers** 28 | 29 | .. toctree:: 30 | :maxdepth: 1 31 | 32 | LogTransformer 33 | LogCpTransformer 34 | ReciprocalTransformer 35 | ArcsinTransformer 36 | PowerTransformer 37 | BoxCoxTransformer 38 | YeoJohnsonTransformer 39 | -------------------------------------------------------------------------------- /docs/user_guide/variable_handling/check_numerical_variables.rst: -------------------------------------------------------------------------------- 1 | .. _check_num_vars: 2 | 3 | .. currentmodule:: feature_engine.variable_handling 4 | 5 | check_numerical_variables 6 | ========================= 7 | 8 | :class:`check_numerical_variables()` checks that the variables in the list are of 9 | type numerical. 10 | 11 | Let's create a toy dataset with numerical, categorical and datetime variables: 12 | 13 | ..
code:: python 14 | 15 | import pandas as pd 16 | df = pd.DataFrame({ 17 | "Name": ["tom", "nick", "krish", "jack"], 18 | "City": ["London", "Manchester", "Liverpool", "Bristol"], 19 | "Age": [20, 21, 19, 18], 20 | "Marks": [0.9, 0.8, 0.7, 0.6], 21 | "dob": pd.date_range("2020-02-24", periods=4, freq="T"), 22 | }) 23 | 24 | print(df.head()) 25 | 26 | We see the resulting dataframe below: 27 | 28 | .. code:: python 29 | 30 | Name City Age Marks dob 31 | 0 tom London 20 0.9 2020-02-24 00:00:00 32 | 1 nick Manchester 21 0.8 2020-02-24 00:01:00 33 | 2 krish Liverpool 19 0.7 2020-02-24 00:02:00 34 | 3 jack Bristol 18 0.6 2020-02-24 00:03:00 35 | 36 | Let's now check that 2 of the variables are of type numerical: 37 | 38 | .. code:: python 39 | 40 | from feature_engine.variable_handling import check_numerical_variables 41 | 42 | var_num = check_numerical_variables(df, ['Age', 'Marks']) 43 | 44 | var_num 45 | 46 | If the variables are numerical, the function returns their names in a list: 47 | 48 | .. code:: python 49 | 50 | ['Age', 'Marks'] 51 | 52 | If we pass a variable that is not of type numerical, 53 | :class:`check_numerical_variables()` will return an error: 54 | 55 | .. code:: python 56 | 57 | check_numerical_variables(df, ['Age', 'Name']) 58 | 59 | Below we see the error message: 60 | 61 | .. code:: python 62 | 63 | TypeError: Some of the variables are not numerical. Please cast them as numerical 64 | before using this transformer. 65 | -------------------------------------------------------------------------------- /docs/user_guide/variable_handling/find_numerical_variables.rst: -------------------------------------------------------------------------------- 1 | .. _find_num_vars: 2 | 3 | .. currentmodule:: feature_engine.variable_handling 4 | 5 | find_numerical_variables 6 | ======================== 7 | 8 | :class:`find_numerical_variables()` returns a list with the names of the numerical 9 | variables in the dataset. 10 | 11 | Let's create a toy dataset with numerical, categorical and datetime variables: 12 | 13 | .. code:: python 14 | 15 | import pandas as pd 16 | df = pd.DataFrame({ 17 | "Name": ["tom", "nick", "krish", "jack"], 18 | "City": ["London", "Manchester", "Liverpool", "Bristol"], 19 | "Age": [20, 21, 19, 18], 20 | "Marks": [0.9, 0.8, 0.7, 0.6], 21 | "dob": pd.date_range("2020-02-24", periods=4, freq="T"), 22 | }) 23 | 24 | print(df.head()) 25 | 26 | We see the resulting dataframe below: 27 | 28 | .. code:: python 29 | 30 | Name City Age Marks dob 31 | 0 tom London 20 0.9 2020-02-24 00:00:00 32 | 1 nick Manchester 21 0.8 2020-02-24 00:01:00 33 | 2 krish Liverpool 19 0.7 2020-02-24 00:02:00 34 | 3 jack Bristol 18 0.6 2020-02-24 00:03:00 35 | 36 | With :class:`find_numerical_variables()` we capture the names of all numerical 37 | variables in a list. So let's do that and then display the list: 38 | 39 | .. code:: python 40 | 41 | from feature_engine.variable_handling import find_numerical_variables 42 | 43 | var_num = find_numerical_variables(df) 44 | 45 | var_num 46 | 47 | We see the names of the numerical variables in the list below: 48 | 49 | .. code:: python 50 | 51 | ['Age', 'Marks'] 52 | 53 | If there are no numerical variables in the dataset, :class:`find_numerical_variables()` 54 | will raise an error. 55 | -------------------------------------------------------------------------------- /docs/user_guide/variable_handling/index.rst: -------------------------------------------------------------------------------- 1 | .. 
-*- mode: rst -*- 2 | 3 | Variable handling functions 4 | =========================== 5 | 6 | These functions find variables of a specific type in a dataframe, or check that a 7 | list of variables is of a specified data type. 8 | 9 | The `find` functions take a dataframe as an argument and return a list with the names 10 | of the variables of the desired type. 11 | 12 | The `check` functions check that all the variables in the list are of the desired data type. 13 | 14 | The `retain` functions select the variables in a list if they fulfill a condition. 15 | 16 | You can use these functions to identify different sets of variables based on their 17 | data type, to streamline your feature engineering pipelines, or to create your own 18 | Feature-engine or Scikit-learn compatible transformers. 19 | 20 | 21 | .. toctree:: 22 | :maxdepth: 1 23 | 24 | find_all_variables 25 | find_categorical_variables 26 | find_datetime_variables 27 | find_numerical_variables 28 | find_categorical_and_numerical_variables 29 | check_all_variables 30 | check_categorical_variables 31 | check_datetime_variables 32 | check_numerical_variables 33 | retain_variables_if_in_df 34 | -------------------------------------------------------------------------------- /docs/user_guide/wrappers/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | Scikit-learn Wrapper 4 | ==================== 5 | 6 | Feature-engine's Scikit-learn wrappers wrap Scikit-learn transformers, allowing their 7 | application to only a selected subset of features. 8 | 9 | .. toctree:: 10 | :maxdepth: 1 11 | 12 | Wrapper -------------------------------------------------------------------------------- /docs/versions/index.rst: -------------------------------------------------------------------------------- 1 | Other versions 2 | ============== 3 | 4 | Web-based documentation is available for the versions listed below: 5 | 6 | - `Feature-engine 1.6 `_ 7 | 8 | -------------------------------------------------------------------------------- /docs/whats_new/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | What's new 4 | ========== 5 | 6 | Find out what's new in each version release. 7 | 8 | ..
toctree:: 9 | :maxdepth: 2 10 | 11 | v_180 12 | v_170 13 | v_160 14 | v_150 15 | v_140 16 | v_130 17 | v_120 18 | v_1 19 | v_06 -------------------------------------------------------------------------------- /feature_engine/VERSION: -------------------------------------------------------------------------------- 1 | 1.8.3 2 | -------------------------------------------------------------------------------- /feature_engine/__init__.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | import feature_engine 4 | 5 | PACKAGE_ROOT = pathlib.Path(feature_engine.__file__).resolve().parent 6 | VERSION_PATH = PACKAGE_ROOT / "VERSION" 7 | 8 | name = "feature_engine" 9 | 10 | with open(VERSION_PATH, "r") as version_file: 11 | __version__ = version_file.read().strip() 12 | -------------------------------------------------------------------------------- /feature_engine/_base_transformers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/feature_engine/_base_transformers/__init__.py -------------------------------------------------------------------------------- /feature_engine/_check_init_parameters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/feature_engine/_check_init_parameters/__init__.py -------------------------------------------------------------------------------- /feature_engine/_check_init_parameters/check_init_input_params.py: -------------------------------------------------------------------------------- 1 | def _check_param_missing_values(missing_values): 2 | if missing_values not in ["raise", "ignore"]: 3 | raise ValueError( 4 | "missing_values takes only values 'raise' or 'ignore'. " 5 | f"Got {missing_values} instead." 6 | ) 7 | 8 | 9 | def _check_param_drop_original(drop_original): 10 | if not isinstance(drop_original, bool): 11 | raise ValueError( 12 | "drop_original takes only boolean values True and False. " 13 | f"Got {drop_original} instead." 14 | ) 15 | -------------------------------------------------------------------------------- /feature_engine/_check_init_parameters/check_input_dictionary.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | 4 | def _check_numerical_dict(dict_: Optional[dict]) -> Optional[dict]: 5 | """ 6 | Checks that all values in the dictionary are integers or floats. It can also 7 | take None as a value. 8 | 9 | Parameters 10 | ---------- 11 | dict_ : dict 12 | The dictionary that will be checked. 13 | 14 | Raises 15 | ------ 16 | ValueError 17 | If any of the values in the dictionary are not int or float. 18 | TypeError 19 | When input type is not a dictionary. 20 | """ 21 | 22 | if isinstance(dict_, dict): 23 | if not all([isinstance(x, (float, int)) for x in dict_.values()]): 24 | raise ValueError( 25 | "All values in the dictionary must be integer or float. " 26 | f"Got {dict_} instead." 27 | ) 28 | 29 | elif dict_ is not None: 30 | raise TypeError( 31 | f"The parameter can only take a dictionary or None. Got {dict_} instead."
32 | ) 33 | return None 34 | -------------------------------------------------------------------------------- /feature_engine/_check_init_parameters/check_variables.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List, Union 2 | 3 | Variables = Union[None, int, str, List[Union[str, int]]] 4 | 5 | 6 | def _check_variables_input_value(variables: Variables) -> Any: 7 | """ 8 | Checks that the input value for the `variables` parameter located in the init of 9 | all Feature-engine transformers is of the correct type. 10 | Allowed values are None, int, str or list of strings and integers. 11 | 12 | Parameters 13 | ---------- 14 | variables : string, int, list of strings, list of integers. Default=None 15 | 16 | Returns 17 | ------- 18 | variables: same as input 19 | """ 20 | 21 | msg = ( 22 | "`variables` should contain a string, an integer or a list of strings or " 23 | f"integers. Got {variables} instead." 24 | ) 25 | msg_dupes = "The list entered in `variables` contains duplicated variable names." 26 | msg_empty = "The list of `variables` is empty." 27 | 28 | if variables is not None: 29 | if isinstance(variables, list): 30 | if not all(isinstance(i, (str, int)) for i in variables): 31 | raise ValueError(msg) 32 | if len(variables) == 0: 33 | raise ValueError(msg_empty) 34 | if len(variables) != len(set(variables)): 35 | raise ValueError(msg_dupes) 36 | else: 37 | if not isinstance(variables, (str, int)): 38 | raise ValueError(msg) 39 | return variables 40 | -------------------------------------------------------------------------------- /feature_engine/_docstrings/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/feature_engine/_docstrings/__init__.py -------------------------------------------------------------------------------- /feature_engine/_docstrings/fit_attributes.py: -------------------------------------------------------------------------------- 1 | """Docstrings for the attributes that are generated during fit.""" 2 | 3 | _variables_attribute_docstring = """variables_: 4 | The group of variables that will be transformed. 5 | """.rstrip() 6 | 7 | _feature_names_in_docstring = """feature_names_in_: 8 | List with the names of features seen during `fit`. 9 | """.rstrip() 10 | 11 | _n_features_in_docstring = """n_features_in_: 12 | The number of features in the train set used in fit. 13 | """.rstrip() 14 | 15 | # used by discretisers 16 | _binner_dict_docstring = """binner_dict_: 17 | Dictionary with the interval limits per variable. 18 | """.rstrip() 19 | 20 | # used by imputers 21 | _imputer_dict_docstring = """imputer_dict_: 22 | Dictionary with the values to replace missing data in each variable. 23 | """.rstrip() 24 | 25 | # used by outlier module 26 | _right_tail_caps_docstring = """right_tail_caps_: 27 | Dictionary with the maximum values beyond which a value will be considered an 28 | outlier. 29 | """.rstrip() 30 | 31 | _left_tail_caps_docstring = """left_tail_caps_: 32 | Dictionary with the minimum values beyond which a value will be considered an 33 | outlier. 
34 | """.rstrip() 35 | 36 | # used by selection module 37 | _feature_importances_docstring = """feature_importances_: 38 | Pandas Series with the feature importance (comes from step 2) 39 | """.rstrip() 40 | 41 | _feature_importances_std_docstring = """feature_importances_std_: 42 | Pandas Series with the standard deviation of the feature importance. 43 | """.rstrip() 44 | 45 | _performance_drifts_docstring = """performance_drifts_: 46 | Dictionary with the performance drift per examined feature (comes from step 5). 47 | """.rstrip() 48 | 49 | _performance_drifts_std_docstring = """performance_drifts_std_: 50 | Dictionary with the performance drift's standard deviation of the 51 | examined feature (comes from step 5). 52 | """.rstrip() 53 | -------------------------------------------------------------------------------- /feature_engine/_docstrings/init_parameters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/feature_engine/_docstrings/init_parameters/__init__.py -------------------------------------------------------------------------------- /feature_engine/_docstrings/init_parameters/all_trasnformers.py: -------------------------------------------------------------------------------- 1 | """Docstrings for the parameters corresponding to the __init__""" 2 | 3 | _variables_numerical_docstring = """variables: list, default=None 4 | The list of numerical variables to transform. If None, the transformer will 5 | automatically find and select all numerical variables. 6 | """.rstrip() 7 | 8 | _variables_categorical_docstring = """variables: list, default=None 9 | The list of categorical variables that will be encoded. If None, the 10 | encoder will find and transform all variables of type object or categorical by 11 | default. You can also make the transformer accept numerical variables, see the 12 | parameter `ignore_format`. 13 | """.rstrip() 14 | 15 | _drop_original_docstring = """drop_original: bool, default=False 16 | If True, the original variables to transform will be dropped from the dataframe. 17 | """.rstrip() 18 | 19 | _missing_values_docstring = """missing_values: string, default='raise' 20 | Indicates if missing values should be ignored or raised. If `'raise'`, the 21 | transformer will return an error if the datasets to `fit` or `transform` 22 | contain missing values. If `'ignore'`, missing data will be ignored when 23 | learning parameters or performing the transformation. 24 | """.rstrip() 25 | -------------------------------------------------------------------------------- /feature_engine/_docstrings/init_parameters/discretisers.py: -------------------------------------------------------------------------------- 1 | _return_object_docstring = """return_object: bool, default=False 2 | Whether the discrete variable should be returned as type numeric or type 3 | object. If you would like to encode the discrete variables with Feature-engine's 4 | categorical encoders, use True. Alternatively, keep the default, False. 5 | """.rstrip() 6 | 7 | _return_boundaries_docstring = """return_boundaries: bool, default=False 8 | Whether the output should be the interval boundaries. If True, it returns 9 | the interval boundaries. If False, it returns integers. 10 | """.rstrip() 11 | 12 | _precision_docstring = """precision: int, default=3 13 | The precision at which to store and display the bin labels.
14 | """.rstrip() 15 | -------------------------------------------------------------------------------- /feature_engine/_docstrings/init_parameters/encoders.py: -------------------------------------------------------------------------------- 1 | _ignore_format_docstring = """ignore_format: bool, default=False 2 | This transformer operates only on variables of type object or categorical. To 3 | override this behaviour and allow the transformer to transform numerical 4 | variables as well, set to `True`.\n 5 | If `ignore_format` is `False`, the encoder will automatically select variables 6 | of type object or categorical, or check that the variables entered by the user 7 | are of type object or categorical. If `True`, the encoder will select all 8 | variables or accept all variables entered by the user, including those cast as 9 | numeric.\n 10 | In short, set to `True` when you want to encode numerical variables. 11 | """.rstrip() 12 | 13 | _unseen_docstring = """unseen: string, default='ignore' 14 | Indicates what to do when categories not present in the train set are 15 | encountered during transform. If `'raise'`, then unseen categories will raise 16 | an error. If `'ignore'`, then unseen categories will be encoded as NaN and a 17 | warning will be raised instead. 18 | """.rstrip() 19 | -------------------------------------------------------------------------------- /feature_engine/_docstrings/init_parameters/outliers.py: -------------------------------------------------------------------------------- 1 | _capping_method_docstring = """capping_method: str, default='gaussian' 2 | Desired outlier detection method. Can be 'gaussian', 'iqr', 'mad', 3 | 'quantiles'. \n 4 | The transformer will find the maximum and / or minimum values beyond which a 5 | data point will be considered an outlier using: 6 | **'gaussian'**: the Gaussian approximation. 7 | **'iqr'**: the IQR proximity rule. 8 | **'quantiles'**: the percentiles. 9 | **'mad'**: the Gaussian approximation but using robust statistics. 10 | """.rstrip() 11 | 12 | _tail_docstring = """tail: str, default='right' 13 | Whether to look for outliers on the right, left or both tails of the 14 | distribution. Can take 'left', 'right' or 'both'. 15 | """.rstrip() 16 | 17 | _fold_docstring = """fold: int, float or 'auto', default='auto'. 18 | The factor used to multiply the std, MAD or IQR to calculate 19 | the maximum or minimum allowed values. 20 | When 'auto', `fold` is set based on the `capping_method`: \n 21 | - If `capping_method='quantile'` then `'fold'` = 0.05; \n 22 | - If `capping_method='gaussian'` then `'fold'` = 3.0; \n 23 | - If `capping_method='mad'` then `'fold'` = 3.29; \n 24 | - If `capping_method='iqr'` then `'fold'` = 1.5. \n 25 | Recommended values are 2, 2.5 or 3 for the gaussian approximation, 26 | 1.5 or 3 for the IQR proximity rule and 3 or 3.5 for MAD rule. \n 27 | If `capping_method='quantile'`, then `'fold'` indicates the percentile. So if 28 | `fold=0.05`, the limits will be the 95th and 5th percentiles. \n 29 | **Note**: When `capping_method='quantile'`, the maximum `fold` allowed is 0.2, 30 | which will find boundaries at the 20th and 80th percentile. 
31 | """.rstrip() 32 | -------------------------------------------------------------------------------- /feature_engine/_docstrings/init_parameters/selection.py: -------------------------------------------------------------------------------- 1 | _confirm_variables_docstring = """confirm_variables: bool, default=False 2 | If set to True, variables that are not present in the input dataframe will 3 | be removed from the list of variables. Only used when passing a variable 4 | list to the parameter `variables`. See parameter variables for more details. 5 | """.rstrip() 6 | 7 | _estimator_docstring = """estimator: object 8 | A Scikit-learn estimator for regression or classification. 9 | The estimator must have either a `feature_importances` or a `coef_` 10 | attribute after fitting. 11 | """.rstrip() 12 | -------------------------------------------------------------------------------- /feature_engine/_docstrings/methods.py: -------------------------------------------------------------------------------- 1 | """Docstrings for the methods. They are meant to be used in the init docstrings of 2 | the transformers.""" 3 | 4 | _fit_not_learn_docstring = """fit: 5 | This transformer does not learn parameters. 6 | """.rstrip() 7 | 8 | _fit_transform_docstring = """fit_transform: 9 | Fit to data, then transform it. 10 | 11 | get_feature_names_out: 12 | Get output feature names for transformation. 13 | 14 | get_params: 15 | Get parameters for this estimator. 16 | 17 | set_params: 18 | Set the parameters of this estimator. 19 | """.rstrip() 20 | 21 | _inverse_transform_docstring = """inverse_transform: 22 | Convert the data back to the original representation. 23 | """.rstrip() 24 | 25 | # used in categorical encoders 26 | _transform_encoders_docstring = """transform: 27 | Encode the categories to numbers. 28 | """.rstrip() 29 | 30 | # used in creation module 31 | _transform_creation_docstring = """transform: 32 | Create new features. 33 | """.rstrip() 34 | 35 | # used in discretisers module 36 | _fit_discretiser_docstring = """fit: 37 | Find the interval limits. 38 | """.rstrip() 39 | 40 | _transform_discretiser_docstring = """transform: 41 | Sort continuous variable values into the intervals. 42 | """.rstrip() 43 | 44 | # used in imputation module 45 | _transform_imputers_docstring = """transform: 46 | Impute missing data. 47 | """.rstrip() 48 | -------------------------------------------------------------------------------- /feature_engine/_docstrings/selection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/feature_engine/_docstrings/selection/__init__.py -------------------------------------------------------------------------------- /feature_engine/_docstrings/substitute.py: -------------------------------------------------------------------------------- 1 | """Utilities for docstring in Feature-engine. 2 | 3 | Taken from the project imbalanced-learn: 4 | 5 | https://github.com/scikit-learn-contrib/imbalanced-learn/blob/ 6 | imblearn/utils/_docstring.py#L7 7 | """ 8 | 9 | 10 | class Substitution: 11 | """Decorate a function's or a class' docstring to perform string 12 | substitution on it. 13 | This decorator should be robust even if obj.__doc__ is None 14 | (for example, if -OO was passed to the interpreter). 
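    A sketch of the intended use (the placeholder name below is hypothetical)::

        @Substitution(variables="The variables to transform.")
        def func():
            '''A docstring with a {variables} placeholder.'''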
15 | """ 16 | 17 | def __init__(self, *args, **kwargs): 18 | if args and kwargs: 19 | raise AssertionError("Only positional or keyword args are allowed") 20 | 21 | self.params = args or kwargs 22 | 23 | def __call__(self, obj): 24 | obj.__doc__ = obj.__doc__.format(**self.params) 25 | return obj 26 | -------------------------------------------------------------------------------- /feature_engine/_prediction/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/feature_engine/_prediction/__init__.py -------------------------------------------------------------------------------- /feature_engine/creation/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The module creation includes classes to create new variables by combination of existing 3 | variables in the dataframe. 4 | """ 5 | from .cyclical_features import CyclicalFeatures 6 | from .decision_tree_features import DecisionTreeFeatures 7 | from .math_features import MathFeatures 8 | from .relative_features import RelativeFeatures 9 | 10 | __all__ = [ 11 | "DecisionTreeFeatures", 12 | "MathFeatures", 13 | "RelativeFeatures", 14 | "CyclicalFeatures", 15 | ] 16 | -------------------------------------------------------------------------------- /feature_engine/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .titanic import load_titanic 2 | 3 | __all__ = ["load_titanic"] 4 | -------------------------------------------------------------------------------- /feature_engine/datetime/__init__.py: -------------------------------------------------------------------------------- 1 | "The module datetime computes features from dates and times." 
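# A minimal usage sketch (a hedged example; it assumes the `features_to_extract`
# parameter of DatetimeFeatures, imported below, and a dataframe X with datetime
# or object-like columns):
#
#     from feature_engine.datetime import DatetimeFeatures
#
#     dtf = DatetimeFeatures(features_to_extract=["month", "year", "day_of_week"])
#     X_t = dtf.fit_transform(X)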
2 | 3 | from .datetime import DatetimeFeatures 4 | from .datetime_subtraction import DatetimeSubtraction 5 | 6 | __all__ = ["DatetimeFeatures", "DatetimeSubtraction"] 7 | -------------------------------------------------------------------------------- /feature_engine/datetime/_datetime_constants.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | FEATURES_SUPPORTED = [ 4 | "month", 5 | "quarter", 6 | "semester", 7 | "year", 8 | "week", 9 | "day_of_week", 10 | "day_of_month", 11 | "day_of_year", 12 | "weekend", 13 | "month_start", 14 | "month_end", 15 | "quarter_start", 16 | "quarter_end", 17 | "year_start", 18 | "year_end", 19 | "leap_year", 20 | "days_in_month", 21 | "hour", 22 | "minute", 23 | "second", 24 | ] 25 | 26 | FEATURES_DEFAULT = [ 27 | "month", 28 | "year", 29 | "day_of_week", 30 | "day_of_month", 31 | "hour", 32 | "minute", 33 | "second", 34 | ] 35 | 36 | FEATURES_SUFFIXES = { 37 | "month": "_month", 38 | "quarter": "_quarter", 39 | "semester": "_semester", 40 | "year": "_year", 41 | "week": "_week", 42 | "day_of_week": "_day_of_week", 43 | "day_of_month": "_day_of_month", 44 | "day_of_year": "_day_of_year", 45 | "weekend": "_weekend", 46 | "month_start": "_month_start", 47 | "month_end": "_month_end", 48 | "quarter_start": "_quarter_start", 49 | "quarter_end": "_quarter_end", 50 | "year_start": "_year_start", 51 | "year_end": "_year_end", 52 | "leap_year": "_leap_year", 53 | "days_in_month": "_days_in_month", 54 | "hour": "_hour", 55 | "minute": "_minute", 56 | "second": "_second", 57 | } 58 | 59 | FEATURES_FUNCTIONS = { 60 | "month": lambda x: x.dt.month, 61 | "quarter": lambda x: x.dt.quarter, 62 | "semester": lambda x: np.where(x.dt.month <= 6, 1, 2).astype(np.int64), 63 | "year": lambda x: x.dt.year, 64 | "week": lambda x: x.dt.isocalendar().week.astype(np.int64), 65 | "day_of_week": lambda x: x.dt.dayofweek, 66 | "day_of_month": lambda x: x.dt.day, 67 | "day_of_year": lambda x: x.dt.dayofyear, 68 | "weekend": lambda x: np.where(x.dt.dayofweek <= 4, 0, 1).astype(np.int64), 69 | "month_start": lambda x: x.dt.is_month_start.astype(np.int64), 70 | "month_end": lambda x: x.dt.is_month_end.astype(np.int64), 71 | "quarter_start": lambda x: x.dt.is_quarter_start.astype(np.int64), 72 | "quarter_end": lambda x: x.dt.is_quarter_end.astype(np.int64), 73 | "year_start": lambda x: x.dt.is_year_start.astype(np.int64), 74 | "year_end": lambda x: x.dt.is_year_end.astype(np.int64), 75 | "leap_year": lambda x: x.dt.is_leap_year.astype(np.int64), 76 | "days_in_month": lambda x: x.dt.days_in_month.astype(np.int64), 77 | "hour": lambda x: x.dt.hour, 78 | "minute": lambda x: x.dt.minute, 79 | "second": lambda x: x.dt.second, 80 | } 81 | -------------------------------------------------------------------------------- /feature_engine/discretisation/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The module discretisation includes classes to sort continuous variables into bins or 3 | intervals. 
4 | """ 5 | 6 | from .arbitrary import ArbitraryDiscretiser 7 | from .decision_tree import DecisionTreeDiscretiser 8 | from .equal_frequency import EqualFrequencyDiscretiser 9 | from .equal_width import EqualWidthDiscretiser 10 | from .geometric_width import GeometricWidthDiscretiser 11 | 12 | __all__ = [ 13 | "DecisionTreeDiscretiser", 14 | "EqualFrequencyDiscretiser", 15 | "EqualWidthDiscretiser", 16 | "ArbitraryDiscretiser", 17 | "GeometricWidthDiscretiser", 18 | ] 19 | -------------------------------------------------------------------------------- /feature_engine/encoding/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The module encoding includes classes to transform categorical variables into numerical. 3 | """ 4 | 5 | from .count_frequency import CountFrequencyEncoder 6 | from .decision_tree import DecisionTreeEncoder 7 | from .mean_encoding import MeanEncoder 8 | from .one_hot import OneHotEncoder 9 | from .ordinal import OrdinalEncoder 10 | from .rare_label import RareLabelEncoder 11 | from .similarity_encoder import StringSimilarityEncoder 12 | from .woe import WoEEncoder 13 | 14 | __all__ = [ 15 | "CountFrequencyEncoder", 16 | "DecisionTreeEncoder", 17 | "MeanEncoder", 18 | "OneHotEncoder", 19 | "OrdinalEncoder", 20 | "RareLabelEncoder", 21 | "StringSimilarityEncoder", 22 | "WoEEncoder", 23 | ] 24 | -------------------------------------------------------------------------------- /feature_engine/encoding/_helper_functions.py: -------------------------------------------------------------------------------- 1 | def check_parameter_unseen(unseen, accepted_values): 2 | if not isinstance(accepted_values, list) or not all( 3 | isinstance(item, str) for item in accepted_values 4 | ): 5 | raise ValueError( 6 | "accepted_values should be a list of strings. " 7 | f" Got {accepted_values} instead." 8 | ) 9 | if unseen not in accepted_values: 10 | raise ValueError( 11 | f"Parameter `unseen` takes only values {', '.join(accepted_values)}." 12 | f" Got {unseen} instead." 13 | ) 14 | -------------------------------------------------------------------------------- /feature_engine/imputation/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The module imputation includes classes to perform missing data imputation 3 | """ 4 | 5 | from .arbitrary_number import ArbitraryNumberImputer 6 | from .categorical import CategoricalImputer 7 | from .drop_missing_data import DropMissingData 8 | from .end_tail import EndTailImputer 9 | from .mean_median import MeanMedianImputer 10 | from .missing_indicator import AddMissingIndicator 11 | from .random_sample import RandomSampleImputer 12 | 13 | __all__ = [ 14 | "MeanMedianImputer", 15 | "ArbitraryNumberImputer", 16 | "CategoricalImputer", 17 | "EndTailImputer", 18 | "AddMissingIndicator", 19 | "RandomSampleImputer", 20 | "DropMissingData", 21 | ] 22 | -------------------------------------------------------------------------------- /feature_engine/outliers/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The module outliers includes classes to remove or cap outliers. 
3 | """ 4 | 5 | from .artbitrary import ArbitraryOutlierCapper 6 | from .trimmer import OutlierTrimmer 7 | from .winsorizer import Winsorizer 8 | 9 | __all__ = ["Winsorizer", "ArbitraryOutlierCapper", "OutlierTrimmer"] 10 | -------------------------------------------------------------------------------- /feature_engine/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline import Pipeline, make_pipeline 2 | 3 | __all__ = ["Pipeline", "make_pipeline"] 4 | -------------------------------------------------------------------------------- /feature_engine/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The module preprocessing includes classes and functions for general data pre-processing 3 | and transformation. 4 | """ 5 | 6 | from .match_categories import MatchCategories 7 | from .match_columns import MatchVariables 8 | 9 | __all__ = [ 10 | "MatchCategories", 11 | "MatchVariables", 12 | ] 13 | -------------------------------------------------------------------------------- /feature_engine/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/feature_engine/py.typed -------------------------------------------------------------------------------- /feature_engine/scaling/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The module scaling includes classes to transform variables using various 3 | scaling methods. 4 | """ 5 | 6 | from .mean_normalization import MeanNormalizationScaler 7 | 8 | __all__ = [ 9 | "MeanNormalizationScaler", 10 | ] 11 | -------------------------------------------------------------------------------- /feature_engine/selection/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The module selection includes classes to select features or remove unwanted features. 
3 | """ 4 | from .drop_constant_features import DropConstantFeatures 5 | from .drop_correlated_features import DropCorrelatedFeatures 6 | from .drop_duplicate_features import DropDuplicateFeatures 7 | from .drop_features import DropFeatures 8 | from .drop_psi_features import DropHighPSIFeatures 9 | from .information_value import SelectByInformationValue 10 | from .probe_feature_selection import ProbeFeatureSelection 11 | from .recursive_feature_addition import RecursiveFeatureAddition 12 | from .recursive_feature_elimination import RecursiveFeatureElimination 13 | from .shuffle_features import SelectByShuffling 14 | from .single_feature_performance import SelectBySingleFeaturePerformance 15 | from .smart_correlation_selection import SmartCorrelatedSelection 16 | from .target_mean_selection import SelectByTargetMeanPerformance 17 | from .mrmr import MRMR 18 | 19 | __all__ = [ 20 | "DropFeatures", 21 | "DropConstantFeatures", 22 | "DropDuplicateFeatures", 23 | "DropCorrelatedFeatures", 24 | "DropHighPSIFeatures", 25 | "SmartCorrelatedSelection", 26 | "SelectByShuffling", 27 | "SelectBySingleFeaturePerformance", 28 | "RecursiveFeatureAddition", 29 | "RecursiveFeatureElimination", 30 | "SelectByTargetMeanPerformance", 31 | "SelectByInformationValue", 32 | "ProbeFeatureSelection", 33 | "MRMR", 34 | ] 35 | -------------------------------------------------------------------------------- /feature_engine/selection/_selection_constants.py: -------------------------------------------------------------------------------- 1 | _CLASSIFICATION_METRICS = [ 2 | "accuracy", 3 | "balanced_accuracy", 4 | "top_k_accuracy", 5 | "average_precision", 6 | "neg_brier_score", 7 | "f1", 8 | "f1_micro", 9 | "f1_macro", 10 | "f1_weighted", 11 | "f1_samples", 12 | "neg_log_loss", 13 | "precision", 14 | "precision_micro", 15 | "precision_macro", 16 | "precision_weighted", 17 | "precision_samples", 18 | "recall", 19 | "recall_micro", 20 | "recall_macro", 21 | "recall_weighted", 22 | "recall_samples", 23 | "jaccard", 24 | "jaccard_micro", 25 | "jaccard_macro", 26 | "jaccard_weighted", 27 | "jaccard_samples", 28 | "roc_auc", 29 | "roc_auc_ovr", 30 | "roc_auc_ovo", 31 | "roc_auc_ovr_weighted", 32 | "roc_auc_ovo_weighted", 33 | ] 34 | 35 | _REGRESSION_METRICS = [ 36 | "explained_variance", 37 | "r2", 38 | "max_error", 39 | "neg_median_absolute_error", 40 | "neg_mean_absolute_error", 41 | "neg_mean_absolute_percentage_error", 42 | "neg_mean_squared_error", 43 | "neg_mean_squared_log_error", 44 | "neg_root_mean_squared_error", 45 | "neg_mean_poisson_deviance", 46 | "neg_mean_gamma_deviance", 47 | ] 48 | -------------------------------------------------------------------------------- /feature_engine/tags.py: -------------------------------------------------------------------------------- 1 | import sklearn 2 | from sklearn.utils.fixes import parse_version 3 | 4 | sklearn_version = parse_version(parse_version(sklearn.__version__).base_version) 5 | 6 | 7 | def _return_tags(): 8 | tags = { 9 | "preserves_dtype": [], 10 | "_xfail_checks": { 11 | # Complex data in math terms, are values like 4i (imaginary numbers 12 | # so to speak). I've never seen such a thing in the dfs I've 13 | # worked with, so I don't think we need this test. 14 | "check_complex_data": "Test not needed.", 15 | # check that estimators treat dtype object as numeric if possible 16 | "check_dtype_object": "Feature-engine transformers use dtypes to select " 17 | "between numerical and categorical variables. 
Feature-engine trusts the " 18 | "user casts the variables appropriately", 19 | # Test fails because FE does not like the sklearn class _NotAnArray 20 | # The test aims to check that the check_X_y function from sklearn is 21 | # working, but we do not use that check, because we work with dfs. 22 | "check_transformer_data_not_an_array": "Ok to fail", 23 | "check_sample_weights_not_an_array": "Ok to fail", 24 | # TODO: we probably need the test below!! 25 | "check_methods_sample_order_invariance": "Test does not work on dataframes", 26 | # TODO: we probably need the test below!! 27 | # the test below tests that a second fit overrides a first fit. 28 | # the problem is that the test does not work with pandas df. 29 | "check_fit_idempotent": "Test does not work on dataframes.", 30 | "check_fit2d_predict1d": "Test not relevant, Feature-engine transformers " 31 | "only work with dataframes.", 32 | }, 33 | } 34 | 35 | if sklearn_version > parse_version("1.6"): 36 | msg1 = "against Feature-engines design." 37 | msg2 = "Our transformers do not preserve dtype." 38 | all_fail = { 39 | "check_do_not_raise_errors_in_init_or_set_params": msg1, 40 | "check_transformer_preserve_dtypes": msg2, 41 | # TODO: investigate this test further. 42 | "check_n_features_in_after_fitting": "not sure why it fails, we do check.", 43 | } 44 | tags["_xfail_checks"].update(all_fail) # type: ignore 45 | return tags 46 | -------------------------------------------------------------------------------- /feature_engine/timeseries/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/feature_engine/timeseries/__init__.py -------------------------------------------------------------------------------- /feature_engine/timeseries/forecasting/__init__.py: -------------------------------------------------------------------------------- 1 | """ Transformers that create features for time-series forecasting.""" 2 | 3 | from .expanding_window_features import ExpandingWindowFeatures 4 | from .lag_features import LagFeatures 5 | from .window_features import WindowFeatures 6 | 7 | __all__ = ["LagFeatures", "WindowFeatures", "ExpandingWindowFeatures"] 8 | -------------------------------------------------------------------------------- /feature_engine/transformation/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The module transformation includes classes to transform variables using mathematical 3 | functions. 4 | """ 5 | 6 | from .arcsin import ArcsinTransformer 7 | from .boxcox import BoxCoxTransformer 8 | from .log import LogCpTransformer, LogTransformer 9 | from .power import PowerTransformer 10 | from .reciprocal import ReciprocalTransformer 11 | from .yeojohnson import YeoJohnsonTransformer 12 | 13 | __all__ = [ 14 | "BoxCoxTransformer", 15 | "LogTransformer", 16 | "LogCpTransformer", 17 | "PowerTransformer", 18 | "ReciprocalTransformer", 19 | "YeoJohnsonTransformer", 20 | "ArcsinTransformer", 21 | ] 22 | -------------------------------------------------------------------------------- /feature_engine/variable_handling/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The module variable handling includes functions to select variables of a certain type 3 | or check that a list of variables is in certain type. 
4 | """ 5 | 6 | from .check_variables import ( 7 | check_all_variables, 8 | check_categorical_variables, 9 | check_datetime_variables, 10 | check_numerical_variables, 11 | ) 12 | from .find_variables import ( 13 | find_all_variables, 14 | find_categorical_and_numerical_variables, 15 | find_categorical_variables, 16 | find_datetime_variables, 17 | find_numerical_variables, 18 | ) 19 | from .retain_variables import retain_variables_if_in_df 20 | 21 | __all__ = [ 22 | "check_all_variables", 23 | "check_numerical_variables", 24 | "check_categorical_variables", 25 | "check_datetime_variables", 26 | "find_all_variables", 27 | "find_numerical_variables", 28 | "find_categorical_variables", 29 | "find_datetime_variables", 30 | "find_categorical_and_numerical_variables", 31 | "retain_variables_if_in_df", 32 | ] 33 | -------------------------------------------------------------------------------- /feature_engine/variable_handling/_variable_type_checks.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import pandas as pd 4 | from pandas.core.dtypes.common import is_datetime64_any_dtype as is_datetime 5 | from pandas.core.dtypes.common import is_numeric_dtype as is_numeric 6 | from pandas.core.dtypes.common import is_object_dtype as is_object 7 | 8 | 9 | def _is_categorical_and_is_not_datetime(column: pd.Series) -> bool: 10 | # check for datetime only if object cannot be cast as numeric because 11 | # if it could pd.to_datetime would convert it to datetime regardless 12 | if is_object(column): 13 | is_cat = _is_convertible_to_num(column) or not _is_convertible_to_dt(column) 14 | 15 | # check for datetime only if the type of the categories is not numeric 16 | # because pd.to_datetime throws an error when it is an integer 17 | elif isinstance(column.dtype, pd.CategoricalDtype): 18 | is_cat = _is_categories_num(column) or not _is_convertible_to_dt(column) 19 | 20 | return is_cat 21 | 22 | 23 | def _is_categories_num(column: pd.Series) -> bool: 24 | return is_numeric(column.dtype.categories) 25 | 26 | 27 | def _is_convertible_to_dt(column: pd.Series) -> bool: 28 | with warnings.catch_warnings(): 29 | warnings.simplefilter("ignore") 30 | return is_datetime(pd.to_datetime(column, errors="ignore", utc=True)) 31 | 32 | 33 | def _is_convertible_to_num(column: pd.Series) -> bool: 34 | try: 35 | ser = pd.to_numeric(column) 36 | except (ValueError, TypeError): 37 | ser = column 38 | return is_numeric(ser) 39 | 40 | 41 | def _is_categorical_and_is_datetime(column: pd.Series) -> bool: 42 | # check for datetime only if object cannot be cast as numeric because 43 | # if it could pd.to_datetime would convert it to datetime regardless 44 | if is_object(column): 45 | is_dt = not _is_convertible_to_num(column) and _is_convertible_to_dt(column) 46 | 47 | # check for datetime only if the type of the categories is not numeric 48 | # because pd.to_datetime throws an error when it is an integer 49 | elif isinstance(column.dtype, pd.CategoricalDtype): 50 | is_dt = not _is_categories_num(column) and _is_convertible_to_dt(column) 51 | 52 | return is_dt 53 | -------------------------------------------------------------------------------- /feature_engine/variable_handling/dtypes.py: -------------------------------------------------------------------------------- 1 | DATETIME_TYPES = ("datetimetz", "datetime") 2 | -------------------------------------------------------------------------------- /feature_engine/variable_handling/retain_variables.py: 
-------------------------------------------------------------------------------- 1 | """Functions to remove variables from a list.""" 2 | 3 | from typing import List, Union 4 | 5 | Variables = Union[int, str, List[Union[str, int]]] 6 | 7 | 8 | def retain_variables_if_in_df(X, variables): 9 | """Returns the subset of variables in the list that are present in the dataframe. 10 | 11 | More details in the :ref:`User Guide `. 12 | 13 | Parameters 14 | ---------- 15 | X: pandas dataframe of shape = [n_samples, n_features] 16 | The dataset. 17 | 18 | variables: string, int or list of strings or int. 19 | The names of the variables to check. 20 | 21 | Returns 22 | ------- 23 | variables_in_df: List. 24 | The subset of `variables` that is present `X`. 25 | 26 | Examples 27 | -------- 28 | >>> import pandas as pd 29 | >>> from feature_engine.variable_handling import retain_variables_if_in_df 30 | >>> X = pd.DataFrame({ 31 | >>> "var_num": [1, 2, 3], 32 | >>> "var_cat": ["A", "B", "C"], 33 | >>> "var_date": pd.date_range("2020-02-24", periods=3, freq="T") 34 | >>> }) 35 | >>> vars_in_df = retain_variables_if_in_df(X, ['var_num', 'var_cat', 'var_other']) 36 | >>> vars_in_df 37 | ['var_num', 'var_cat'] 38 | """ 39 | if isinstance(variables, (str, int)): 40 | variables = [variables] 41 | 42 | variables_in_df = [var for var in variables if var in X.columns] 43 | 44 | # Raise an error if no column is left to work with. 45 | if len(variables_in_df) == 0: 46 | raise ValueError( 47 | "None of the variables in the list are present in the dataframe." 48 | ) 49 | 50 | return variables_in_df 51 | -------------------------------------------------------------------------------- /feature_engine/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The module wrappers includes classes to wrap Scikit-learn transformers so that they 3 | can be applied to a selected subset of features and return a dataframe. 4 | """ 5 | 6 | from .wrappers import SklearnTransformerWrapper 7 | 8 | __all__ = ["SklearnTransformerWrapper"] 9 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | warn_unused_ignores = True 3 | follow_imports = skip 4 | show_error_context = True 5 | warn_incomplete_stub = True 6 | ignore_missing_imports = True 7 | check_untyped_defs = True 8 | cache_dir = /dev/null 9 | warn_redundant_casts = True 10 | warn_unused_configs = True 11 | strict_optional = True 12 | 13 | exclude = (?x)( 14 | mixins\.py$ # or files ending with "two.pyi" 15 | ) -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | # pytest.ini 2 | 3 | [pytest] 4 | filterwarnings = 5 | ignore::sklearn.exceptions.SkipTestWarning 6 | ignore::UserWarning 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.18.2 2 | pandas>=2.2.0 3 | scikit-learn>=1.4.0 4 | scipy>=1.4.1 5 | statsmodels>=0.11.1 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from setuptools import find_packages, setup 4 | 5 | # Package meta-data. 
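# These metadata constants are passed to the setup() call at the bottom of this file.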
6 | NAME = "feature_engine" 7 | DESCRIPTION = "Feature engineering and selection package with Scikit-learn's fit transform functionality" 8 | URL = "http://github.com/feature-engine/feature_engine" 9 | EMAIL = "solegalli@protonmail.com" 10 | AUTHOR = "Soledad Galli" 11 | REQUIRES_PYTHON = ">=3.9.0" 12 | 13 | # description 14 | with open("README.md", "r") as fh: 15 | long_description = fh.read() 16 | 17 | 18 | # Packages required for this module to be executed 19 | def list_reqs(fname='requirements.txt'): 20 | with open(fname) as fd: 21 | return fd.read().splitlines() 22 | 23 | 24 | # Load the package's VERSION file as a dictionary. 25 | about = {} 26 | ROOT_DIR = Path(__file__).resolve().parent 27 | PACKAGE_DIR = ROOT_DIR / 'feature_engine' 28 | with open(PACKAGE_DIR / "VERSION") as f: 29 | _version = f.read().strip() 30 | about["__version__"] = _version 31 | 32 | setup(name=NAME, 33 | version=about["__version__"], 34 | description=DESCRIPTION, 35 | long_description=long_description, 36 | long_description_content_type="text/markdown", 37 | url=URL, 38 | author=AUTHOR, 39 | author_email=EMAIL, 40 | python_requires=REQUIRES_PYTHON, 41 | packages=find_packages(exclude=("tests",)), 42 | package_data={"feature_engine": ["VERSION", "py.typed"]}, 43 | license='BSD 3 clause', 44 | install_requires=list_reqs(), 45 | include_package_data=True, 46 | classifiers=[ 47 | # Trove classifiers 48 | # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers 49 | "License :: OSI Approved :: BSD License", 50 | "Programming Language :: Python :: 3.9", 51 | "Programming Language :: Python :: 3.10", 52 | "Programming Language :: Python :: 3.11", 53 | "Programming Language :: Python :: 3.12", 54 | ], 55 | zip_safe=False) 56 | -------------------------------------------------------------------------------- /test_requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | pytest>=5.4.1 3 | 4 | # repo maintenance tooling 5 | black>=21.5b1 6 | coverage>=6.4.4 7 | flake8>=3.9.2 8 | isort>=5.8.0 9 | mypy>=0.740 10 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/tests/__init__.py -------------------------------------------------------------------------------- /tests/estimator_checks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/tests/estimator_checks/__init__.py -------------------------------------------------------------------------------- /tests/estimator_checks/dataframe_for_checks.py: -------------------------------------------------------------------------------- 1 | """Dataframe used as input by many estimator checks.""" 2 | 3 | from typing import Tuple 4 | 5 | import pandas as pd 6 | from sklearn.datasets import make_classification 7 | 8 | 9 | def test_df( 10 | categorical: bool = False, datetime: bool = False 11 | ) -> Tuple[pd.DataFrame, pd.Series]: 12 | """ 13 | Creates a dataframe that contains only numerical features, or additionally, 14 | categorical and datetime features. 15 | 16 | Parameters 17 | ---------- 18 | categorical: bool, default=False 19 | Whether to add 2 additional categorical features. 
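        The categorical features are two constant string columns ("A" and "B").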
20 | 21 | datetime: bool, default=False 22 | Whether to add one additional datetime feature. 23 | 24 | Returns 25 | ------- 26 | X: pd.DataFrame 27 | A pandas dataframe. 28 | """ 29 | X, y = make_classification( 30 | n_samples=1000, 31 | n_features=12, 32 | n_redundant=4, 33 | n_clusters_per_class=1, 34 | weights=[0.50], 35 | class_sep=2, 36 | random_state=1, 37 | ) 38 | 39 | # transform arrays into pandas df and series 40 | colnames = [f"var_{i}" for i in range(12)] 41 | X = pd.DataFrame(X, columns=colnames) 42 | y = pd.Series(y) 43 | 44 | if categorical is True: 45 | X["cat_var1"] = ["A"] * 1000 46 | X["cat_var2"] = ["B"] * 1000 47 | 48 | if datetime is True: 49 | X["date1"] = pd.date_range("2020-02-24", periods=1000, freq="min") 50 | X["date2"] = pd.date_range("2021-09-29", periods=1000, freq="h") 51 | 52 | return X, y 53 | -------------------------------------------------------------------------------- /tests/estimator_checks/fit_functionality_checks.py: -------------------------------------------------------------------------------- 1 | """Checks functionality in the fit method shared by all transformers.""" 2 | 3 | import pytest 4 | from sklearn import clone 5 | 6 | from tests.estimator_checks.dataframe_for_checks import test_df 7 | 8 | 9 | def check_feature_names_in(estimator): 10 | """Checks that transformers learn the variable names of the train set used 11 | during fit, as well as the number of variables. 12 | 13 | Should be applied to all transformers. 14 | """ 15 | # the estimator learns the parameters from the train set 16 | X, y = test_df(categorical=True, datetime=True) 17 | varnames = list(X.columns) 18 | estimator = clone(estimator) 19 | estimator.fit(X, y) 20 | assert estimator.feature_names_in_ == varnames 21 | assert estimator.n_features_in_ == len(varnames) 22 | 23 | 24 | def check_error_if_y_not_passed(estimator): 25 | """ 26 | Checks that transformer raises error when y is not passed during fit. Functionality 27 | is provided by Python, when making a parameter mandatory. 28 | 29 | For this test to run, we need to add the tag 'requires_y' to the transformer. 30 | """ 31 | X, y = test_df() 32 | estimator = clone(estimator) 33 | with pytest.raises(TypeError): 34 | estimator.fit(X) 35 | -------------------------------------------------------------------------------- /tests/estimator_checks/init_params_allowed_values_checks.py: -------------------------------------------------------------------------------- 1 | """Many transformers have similar init parameters which take the same input values. 2 | In this script, we add tests for the allowed values for those parameters. 3 | """ 4 | import pytest 5 | from sklearn import clone 6 | 7 | 8 | def check_error_param_missing_values(estimator): 9 | """ 10 | Only for transformers with a parameter `missing_values`in init. 11 | 12 | Checks transformer raises error when user enters non-permitted value to the 13 | parameter. 
14 | """ 15 | # param takes values "raise" or "ignore" 16 | estimator = clone(estimator) 17 | for value in [2, "hola", False]: 18 | if estimator.__class__.__name__ == "MathFeatures": 19 | with pytest.raises(ValueError): 20 | estimator.__class__( 21 | variables=["var_1", "var_2", "var_3"], 22 | func="mean", 23 | missing_values=value, 24 | ) 25 | 26 | elif estimator.__class__.__name__ == "RelativeFeatures": 27 | with pytest.raises(ValueError): 28 | estimator.__class__( 29 | variables=["var_1", "var_2", "var_3"], 30 | reference=["var_4"], 31 | func="mean", 32 | missing_values=value, 33 | ) 34 | else: 35 | with pytest.raises(ValueError): 36 | estimator.__class__(missing_values=value) 37 | 38 | 39 | def check_error_param_confirm_variables(estimator): 40 | """ 41 | Only for transformers with a parameter `confirm_variables`in init. 42 | 43 | Checks transformer raises error when user enters non-permitted value to the 44 | parameter. 45 | """ 46 | # param takes values True or False 47 | estimator = clone(estimator) 48 | for value in [2, "hola", [True]]: 49 | msg = ( 50 | f"confirm_variables takes only values True and False. Got {value} instead." 51 | ) 52 | with pytest.raises(ValueError) as record: 53 | estimator.__class__(confirm_variables=value) 54 | assert record.value.args[0] == msg 55 | -------------------------------------------------------------------------------- /tests/estimator_checks/non_fitted_error_checks.py: -------------------------------------------------------------------------------- 1 | """Checks functionality in the transform method shared by all transformers.""" 2 | 3 | import pytest 4 | from sklearn import clone 5 | from sklearn.exceptions import NotFittedError 6 | 7 | from tests.estimator_checks.dataframe_for_checks import test_df 8 | 9 | 10 | def check_raises_non_fitted_error(estimator): 11 | """ 12 | Check if transformer raises error when transform() method is called before 13 | calling fit() method. 14 | 15 | The functionality is provided by sklearn's `check_is_fitted` function. 16 | """ 17 | X, y = test_df() 18 | transformer = clone(estimator) 19 | # Test when fit is not called prior to transform. 20 | with pytest.raises(NotFittedError): 21 | transformer.transform(X) 22 | -------------------------------------------------------------------------------- /tests/parametrize_with_checks_creation_v16.py: -------------------------------------------------------------------------------- 1 | """ 2 | File intended to help understand check_estimator tests for the module creation of 3 | Feature-engine. It is not run as part of the battery of acceptance tests. Works from 4 | sklearn > 1.6. 
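It can be run on its own with pytest, for example: pytest tests/parametrize_with_checks_creation_v16.py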
5 | """ 6 | 7 | from sklearn.utils.estimator_checks import parametrize_with_checks 8 | 9 | from feature_engine.creation import ( 10 | CyclicalFeatures, 11 | DecisionTreeFeatures, 12 | MathFeatures, 13 | RelativeFeatures, 14 | ) 15 | 16 | dtf = DecisionTreeFeatures(regression=False) 17 | cf = CyclicalFeatures() 18 | mf = MathFeatures(variables=["x0", "x1"], func="mean", missing_values="ignore") 19 | rf = RelativeFeatures( 20 | variables=["x0", "x1"], 21 | reference=["x0"], 22 | func=["add"], 23 | missing_values="ignore", 24 | ) 25 | 26 | EXPECTED_FAILED_CHECKS = { 27 | "DecisionTreeFeatures": dtf._more_tags()["_xfail_checks"], 28 | "CyclicalFeatures": cf._more_tags()["_xfail_checks"], 29 | "MathFeatures": mf._more_tags()["_xfail_checks"], 30 | "RelativeFeatures": rf._more_tags()["_xfail_checks"], 31 | } 32 | 33 | 34 | # creation 35 | @parametrize_with_checks( 36 | estimators=[dtf, cf, mf, rf], 37 | expected_failed_checks=lambda est: EXPECTED_FAILED_CHECKS.get( 38 | est.__class__.__name__, {} 39 | ), 40 | ) 41 | def test_sklearn_compatible_creator(estimator, check): 42 | check(estimator) 43 | -------------------------------------------------------------------------------- /tests/parametrize_with_checks_discretization_v16.py: -------------------------------------------------------------------------------- 1 | """ 2 | File intended to help understand check_estimator tests for Feature-engine's 3 | discretization module. It is not run as part of the battery of acceptance tests. 4 | Works from sklearn > 1.6. 5 | """ 6 | 7 | import numpy as np 8 | from sklearn.utils.estimator_checks import parametrize_with_checks 9 | 10 | from feature_engine.discretisation import ( 11 | ArbitraryDiscretiser, 12 | DecisionTreeDiscretiser, 13 | EqualFrequencyDiscretiser, 14 | EqualWidthDiscretiser, 15 | GeometricWidthDiscretiser, 16 | ) 17 | 18 | dtd = DecisionTreeDiscretiser(regression=False) 19 | efd = EqualFrequencyDiscretiser() 20 | ewd = EqualWidthDiscretiser() 21 | ad = ArbitraryDiscretiser(binning_dict={"x0": [-np.inf, 0, np.inf]}) 22 | gd = GeometricWidthDiscretiser() 23 | 24 | EXPECTED_FAILED_CHECKS = { 25 | "DecisionTreeDiscretiser": dtd._more_tags()["_xfail_checks"], 26 | "EqualFrequencyDiscretiser": efd._more_tags()["_xfail_checks"], 27 | "EqualWidthDiscretiser": ewd._more_tags()["_xfail_checks"], 28 | "ArbitraryDiscretiser": ad._more_tags()["_xfail_checks"], 29 | "GeometricWidthDiscretiser": gd._more_tags()["_xfail_checks"], 30 | } 31 | 32 | 33 | # discretization 34 | @parametrize_with_checks( 35 | estimators=[dtd, efd, ewd, ad, gd], 36 | expected_failed_checks=lambda est: EXPECTED_FAILED_CHECKS.get( 37 | est.__class__.__name__, {} 38 | ), 39 | ) 40 | def test_sklearn_compatible_creator(estimator, check): 41 | check(estimator) 42 | -------------------------------------------------------------------------------- /tests/parametrize_with_checks_encoders_v16.py: -------------------------------------------------------------------------------- 1 | """ 2 | File intended to help understand check_estimator tests for Feature-engine's 3 | encoding module. It is not run as part of the battery of acceptance tests. 4 | Works from sklearn > 1.6. 
5 | """ 6 | 7 | from sklearn.utils.estimator_checks import parametrize_with_checks 8 | 9 | from feature_engine.encoding import ( 10 | CountFrequencyEncoder, 11 | MeanEncoder, 12 | OneHotEncoder, 13 | OrdinalEncoder, 14 | RareLabelEncoder, 15 | StringSimilarityEncoder, 16 | WoEEncoder, 17 | ) 18 | from feature_engine.tags import _return_tags 19 | 20 | ce = CountFrequencyEncoder(ignore_format=True) 21 | me = MeanEncoder(ignore_format=True) 22 | ohe = OneHotEncoder(ignore_format=True) 23 | oe = OrdinalEncoder(ignore_format=True) 24 | re = RareLabelEncoder( 25 | tol=0.00000000001, 26 | n_categories=100000000000, 27 | replace_with=10, 28 | ignore_format=True, 29 | ) 30 | woe = WoEEncoder(ignore_format=True) 31 | sse = StringSimilarityEncoder(ignore_format=True) 32 | 33 | FAILED_CHECKS = _return_tags()["_xfail_checks"] 34 | FAILED_CHECKS.update({"check_estimators_nan_inf": "transformer allows NA"}) 35 | 36 | EXPECTED_FAILED_CHECKS = { 37 | "CountFrequencyEncoder": FAILED_CHECKS, 38 | "MeanEncoder": FAILED_CHECKS, 39 | "OneHotEncoder": FAILED_CHECKS, 40 | "OrdinalEncoder": FAILED_CHECKS, 41 | "RareLabelEncoder": FAILED_CHECKS, 42 | "StringSimilarityEncoder": FAILED_CHECKS, 43 | } 44 | 45 | 46 | # encoding 47 | @parametrize_with_checks( 48 | estimators=[ce, me, ohe, oe, re, woe, sse], 49 | expected_failed_checks=lambda est: EXPECTED_FAILED_CHECKS.get( 50 | est.__class__.__name__, {} 51 | ), 52 | ) 53 | def test_sklearn_compatible_creator(estimator, check): 54 | check(estimator) 55 | -------------------------------------------------------------------------------- /tests/parametrize_with_checks_outliers_v16.py: -------------------------------------------------------------------------------- 1 | """ 2 | File intended to help understand check_estimator tests for Feature-engine's 3 | outliers module. It is not run as part of the battery of acceptance tests. 4 | Works from sklearn > 1.6. 5 | """ 6 | 7 | from sklearn.utils.estimator_checks import parametrize_with_checks 8 | 9 | from feature_engine.outliers import ArbitraryOutlierCapper, OutlierTrimmer, Winsorizer 10 | from feature_engine.tags import _return_tags 11 | 12 | aoc = ArbitraryOutlierCapper(max_capping_dict={"x0": 10}) 13 | ot = OutlierTrimmer() 14 | wz = Winsorizer() 15 | 16 | FAILED_CHECKS = _return_tags()["_xfail_checks"] 17 | FAILED_CHECKS_AOC = _return_tags()["_xfail_checks"] 18 | 19 | msg1 = "transformers raise errors when data variation is low, " "thus this check fails" 20 | 21 | msg2 = "transformer has 1 mandatory parameter" 22 | 23 | FAILED_CHECKS.update({"check_fit2d_1sample": msg1}) 24 | FAILED_CHECKS_AOC.update( 25 | { 26 | "check_fit2d_1sample": msg1, 27 | "check_parameters_default_constructible": msg2, 28 | } 29 | ) 30 | 31 | EXPECTED_FAILED_CHECKS = { 32 | "ArbitraryOutlierCapper": FAILED_CHECKS_AOC, 33 | "OutlierTrimmer": FAILED_CHECKS, 34 | "Winsorizer": FAILED_CHECKS, 35 | } 36 | 37 | 38 | # encoding 39 | @parametrize_with_checks( 40 | estimators=[aoc, ot, wz], 41 | expected_failed_checks=lambda est: EXPECTED_FAILED_CHECKS.get( 42 | est.__class__.__name__, {} 43 | ), 44 | ) 45 | def test_sklearn_compatible_creator(estimator, check): 46 | check(estimator) 47 | -------------------------------------------------------------------------------- /tests/parametrize_with_checks_prediction_v16.py: -------------------------------------------------------------------------------- 1 | """ 2 | File intended to help understand check_estimator tests for Feature-engine's 3 | prediction module. 
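The estimators tested here live in the private feature_engine._prediction module.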
It is not run as part of the battery of acceptance tests. 4 | Works from sklearn > 1.6. 5 | """ 6 | 7 | from sklearn.utils.estimator_checks import parametrize_with_checks 8 | 9 | from feature_engine._prediction.base_predictor import BaseTargetMeanEstimator 10 | from feature_engine._prediction.target_mean_classifier import TargetMeanClassifier 11 | from feature_engine._prediction.target_mean_regressor import TargetMeanRegressor 12 | from feature_engine.tags import _return_tags 13 | 14 | _estimators = [BaseTargetMeanEstimator(), TargetMeanClassifier(), TargetMeanRegressor()] 15 | 16 | FAILED_CHECKS = _return_tags()["_xfail_checks"] 17 | 18 | EXPECTED_FAILED_CHECKS = { 19 | "BaseTargetMeanEstimator": FAILED_CHECKS, 20 | "TargetMeanClassifier": FAILED_CHECKS, 21 | "TargetMeanRegressor": FAILED_CHECKS, 22 | } 23 | 24 | 25 | @parametrize_with_checks( 26 | estimators=_estimators, 27 | expected_failed_checks=lambda est: EXPECTED_FAILED_CHECKS.get( 28 | est.__class__.__name__, {} 29 | ), 30 | ) 31 | def test_sklearn_compatible_creator(estimator, check): 32 | check(estimator) 33 | -------------------------------------------------------------------------------- /tests/test_base_transformers/test_base_numerical_transformer.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from numpy import inf 3 | from pandas.testing import assert_frame_equal 4 | 5 | from feature_engine._base_transformers.base_numerical import BaseNumericalTransformer 6 | from tests.estimator_checks.non_fitted_error_checks import check_raises_non_fitted_error 7 | 8 | 9 | class MockClass(BaseNumericalTransformer): 10 | def __init__(self): 11 | self.variables = None 12 | 13 | def transform(self, X): 14 | return self._check_transform_input_and_state(X) 15 | 16 | 17 | def test_fit_method(df_vartypes, df_na): 18 | transformer = MockClass() 19 | res = transformer.fit(df_vartypes) 20 | assert transformer.feature_names_in_ == list(df_vartypes.columns) 21 | assert transformer.n_features_in_ == len(df_vartypes.columns) 22 | assert_frame_equal(res, df_vartypes) 23 | 24 | with pytest.raises(ValueError): 25 | transformer.fit(df_na) 26 | 27 | df_na = df_na.fillna(inf) 28 | with pytest.raises(ValueError): 29 | assert transformer.fit(df_na) 30 | 31 | 32 | def test_transform_method(df_vartypes, df_na): 33 | transformer = MockClass() 34 | transformer.fit(df_vartypes) 35 | assert_frame_equal( 36 | transformer._check_transform_input_and_state(df_vartypes), df_vartypes 37 | ) 38 | assert_frame_equal( 39 | transformer._check_transform_input_and_state( 40 | df_vartypes[["City", "Age", "Name", "Marks", "dob"]] 41 | ), 42 | df_vartypes, 43 | ) 44 | 45 | with pytest.raises(ValueError): 46 | transformer.fit(df_na) 47 | 48 | df_na = df_na.fillna(inf) 49 | with pytest.raises(ValueError): 50 | assert transformer.fit(df_na) 51 | 52 | with pytest.raises(ValueError): 53 | assert transformer._check_transform_input_and_state( 54 | df_vartypes[["Age", "Marks"]] 55 | ) 56 | 57 | 58 | def test_raises_non_fitted_error(): 59 | check_raises_non_fitted_error(MockClass()) 60 | -------------------------------------------------------------------------------- /tests/test_base_transformers/test_transform_xy_mixin.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from feature_engine._base_transformers.mixins import TransformXyMixin 5 | 6 | 7 | class MockTransformer(TransformXyMixin): 8 | def transform(self, X): 9 | return 
X.iloc[1:-1].copy() 10 | 11 | 12 | def test_transform_x_y_method(df_vartypes): 13 | # single target 14 | y = pd.Series(0, index=np.arange(len(df_vartypes))) 15 | transformer = MockTransformer() 16 | Xt, yt = transformer.transform_x_y(df_vartypes, y) 17 | 18 | assert len(Xt) == len(yt) 19 | assert len(Xt) != len(df_vartypes) 20 | assert len(yt) != len(y) 21 | assert (Xt.index == yt.index).all() 22 | assert (Xt.index == [1, 2]).all() 23 | 24 | # multioutput target 25 | y = ( 26 | pd.DataFrame(columns=["vara", "varb"], index=df_vartypes.index) 27 | .astype(float) 28 | .fillna(0) 29 | ) 30 | Xt, yt = transformer.transform_x_y(df_vartypes, y) 31 | 32 | assert len(Xt) == len(yt) 33 | assert len(Xt) != len(df_vartypes) 34 | assert len(yt) != len(y) 35 | assert (Xt.index == yt.index).all() 36 | assert (Xt.index == [1, 2]).all() 37 | -------------------------------------------------------------------------------- /tests/test_check_init_parameters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/tests/test_check_init_parameters/__init__.py -------------------------------------------------------------------------------- /tests/test_check_init_parameters/test_check_init_input_params.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from feature_engine._check_init_parameters.check_init_input_params import ( 4 | _check_param_drop_original, 5 | _check_param_missing_values, 6 | ) 7 | 8 | 9 | @pytest.mark.parametrize("missing_vals", [None, ["Hola"], True, "Hola"]) 10 | def test_check_param_missing_values(missing_vals): 11 | with pytest.raises(ValueError): 12 | _check_param_missing_values(missing_vals) 13 | 14 | 15 | @pytest.mark.parametrize("drop_orig", [None, ["Hola"], 10, "Hola"]) 16 | def test_check_param_drop_original(drop_orig): 17 | with pytest.raises(ValueError): 18 | _check_param_drop_original(drop_orig) 19 | -------------------------------------------------------------------------------- /tests/test_check_init_parameters/test_check_input_dictionary.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from feature_engine._check_init_parameters.check_input_dictionary import ( 4 | _check_numerical_dict, 5 | ) 6 | 7 | 8 | @pytest.mark.parametrize("input_dict", [{"a": 1, "b": "c"}, {1: 1, 2: "c"}]) 9 | def test_raises_error_when_item_in_dict_not_numerical(input_dict): 10 | with pytest.raises(ValueError): 11 | _check_numerical_dict(input_dict) 12 | 13 | 14 | @pytest.mark.parametrize("input_dict", [[1, 2, 3], (1, 2, 3), "hola", 5]) 15 | def test_raises_error_when_input_not_dictionary_or_none(input_dict): 16 | with pytest.raises(TypeError): 17 | _check_numerical_dict(input_dict) 18 | -------------------------------------------------------------------------------- /tests/test_check_init_parameters/test_check_variables.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from feature_engine._check_init_parameters.check_variables import ( 4 | _check_variables_input_value, 5 | ) 6 | 7 | 8 | @pytest.mark.parametrize("_input_vars", [("var1", "var2"), {"var1": 1, "var2": 2}]) 9 | def test_raises_errors_when_not_list_str_or_int(_input_vars): 10 | with pytest.raises(ValueError) as record: 11 | assert _check_variables_input_value(_input_vars) 12 | msg = ( 13 | "`variables` should contain a 
string, an integer or a list of strings or " 14 | f"integers. Got {_input_vars} instead." 15 | ) 16 | assert str(record.value) == msg 17 | 18 | 19 | @pytest.mark.parametrize( 20 | "_input_vars", [["var1", "var2", "var2", "var3"], [0, 1, 1, 2]] 21 | ) 22 | def test_raises_error_when_duplicated_var_names(_input_vars): 23 | with pytest.raises(ValueError) as record: 24 | assert _check_variables_input_value(_input_vars) 25 | msg = "The list entered in `variables` contains duplicated variable names." 26 | assert str(record.value) == msg 27 | 28 | 29 | def test_raises_error_when_empty_list(): 30 | with pytest.raises(ValueError) as record: 31 | assert _check_variables_input_value([]) 32 | msg = "The list of `variables` is empty." 33 | assert str(record.value) == msg 34 | 35 | 36 | @pytest.mark.parametrize( 37 | "_input_vars", 38 | [["var1", "var2", "var3"], [0, 1, 2, 3], "var1", ["var1"], 0, [0]], 39 | ) 40 | def test_return_variables(_input_vars): 41 | assert _check_variables_input_value(_input_vars) == _input_vars 42 | 43 | 44 | def test_return_when_variables_is_none(): 45 | assert _check_variables_input_value(None) is None 46 | -------------------------------------------------------------------------------- /tests/test_creation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/tests/test_creation/__init__.py -------------------------------------------------------------------------------- /tests/test_creation/test_check_estimator_creation.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pytest 3 | import sklearn 4 | from sklearn.pipeline import Pipeline 5 | from sklearn.utils.estimator_checks import check_estimator 6 | from sklearn.utils.fixes import parse_version 7 | 8 | from feature_engine.creation import ( 9 | CyclicalFeatures, 10 | DecisionTreeFeatures, 11 | MathFeatures, 12 | RelativeFeatures, 13 | ) 14 | from tests.estimator_checks.estimator_checks import check_feature_engine_estimator 15 | 16 | sklearn_version = parse_version(parse_version(sklearn.__version__).base_version) 17 | 18 | _estimators = [ 19 | MathFeatures(variables=["x0", "x1"], func="mean", missing_values="ignore"), 20 | RelativeFeatures( 21 | variables=["x0", "x1"], reference=["x0"], func=["add"], missing_values="ignore" 22 | ), 23 | CyclicalFeatures(), 24 | DecisionTreeFeatures(regression=False), 25 | ] 26 | 27 | if sklearn_version > parse_version("1.6"): 28 | 29 | @pytest.mark.parametrize("estimator", _estimators) 30 | def test_check_estimator_from_sklearn(estimator): 31 | return check_estimator( 32 | estimator=estimator, 33 | expected_failed_checks=estimator._more_tags()["_xfail_checks"], 34 | ) 35 | 36 | else: 37 | 38 | @pytest.mark.parametrize("estimator", _estimators) 39 | def test_check_estimator_from_sklearn(estimator): 40 | return check_estimator(estimator) 41 | 42 | 43 | _estimators = [ 44 | MathFeatures(variables=["var_1", "var_2", "var_3"], func="mean"), 45 | RelativeFeatures(variables=["var_1", "var_2"], reference=["var_3"], func=["add"]), 46 | CyclicalFeatures(), 47 | ] 48 | 49 | 50 | @pytest.mark.parametrize("estimator", _estimators) 51 | def test_check_estimator_from_feature_engine(estimator): 52 | return check_feature_engine_estimator(estimator) 53 | 54 | 55 | _estimators = [ 56 | CyclicalFeatures(), 57 | MathFeatures(variables=["feature_1", "feature_2"], func=["sum", "mean"]), 58 | 
RelativeFeatures(variables=["feature_1"], reference=["feature_2"], func=["div"]), 59 | ] 60 | 61 | 62 | @pytest.mark.parametrize("transformer", _estimators) 63 | def test_transformers_in_pipeline_with_set_output_pandas(transformer): 64 | X = pd.DataFrame({"feature_1": [1, 2, 3, 4, 5], "feature_2": [6, 7, 8, 9, 10]}) 65 | y = pd.Series([0, 1, 0, 1, 0]) 66 | 67 | pipe = Pipeline([("trs", transformer)]).set_output(transform="pandas") 68 | 69 | Xtt = transformer.fit_transform(X) 70 | Xtp = pipe.fit_transform(X, y) 71 | 72 | pd.testing.assert_frame_equal(Xtt, Xtp) 73 | -------------------------------------------------------------------------------- /tests/test_datasets/__init__().py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/tests/test_datasets/__init__().py -------------------------------------------------------------------------------- /tests/test_datetime/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/tests/test_datetime/__init__.py -------------------------------------------------------------------------------- /tests/test_datetime/test_check_estimator_datetime.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pytest 3 | from sklearn.pipeline import Pipeline 4 | 5 | from feature_engine.datetime import DatetimeFeatures, DatetimeSubtraction 6 | from tests.estimator_checks.estimator_checks import check_feature_engine_estimator 7 | 8 | _estimators = [DatetimeFeatures()] 9 | 10 | 11 | @pytest.mark.parametrize("estimator", _estimators) 12 | def test_check_estimator_from_feature_engine(estimator): 13 | return check_feature_engine_estimator(estimator) 14 | 15 | 16 | transformers = [ 17 | DatetimeFeatures(), 18 | DatetimeSubtraction(variables="feature_1", reference="feature_2"), 19 | ] 20 | 21 | 22 | @pytest.mark.parametrize("transformer", transformers) 23 | def test_datetime_transformers(transformer): 24 | X = pd.DataFrame( 25 | { 26 | "feature_1": [ 27 | "2014-05-05", 28 | "2014-05-05", 29 | "2014-05-05", 30 | "2014-05-05", 31 | "2014-05-05", 32 | ], 33 | "feature_2": [ 34 | "2014-05-05", 35 | "2014-05-05", 36 | "2014-05-05", 37 | "2014-05-05", 38 | "2014-05-05", 39 | ], 40 | }, 41 | ) 42 | y = pd.Series([0, 1, 0, 1, 0]) 43 | 44 | pipe = Pipeline( 45 | [ 46 | ("trs", transformer), 47 | ] 48 | ).set_output(transform="pandas") 49 | 50 | Xtt = transformer.fit_transform(X) 51 | Xtp = pipe.fit_transform(X, y) 52 | 53 | pd.testing.assert_frame_equal(Xtt, Xtp) 54 | -------------------------------------------------------------------------------- /tests/test_discretisation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/tests/test_discretisation/__init__.py -------------------------------------------------------------------------------- /tests/test_discretisation/test_check_estimator_discretisers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import pytest 4 | import sklearn 5 | from sklearn.pipeline import Pipeline 6 | from sklearn.utils.estimator_checks import check_estimator 7 | from sklearn.utils.fixes 
import parse_version 8 | 9 | from feature_engine.discretisation import ( 10 | ArbitraryDiscretiser, 11 | DecisionTreeDiscretiser, 12 | EqualFrequencyDiscretiser, 13 | EqualWidthDiscretiser, 14 | GeometricWidthDiscretiser, 15 | ) 16 | from tests.estimator_checks.estimator_checks import check_feature_engine_estimator 17 | 18 | sklearn_version = parse_version(parse_version(sklearn.__version__).base_version) 19 | 20 | 21 | _estimators = [ 22 | DecisionTreeDiscretiser(regression=False), 23 | EqualFrequencyDiscretiser(), 24 | EqualWidthDiscretiser(), 25 | ArbitraryDiscretiser(binning_dict={"x0": [-np.inf, 0, np.inf]}), 26 | GeometricWidthDiscretiser(), 27 | ] 28 | 29 | if sklearn_version < parse_version("1.6"): 30 | 31 | @pytest.mark.parametrize("estimator", _estimators) 32 | def test_check_estimator_from_sklearn(estimator): 33 | return check_estimator(estimator) 34 | 35 | else: 36 | 37 | @pytest.mark.parametrize("estimator", _estimators) 38 | def test_check_estimator_from_sklearn(estimator): 39 | return check_estimator( 40 | estimator=estimator, 41 | expected_failed_checks=estimator._more_tags()["_xfail_checks"], 42 | ) 43 | 44 | 45 | @pytest.mark.parametrize("estimator", _estimators) 46 | def test_check_estimator_from_feature_engine(estimator): 47 | if estimator.__class__.__name__ == "ArbitraryDiscretiser": 48 | estimator.set_params(binning_dict={"var_1": [-np.inf, 0, np.inf]}) 49 | return check_feature_engine_estimator(estimator) 50 | 51 | 52 | @pytest.mark.parametrize("transformer", _estimators) 53 | def test_transformers_within_pipeline(transformer): 54 | if transformer.__class__.__name__ == "ArbitraryDiscretiser": 55 | transformer.set_params(binning_dict={"feature_1": [-np.inf, 0, np.inf]}) 56 | 57 | X = pd.DataFrame({"feature_1": [1, 2, 3, 4, 5], "feature_2": [6, 7, 8, 9, 10]}) 58 | y = pd.Series([0, 1, 0, 1, 0]) 59 | 60 | pipe = Pipeline([("trs", transformer)]).set_output(transform="pandas") 61 | 62 | Xtt = transformer.fit_transform(X, y) 63 | Xtp = pipe.fit_transform(X, y) 64 | 65 | pd.testing.assert_frame_equal(Xtt, Xtp) 66 | -------------------------------------------------------------------------------- /tests/test_encoding/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/tests/test_encoding/__init__.py -------------------------------------------------------------------------------- /tests/test_encoding/test_base_encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/tests/test_encoding/test_base_encoders/__init__.py -------------------------------------------------------------------------------- /tests/test_encoding/test_base_encoders/test_categorical_init_mixin.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from feature_engine.encoding.base_encoder import CategoricalInitMixin 4 | 5 | 6 | @pytest.mark.parametrize("param", [1, "hola", [1, 2, 0], (True, False)]) 7 | def test_raises_error_when_ignore_format_not_permitted(param): 8 | with pytest.raises(ValueError) as record: 9 | CategoricalInitMixin(ignore_format=param) 10 | msg = f"ignore_format takes only booleans True and False. Got {param} instead." 
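    # the error raised by CategoricalInitMixin must match this message verbatim, including the rejected value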
11 | assert str(record.value) == msg 12 | 13 | 14 | @pytest.mark.parametrize("param", [True, False]) 15 | def test_ignore_format_value_assignment(param): 16 | enc = CategoricalInitMixin(ignore_format=param) 17 | assert enc.ignore_format == param 18 | -------------------------------------------------------------------------------- /tests/test_encoding/test_base_encoders/test_categorical_init_mixin_na.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from feature_engine.encoding.base_encoder import CategoricalInitMixinNA 4 | 5 | 6 | @pytest.mark.parametrize("param", [1, "hola", [1, 2, 0], (True, False)]) 7 | def test_raises_error_when_ignore_format_not_permitted(param): 8 | with pytest.raises(ValueError) as record: 9 | CategoricalInitMixinNA(ignore_format=param) 10 | msg = f"ignore_format takes only booleans True and False. Got {param} instead." 11 | assert str(record.value) == msg 12 | 13 | 14 | @pytest.mark.parametrize("param", [1, "hola", [1, 2, 0], (True, False)]) 15 | def test_raises_error_when_missing_values_not_permitted(param): 16 | with pytest.raises(ValueError) as record: 17 | CategoricalInitMixinNA(missing_values=param) 18 | msg = f"missing_values takes only values 'raise' or 'ignore'. Got {param} instead." 19 | assert str(record.value) == msg 20 | 21 | 22 | @pytest.mark.parametrize("param", [(True, "ignore"), (False, "raise")]) 23 | def test_correct_param_value_assignment(param): 24 | format_, na_ = param 25 | enc = CategoricalInitMixinNA(ignore_format=format_, missing_values=na_) 26 | assert enc.ignore_format == format_ 27 | assert enc.missing_values == na_ 28 | -------------------------------------------------------------------------------- /tests/test_encoding/test_helper_functions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from feature_engine.encoding._helper_functions import check_parameter_unseen 4 | 5 | 6 | @pytest.mark.parametrize("accepted", ["one", False, [1, 2], ("one", "two"), 1]) 7 | def test_raises_error_when_accepted_values_not_permitted(accepted): 8 | with pytest.raises(ValueError) as record: 9 | check_parameter_unseen("zero", accepted) 10 | msg = "accepted_values should be a list of strings. " f" Got {accepted} instead." 11 | assert str(record.value) == msg 12 | 13 | 14 | @pytest.mark.parametrize("accepted", [["one", "two"], ["three", "four"]]) 15 | def test_raises_error_when_error_not_in_accepted_values(accepted): 16 | with pytest.raises(ValueError) as record: 17 | check_parameter_unseen("zero", accepted) 18 | msg = ( 19 | f"Parameter `unseen` takes only values {', '.join(accepted)}." 20 | " Got zero instead." 
21 | ) 22 | assert str(record.value) == msg 23 | -------------------------------------------------------------------------------- /tests/test_encoding/test_woe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/tests/test_encoding/test_woe/__init__.py -------------------------------------------------------------------------------- /tests/test_encoding/test_woe/test_woe_class.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import pytest 4 | 5 | from feature_engine.encoding.woe import WoE 6 | 7 | 8 | def test_woe_calculation(df_enc): 9 | pos_exp = pd.Series({"A": 0.333333, "B": 0.333333, "C": 0.333333}) 10 | neg_exp = pd.Series({"A": 0.285714, "B": 0.571429, "C": 0.142857}) 11 | 12 | woe_class = WoE() 13 | pos, neg, woe = woe_class._calculate_woe(df_enc, df_enc["target"], "var_A") 14 | 15 | pd.testing.assert_series_equal(pos, pos_exp, check_names=False) 16 | pd.testing.assert_series_equal(neg, neg_exp, check_names=False) 17 | pd.testing.assert_series_equal(np.log(pos_exp / neg_exp), woe, check_names=False) 18 | 19 | 20 | def test_woe_error(): 21 | df = { 22 | "var_A": ["B"] * 9 + ["A"] * 6 + ["C"] * 3 + ["D"] * 2, 23 | "var_B": ["A"] * 10 + ["B"] * 6 + ["C"] * 4, 24 | "target": [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0], 25 | } 26 | df = pd.DataFrame(df) 27 | woe_class = WoE() 28 | 29 | with pytest.raises(ValueError): 30 | woe_class._calculate_woe(df, df["target"], "var_A") 31 | 32 | 33 | @pytest.mark.parametrize("fill_value", [1, 10, 0.1]) 34 | def test_fill_value(fill_value): 35 | df = { 36 | "var_A": ["A"] * 9 + ["B"] * 6 + ["C"] * 3 + ["D"] * 2, 37 | "var_B": ["A"] * 10 + ["B"] * 6 + ["C"] * 4, 38 | "target": [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0], 39 | } 40 | df = pd.DataFrame(df) 41 | 42 | pos_exp = pd.Series( 43 | { 44 | "A": 0.2857142857142857, 45 | "B": 0.2857142857142857, 46 | "C": 0.42857142857142855, 47 | "D": fill_value, 48 | } 49 | ) 50 | neg_exp = pd.Series( 51 | { 52 | "A": 0.5384615384615384, 53 | "B": 0.3076923076923077, 54 | "C": fill_value, 55 | "D": 0.15384615384615385, 56 | } 57 | ) 58 | 59 | woe_class = WoE() 60 | pos, neg, woe = woe_class._calculate_woe( 61 | df, df["target"], "var_A", fill_value=fill_value 62 | ) 63 | 64 | pd.testing.assert_series_equal(pos, pos_exp, check_names=False) 65 | pd.testing.assert_series_equal(neg, neg_exp, check_names=False) 66 | pd.testing.assert_series_equal(np.log(pos_exp / neg_exp), woe, check_names=False) 67 | -------------------------------------------------------------------------------- /tests/test_imputation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/tests/test_imputation/__init__.py -------------------------------------------------------------------------------- /tests/test_imputation/test_mean_mdian_imputer.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pytest 3 | 4 | from feature_engine.imputation import MeanMedianImputer 5 | 6 | 7 | def test_mean_imputation_and_automatically_select_variables(df_na): 8 | # set up transformer 9 | imputer = MeanMedianImputer(imputation_method="mean", variables=None) 10 | X_transformed = 
imputer.fit_transform(df_na) 11 | 12 | # set up reference result 13 | X_reference = df_na.copy() 14 | X_reference["Age"] = X_reference["Age"].fillna(28.714285714285715) 15 | X_reference["Marks"] = X_reference["Marks"].fillna(0.6833333333333332) 16 | 17 | # test init params 18 | assert imputer.imputation_method == "mean" 19 | assert imputer.variables is None 20 | 21 | # test fit attributes 22 | assert imputer.variables_ == ["Age", "Marks"] 23 | imputer.imputer_dict_ = { 24 | key: round(value, 3) for (key, value) in imputer.imputer_dict_.items() 25 | } 26 | assert imputer.imputer_dict_ == { 27 | "Age": 28.714, 28 | "Marks": 0.683, 29 | } 30 | assert imputer.n_features_in_ == 6 31 | 32 | # test transform output: 33 | # selected variables should have no NA 34 | # not selected variables should still have NA 35 | assert X_transformed[["Age", "Marks"]].isnull().sum().sum() == 0 36 | assert X_transformed[["Name", "City"]].isnull().sum().sum() > 0 37 | pd.testing.assert_frame_equal(X_transformed, X_reference) 38 | 39 | 40 | def test_median_imputation_when_user_enters_single_variables(df_na): 41 | # set up trasnformer 42 | imputer = MeanMedianImputer(imputation_method="median", variables=["Age"]) 43 | X_transformed = imputer.fit_transform(df_na) 44 | 45 | # set up reference output 46 | X_reference = df_na.copy() 47 | X_reference["Age"] = X_reference["Age"].fillna(23.0) 48 | 49 | # test init params 50 | assert imputer.imputation_method == "median" 51 | assert imputer.variables == ["Age"] 52 | 53 | # test fit attributes 54 | assert imputer.n_features_in_ == 6 55 | assert imputer.imputer_dict_ == {"Age": 23.0} 56 | 57 | # test transform output 58 | assert X_transformed["Age"].isnull().sum() == 0 59 | pd.testing.assert_frame_equal(X_transformed, X_reference) 60 | 61 | 62 | def test_error_with_wrong_imputation_method(): 63 | with pytest.raises(ValueError): 64 | MeanMedianImputer(imputation_method="arbitrary") 65 | -------------------------------------------------------------------------------- /tests/test_outliers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/tests/test_outliers/__init__.py -------------------------------------------------------------------------------- /tests/test_prediction/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/tests/test_prediction/__init__.py -------------------------------------------------------------------------------- /tests/test_prediction/conftest.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pytest 3 | 4 | 5 | @pytest.fixture(scope="module") 6 | def df_classification(): 7 | df = { 8 | "cat_var_A": ["A"] * 5 + ["B"] * 5 + ["C"] * 5 + ["D"] * 5, 9 | "cat_var_B": ["A"] * 6 10 | + ["B"] * 2 11 | + ["C"] * 2 12 | + ["B"] * 2 13 | + ["C"] * 2 14 | + ["D"] * 6, 15 | "num_var_A": [1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4], 16 | "num_var_B": [1, 1, 1, 1, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4], 17 | } 18 | 19 | df = pd.DataFrame(df) 20 | y = pd.Series([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]) 21 | return df, y 22 | 23 | 24 | @pytest.fixture(scope="module") 25 | def df_regression(): 26 | df = { 27 | "cat_var_A": ["A"] * 5 + ["B"] * 5 + ["C"] * 5 + ["D"] * 5, 28 
| "cat_var_B": ["A"] * 6 29 | + ["B"] * 2 30 | + ["C"] * 2 31 | + ["B"] * 2 32 | + ["C"] * 2 33 | + ["D"] * 6, 34 | "num_var_A": [1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4], 35 | "num_var_B": [1, 1, 1, 1, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4], 36 | } 37 | 38 | df = pd.DataFrame(df) 39 | y = pd.Series([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3]) 40 | return df, y 41 | -------------------------------------------------------------------------------- /tests/test_preprocessing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/tests/test_preprocessing/__init__.py -------------------------------------------------------------------------------- /tests/test_scaling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/tests/test_scaling/__init__.py -------------------------------------------------------------------------------- /tests/test_selection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/tests/test_selection/__init__.py -------------------------------------------------------------------------------- /tests/test_selection/test_base_selector.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pandas.testing import assert_frame_equal 3 | 4 | from feature_engine.selection.base_selector import BaseSelector 5 | 6 | 7 | @pytest.mark.parametrize("val", [None, "hola", [True]]) 8 | def test_confirm_variables_in_init(val): 9 | with pytest.raises(ValueError): 10 | BaseSelector(confirm_variables=val) 11 | 12 | 13 | class MockClass(BaseSelector): 14 | def __init__(self, variables=None, confirm_variables=False): 15 | self.variables = variables 16 | self.confirm_variables = confirm_variables 17 | 18 | def fit(self, X, y=None): 19 | self.features_to_drop_ = ["Name", "Marks"] 20 | self._get_feature_names_in(X) 21 | return self 22 | 23 | 24 | def test_transform_method(df_vartypes): 25 | transformer = MockClass() 26 | transformer.fit(df_vartypes) 27 | Xt = transformer.transform(df_vartypes) 28 | 29 | # tests output of transform 30 | assert_frame_equal(Xt, df_vartypes.drop(["Name", "Marks"], axis=1)) 31 | 32 | # tests this line: X = X[self.feature_names_in_] 33 | assert_frame_equal( 34 | transformer.transform(df_vartypes[["City", "Age", "Name", "Marks", "dob"]]), 35 | Xt, 36 | ) 37 | # test error when there is a df shape missmatch 38 | with pytest.raises(ValueError): 39 | assert transformer.transform(df_vartypes[["Age", "Marks"]]) 40 | 41 | 42 | def test_get_feature_names_in(df_vartypes): 43 | tr = MockClass() 44 | tr._get_feature_names_in(df_vartypes) 45 | assert tr.n_features_in_ == df_vartypes.shape[1] 46 | assert tr.feature_names_in_ == list(df_vartypes.columns) 47 | 48 | 49 | def test_get_support(df_vartypes): 50 | tr = MockClass() 51 | tr.fit(df_vartypes) 52 | v_bool = [False, True, True, False, True] 53 | v_ind = [1, 2, 4] 54 | assert tr.get_support() == v_bool 55 | assert list(tr.get_support(indices=True)) == v_ind 56 | -------------------------------------------------------------------------------- /tests/test_sklearn_compatible/test_set_output.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.datasets import load_iris 4 | from sklearn.linear_model import LogisticRegression 5 | from sklearn.pipeline import make_pipeline 6 | from sklearn.preprocessing import StandardScaler 7 | 8 | from feature_engine.transformation import YeoJohnsonTransformer 9 | 10 | 11 | def test_pipeline_with_set_output_sklearn_last(): 12 | 13 | X, y = load_iris(return_X_y=True, as_frame=True) 14 | 15 | pipeline = make_pipeline( 16 | YeoJohnsonTransformer(), StandardScaler(), LogisticRegression() 17 | ).set_output(transform="default") 18 | 19 | pipeline.fit(X, y) 20 | 21 | X_t = pipeline[:-1].transform(X) 22 | assert isinstance(X_t, np.ndarray) 23 | 24 | pipeline.set_output(transform="pandas") 25 | X_t = pipeline[:-1].transform(X) 26 | 27 | assert isinstance(X_t, pd.DataFrame) 28 | 29 | 30 | def test_pipeline_with_set_output_featureengine_last(): 31 | 32 | X, y = load_iris(return_X_y=True, as_frame=True) 33 | 34 | pipeline = make_pipeline( 35 | StandardScaler(), YeoJohnsonTransformer(), LogisticRegression() 36 | ).set_output(transform="default") 37 | 38 | pipeline.fit(X, y) 39 | 40 | X_t = pipeline[:-1].transform(X) 41 | pipeline.fit(X, y) 42 | assert isinstance(X_t, pd.DataFrame) 43 | 44 | pipeline.set_output(transform="pandas") 45 | pipeline.fit(X, y) 46 | 47 | X_t = pipeline[:-1].transform(X) 48 | 49 | assert isinstance(X_t, pd.DataFrame) 50 | 51 | 52 | def test_individual_transformer(): 53 | 54 | X, y = load_iris(return_X_y=True, as_frame=True) 55 | 56 | transformer = YeoJohnsonTransformer() 57 | transformer.set_output(transform="default") 58 | transformer.fit(X) 59 | 60 | X_t = transformer.transform(X) 61 | assert isinstance(X_t, pd.DataFrame) 62 | 63 | transformer.set_output(transform="pandas") 64 | X_t = transformer.transform(X) 65 | 66 | assert isinstance(X_t, pd.DataFrame) 67 | -------------------------------------------------------------------------------- /tests/test_time_series/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/tests/test_time_series/__init__.py -------------------------------------------------------------------------------- /tests/test_time_series/test_forecasting/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/tests/test_time_series/test_forecasting/__init__.py -------------------------------------------------------------------------------- /tests/test_time_series/test_forecasting/conftest.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pytest 3 | 4 | 5 | @pytest.fixture(scope="module") 6 | def df_time(): 7 | date_time = [ 8 | "2020-05-15 12:00:00", 9 | "2020-05-15 12:15:00", 10 | "2020-05-15 12:30:00", 11 | "2020-05-15 12:45:00", 12 | "2020-05-15 13:00:00", 13 | "2020-05-15 13:15:00", 14 | "2020-05-15 13:30:00", 15 | "2020-05-15 13:45:00", 16 | "2020-05-15 14:00:00", 17 | "2020-05-15 14:15:00", 18 | "2020-05-15 14:30:00", 19 | "2020-05-15 14:45:00", 20 | "2020-05-15 15:00:00", 21 | "2020-05-15 15:15:00", 22 | "2020-05-15 15:30:00", 23 | ] 24 | 25 | data = { 26 | "ambient_temp": [ 27 | 31.31, 28 | 31.51, 29 | 32.15, 30 | 32.39, 31 | 32.62, 32 | 32.5, 33 | 32.52, 34 | 
            32.68, 33.76, 34.13, 34.08, 33.7, 33.89, 34.04, 34.4,
        ],
        "module_temp": [
            49.18, 49.84, 52.35, 50.63, 49.61, 47.01, 46.67, 47.52,
            49.8, 55.03, 54.52, 47.62, 46.03, 44.29, 46.74,
        ],
        "irradiation": [
            0.51, 0.79, 0.65, 0.76, 0.42, 0.49, 0.57, 0.56,
            0.74, 0.89, 0.47, 0.54, 0.4, 0.45, 0.57,
        ],
        "color": ["blue"] * 10 + ["green"] * 5,
    }

    df = pd.DataFrame(data, index=date_time)
    df.index = pd.to_datetime(df.index)
    return df
--------------------------------------------------------------------------------
/tests/test_transformation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/tests/test_transformation/__init__.py
--------------------------------------------------------------------------------
/tests/test_transformation/test_arcsin_transformer.py:
--------------------------------------------------------------------------------
import pandas as pd
import pytest
from sklearn.exceptions import NotFittedError

from feature_engine.transformation import ArcsinTransformer


def test_transform_and_inverse_transform(df_vartypes):
    transformer = ArcsinTransformer(variables=["Marks"])
    X = transformer.fit_transform(df_vartypes)

    # expected output
    transf_df = df_vartypes.copy()
    transf_df["Marks"] = [1.24905, 1.10715, 0.99116, 0.88607]

    # test transform output
    pd.testing.assert_frame_equal(X, transf_df)

    # test inverse_transform
    Xit = transformer.inverse_transform(X)

    # convert numbers to original format.
23 | Xit["Marks"] = Xit["Marks"].round(1) 24 | 25 | # test 26 | pd.testing.assert_frame_equal(Xit, df_vartypes) 27 | 28 | 29 | def test_fit_raises_error_if_na_in_df(df_na): 30 | # test case 2: when dataset contains na, fit method 31 | transformer = ArcsinTransformer(variables=["Marks"]) 32 | with pytest.raises(ValueError): 33 | transformer.fit(df_na) 34 | 35 | 36 | def test_transform_raises_error_if_na_in_df(df_vartypes, df_na): 37 | # test case 3: when dataset contains na, transform method 38 | transformer = ArcsinTransformer(variables=["Marks"]) 39 | transformer.fit(df_vartypes) 40 | with pytest.raises(ValueError): 41 | transformer.transform(df_na[df_vartypes.columns]) 42 | 43 | 44 | def test_error_if_df_contains_outside_range_values(df_vartypes): 45 | # test error when data contains value outside range [0, +1] 46 | df_out_range = df_vartypes.copy() 47 | df_out_range.loc[1, "Marks"] = 2 48 | 49 | transformer = ArcsinTransformer(variables=["Marks"]) 50 | # test case 4: when variable contains value outside range, fit 51 | with pytest.raises(ValueError): 52 | transformer.fit(df_out_range) 53 | 54 | # test case 5: when variable contains value outside range, transform 55 | transformer.fit(df_vartypes) 56 | with pytest.raises(ValueError): 57 | transformer.transform(df_out_range) 58 | 59 | # when selecting variables automatically and some are outside range 60 | transformer = ArcsinTransformer() 61 | with pytest.raises(ValueError): 62 | transformer.fit(df_vartypes) 63 | 64 | 65 | def test_non_fitted_error(df_vartypes): 66 | transformer = ArcsinTransformer(variables="Marks") 67 | with pytest.raises(NotFittedError): 68 | transformer.transform(df_vartypes) 69 | -------------------------------------------------------------------------------- /tests/test_transformation/test_boxcox_transformer.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pytest 3 | from sklearn.exceptions import NotFittedError 4 | 5 | from feature_engine.transformation import BoxCoxTransformer 6 | 7 | 8 | def test_automatically_finds_variables(df_vartypes): 9 | # test case 1: automatically select variables 10 | transformer = BoxCoxTransformer(variables=None) 11 | X = transformer.fit_transform(df_vartypes) 12 | 13 | # expected output 14 | transf_df = df_vartypes.copy() 15 | transf_df["Age"] = [9.78731, 10.1666, 9.40189, 9.0099] 16 | transf_df["Marks"] = [-0.101687, -0.207092, -0.316843, -0.431788] 17 | 18 | # test init params 19 | assert transformer.variables is None 20 | # test fit attr 21 | assert transformer.variables_ == ["Age", "Marks"] 22 | assert transformer.n_features_in_ == 5 23 | # test transform output 24 | pd.testing.assert_frame_equal(X, transf_df) 25 | 26 | # test inverse_transform 27 | Xit = transformer.inverse_transform(X) 28 | 29 | # convert numbers to original format. 
30 | Xit["Age"] = Xit["Age"].round().astype("int64") 31 | Xit["Marks"] = Xit["Marks"].round(1) 32 | 33 | # test 34 | pd.testing.assert_frame_equal(Xit, df_vartypes) 35 | 36 | 37 | def test_fit_raises_error_if_df_contains_na(df_na): 38 | # test case 2: when dataset contains na, fit method 39 | transformer = BoxCoxTransformer() 40 | with pytest.raises(ValueError): 41 | transformer.fit(df_na) 42 | 43 | 44 | def test_transform_raises_error_if_df_contains_na(df_vartypes, df_na): 45 | # test case 3: when dataset contains na, transform method 46 | transformer = BoxCoxTransformer() 47 | transformer.fit(df_vartypes) 48 | with pytest.raises(ValueError): 49 | transformer.transform(df_na[["Name", "City", "Age", "Marks", "dob"]]) 50 | 51 | 52 | def test_error_if_df_contains_negative_values(df_vartypes): 53 | # test error when data contains negative values 54 | df_neg = df_vartypes.copy() 55 | df_neg.loc[1, "Age"] = -1 56 | 57 | # test case 4: when variable contains negative value, fit 58 | transformer = BoxCoxTransformer() 59 | with pytest.raises(ValueError): 60 | transformer.fit(df_neg) 61 | 62 | # test case 5: when variable contains negative value, transform 63 | transformer = BoxCoxTransformer() 64 | transformer.fit(df_vartypes) 65 | with pytest.raises(ValueError): 66 | transformer.transform(df_neg) 67 | 68 | 69 | def test_non_fitted_error(df_vartypes): 70 | transformer = BoxCoxTransformer() 71 | with pytest.raises(NotFittedError): 72 | transformer.transform(df_vartypes) 73 | -------------------------------------------------------------------------------- /tests/test_transformation/test_reciprocal_transformer.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pytest 3 | from sklearn.exceptions import NotFittedError 4 | 5 | from feature_engine.transformation import ReciprocalTransformer 6 | 7 | 8 | def test_automatically_find_variables(df_vartypes): 9 | # test case 1: automatically select variables 10 | transformer = ReciprocalTransformer(variables=None) 11 | X = transformer.fit_transform(df_vartypes) 12 | 13 | # expected output 14 | transf_df = df_vartypes.copy() 15 | transf_df["Age"] = [0.05, 0.047619, 0.0526316, 0.0555556] 16 | transf_df["Marks"] = [1.11111, 1.25, 1.42857, 1.66667] 17 | 18 | # test init params 19 | assert transformer.variables is None 20 | # test fit attr 21 | assert transformer.variables_ == ["Age", "Marks"] 22 | assert transformer.n_features_in_ == 5 23 | # test transform output 24 | pd.testing.assert_frame_equal(X, transf_df) 25 | 26 | # test inverse_transform 27 | Xit = transformer.inverse_transform(X) 28 | 29 | # convert numbers to original format. 
30 | Xit["Age"] = Xit["Age"].round().astype("int64") 31 | Xit["Marks"] = Xit["Marks"].round(1) 32 | 33 | # test 34 | pd.testing.assert_frame_equal(Xit, df_vartypes) 35 | 36 | 37 | def test_fit_raises_error_if_na_in_df(df_na): 38 | # test case 2: when dataset contains na, fit method 39 | with pytest.raises(ValueError): 40 | transformer = ReciprocalTransformer() 41 | transformer.fit(df_na) 42 | 43 | 44 | def test_transform_raises_error_if_na_in_df(df_vartypes, df_na): 45 | # test case 3: when dataset contains na, transform method 46 | with pytest.raises(ValueError): 47 | transformer = ReciprocalTransformer() 48 | transformer.fit(df_vartypes) 49 | transformer.transform(df_na[["Name", "City", "Age", "Marks", "dob"]]) 50 | 51 | 52 | def test_error_if_df_contains_0_as_value(df_vartypes): 53 | # test error when data contains value zero 54 | df_neg = df_vartypes.copy() 55 | df_neg.loc[1, "Age"] = 0 56 | 57 | # test case 4: when variable contains zero, fit 58 | with pytest.raises(ValueError): 59 | transformer = ReciprocalTransformer() 60 | transformer.fit(df_neg) 61 | 62 | # test case 5: when variable contains zero, transform 63 | with pytest.raises(ValueError): 64 | transformer = ReciprocalTransformer() 65 | transformer.fit(df_vartypes) 66 | transformer.transform(df_neg) 67 | 68 | 69 | def test_non_fitted_error(df_vartypes): 70 | with pytest.raises(NotFittedError): 71 | transformer = ReciprocalTransformer() 72 | transformer.transform(df_vartypes) 73 | -------------------------------------------------------------------------------- /tests/test_variable_handling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/tests/test_variable_handling/__init__.py -------------------------------------------------------------------------------- /tests/test_variable_handling/conftest.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pytest 3 | 4 | 5 | @pytest.fixture 6 | def df(): 7 | df = pd.DataFrame( 8 | { 9 | "Name": ["tom", "nick", "krish", "jack"], 10 | "City": ["London", "Manchester", "Liverpool", "Bristol"], 11 | "Age": [20, 21, 19, 18], 12 | "Marks": [0.9, 0.8, 0.7, 0.6], 13 | "date_range": pd.date_range("2020-02-24", periods=4, freq="min"), 14 | "date_obj0": ["2020-02-24", "2020-02-25", "2020-02-26", "2020-02-27"], 15 | "date_range_tz": pd.date_range( 16 | "2020-02-24", periods=4, freq="min" 17 | ).tz_localize("UTC"), 18 | } 19 | ) 20 | df["Name"] = df["Name"].astype("category") 21 | return df 22 | 23 | 24 | @pytest.fixture 25 | def df_int(df): 26 | df = df.copy() 27 | df.columns = range(1, len(df.columns) + 1) 28 | return df 29 | 30 | 31 | @pytest.fixture 32 | def df_datetime(df): 33 | df = df.copy() 34 | 35 | df["date_obj1"] = ["01-Jan-2010", "24-Feb-1945", "14-Jun-2100", "17-May-1999"] 36 | df["date_obj2"] = ["10/11/12", "12/31/09", "06/30/95", "03/17/04"] 37 | df["time_obj"] = ["21:45:23", "09:15:33", "12:34:59", "03:27:02"] 38 | 39 | df["time_objTZ"] = df["time_obj"].add(["+5", "+11", "-3", "-8"]) 40 | df["date_obj1"] = df["date_obj1"].astype("category") 41 | df["Age"] = df["Age"].astype("O") 42 | return df 43 | -------------------------------------------------------------------------------- /tests/test_variable_handling/test_remove_variables.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pytest 3 | 4 | from 
from feature_engine.variable_handling.retain_variables import retain_variables_if_in_df

test_dict = [
    (
        pd.DataFrame(columns=["A", "B", "C", "D", "E"]),
        ["A", "C", "B", "G", "H"],
        ["A", "C", "B"],
        ["X", "Y"],
    ),
    (pd.DataFrame(columns=[1, 2, 3, 4, 5]), [1, 2, 4, 6], [1, 2, 4], [6, 7]),
    (pd.DataFrame(columns=[1, 2, 3, 4, 5]), 1, [1], 7),
    (pd.DataFrame(columns=["A", "B", "C", "D", "E"]), "C", ["C"], "G"),
]


@pytest.mark.parametrize("df, variables, overlap, col_not_in_df", test_dict)
def test_retain_variables_if_in_df(df, variables, overlap, col_not_in_df):

    msg = "None of the variables in the list are present in the dataframe."

    assert retain_variables_if_in_df(df, variables) == overlap

    with pytest.raises(ValueError) as record:
        retain_variables_if_in_df(df, col_not_in_df)
    assert str(record.value) == msg
--------------------------------------------------------------------------------
/tests/test_wrappers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/feature-engine/feature_engine/ead24576946db3d7e9eac9d2946ad4a27a46030e/tests/test_wrappers/__init__.py
--------------------------------------------------------------------------------
/tests/test_wrappers/test_check_estimator_wrappers.py:
--------------------------------------------------------------------------------
import pytest
import sklearn
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.utils.estimator_checks import check_estimator
from sklearn.utils.fixes import parse_version

from feature_engine.wrappers import SklearnTransformerWrapper
from tests.estimator_checks.estimator_checks import (
    check_raises_error_when_input_not_a_df,
)
from tests.estimator_checks.fit_functionality_checks import check_feature_names_in
from tests.estimator_checks.non_fitted_error_checks import check_raises_non_fitted_error
from tests.estimator_checks.variable_selection_checks import (
    check_all_types_variables_assignment,
    check_numerical_variables_assignment,
)

sklearn_version = parse_version(parse_version(sklearn.__version__).base_version)

if sklearn_version < parse_version("1.6"):

    def test_sklearn_transformer_wrapper():
        check_estimator(SklearnTransformerWrapper(transformer=SimpleImputer()))

else:

    def test_sklearn_transformer_wrapper():
        check_estimator(
            estimator=SklearnTransformerWrapper(transformer=SimpleImputer()),
            expected_failed_checks=SklearnTransformerWrapper(
                transformer=SimpleImputer()
            )._more_tags()["_xfail_checks"],
        )


@pytest.mark.parametrize(
    "estimator", [SklearnTransformerWrapper(transformer=OrdinalEncoder())]
)
def test_check_estimator_from_feature_engine(estimator):
    check_raises_non_fitted_error(estimator)
    check_raises_error_when_input_not_a_df(estimator)
    check_feature_names_in(estimator)


def test_check_variables_assignment():
    check_numerical_variables_assignment(
        SklearnTransformerWrapper(transformer=StandardScaler())
    )
    check_all_types_variables_assignment(
        SklearnTransformerWrapper(transformer=OrdinalEncoder())
    )


def test_raises_error_when_no_transformer_passed():
    # this transformer needs an estimator as an input param.
    with pytest.raises(TypeError):
        SklearnTransformerWrapper()
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
[tox]
envlist = py39, py310, py311-sklearn150, py311-sklearn160, py312, codecov, docs, stylechecks, typechecks
skipsdist = true

[testenv]
install_command = pip install {opts} {packages}
envdir = {toxworkdir}/unit_tests
setenv =
    PYTHONPATH=.
    COVERAGE_RCFILE = {envtmpdir}/coveragerc
commands =
    pytest tests

[testenv:py39]
deps =
    -rtest_requirements.txt

[testenv:py310]
deps =
    -rtest_requirements.txt

[testenv:py311-sklearn150]
deps =
    -rtest_requirements.txt
    scikit-learn==1.5.1

[testenv:py311-sklearn160]
deps =
    -rtest_requirements.txt
    scikit-learn==1.6.1

[testenv:py312]
deps =
    -rtest_requirements.txt

[testenv:codecov]
deps =
    -rtest_requirements.txt
commands_pre =
    {envpython} -c 'from pathlib import Path; Path(r"{env:COVERAGE_RCFILE}").write_text(Path(".coveragerc").read_text())'
commands =
    coverage run -m pytest -v
    coverage report

[testenv:docs]
deps =
    -r docs/requirements.txt
commands =
    sphinx-build -W -b html -d {envtmpdir}/doctrees docs {envtmpdir}/html

[testenv:stylechecks]
deps =
    flake8
commands = {posargs:flake8 feature_engine tests}

[testenv:typechecks]
deps =
    mypy
commands = {posargs:mypy feature_engine}

[flake8]
exclude = .git, env
# match black code formatter
max-line-length = 88

[isort]
profile = black
line_length = 88
lines_between_sections = 1
known_first_party = "sentry"
--------------------------------------------------------------------------------