├── .github └── workflows │ ├── black-ruff.yml │ ├── codeql.yml │ ├── linux-ci.yml │ ├── release.yml │ ├── wheels-any.yml │ └── windows-macos-ci.yml ├── .gitignore ├── CHANGELOGS.md ├── LICENSE ├── MANIFEST.in ├── NOTICE ├── README.md ├── benchmarks ├── README.txt ├── bench_plot_onnxruntime_decision_tree.py ├── bench_plot_onnxruntime_hgb.py ├── bench_plot_onnxruntime_linreg.py ├── bench_plot_onnxruntime_logreg.py ├── bench_plot_onnxruntime_random_forest.py ├── bench_plot_onnxruntime_random_forest_reg.py ├── bench_plot_onnxruntime_svm_reg.py └── post_graph.py ├── docs ├── api_summary.rst ├── conf.py ├── examples │ ├── Au-Salon-de-l-agriculture-la-campagne-recrute.jpg │ ├── README.txt │ ├── daisy_wikipedia.jpg │ ├── plot_backend.py │ ├── plot_benchmark_cdist.py │ ├── plot_benchmark_pipeline.py │ ├── plot_black_op.py │ ├── plot_cast_transformer.py │ ├── plot_complex_pipeline.py │ ├── plot_convert_decision_function.py │ ├── plot_convert_model.py │ ├── plot_convert_syntax.py │ ├── plot_convert_zipmap.py │ ├── plot_custom_model.py │ ├── plot_custom_parser.py │ ├── plot_custom_parser_alternative.py │ ├── plot_errors_onnxruntime.py │ ├── plot_gpr.py │ ├── plot_intermediate_outputs.py │ ├── plot_investigate_pipeline.py │ ├── plot_logging.py │ ├── plot_metadata.py │ ├── plot_nmf.py │ ├── plot_onnx_operators.py │ ├── plot_output_onnx_single_probability.py │ ├── plot_pipeline.py │ ├── plot_pipeline_lightgbm.py │ ├── plot_pipeline_xgboost.py │ └── plot_tfidfvectorizer.py ├── exts │ ├── github_link.py │ └── sphinx_skl2onnx_extension.py ├── images │ └── woe.png ├── index.rst ├── index_tutorial.rst ├── introduction.rst ├── logo_main.png ├── parameterized.rst ├── pipeline.png ├── pipeline.rst ├── requirements.txt ├── supported.rst ├── tests │ ├── test_documentation_examples.py │ ├── test_utils_benchmark.py │ └── test_utils_classes.py ├── tutorial │ ├── README.txt │ ├── plot_abegin_convert_pipeline.py │ ├── plot_bbegin_measure_time.py │ ├── plot_catwoe_transformer.py │ ├── plot_cbegin_opset.py │ ├── plot_dbegin_options.py │ ├── plot_dbegin_options_list.py │ ├── plot_dbegin_options_zipmap.py │ ├── plot_ebegin_float_double.py │ ├── plot_fbegin_investigate.py │ ├── plot_gbegin_cst.py │ ├── plot_gbegin_dataframe.py │ ├── plot_gconverting.py │ ├── plot_gexternal_catboost.py │ ├── plot_gexternal_lightgbm.py │ ├── plot_gexternal_lightgbm_reg.py │ ├── plot_gexternal_xgboost.py │ ├── plot_icustom_converter.py │ ├── plot_jcustom_syntax.py │ ├── plot_jfunction_transformer.py │ ├── plot_kcustom_converter_wrapper.py │ ├── plot_lcustom_options.py │ ├── plot_mcustom_parser.py │ ├── plot_ngrams.py │ ├── plot_transformer_discrepancy.py │ ├── plot_usparse_xgboost.py │ ├── plot_weird_pandas_and_hash.py │ ├── plot_wext_pyod_forest.py │ └── plot_woe_transformer.py ├── tutorial_1-5_external.rst ├── tutorial_1_simple.rst ├── tutorial_2-5_extlib.rst ├── tutorial_2_new_converter.rst └── tutorial_4_advanced.rst ├── pyproject.toml ├── requirements-dev.txt ├── requirements.txt ├── skl2onnx ├── __init__.py ├── __main__.py ├── _parse.py ├── _supported_operators.py ├── algebra │ ├── __init__.py │ ├── automation.py │ ├── complex_functions.py │ ├── custom_ops.py │ ├── graph_state.py │ ├── onnx_operator.py │ ├── onnx_operator_mixin.py │ ├── onnx_ops.py │ ├── onnx_subgraph_operator_mixin.py │ ├── sklearn_ops.py │ └── type_helper.py ├── common │ ├── __init__.py │ ├── _apply_operation.py │ ├── _container.py │ ├── _onnx_optimisation_common.py │ ├── _registration.py │ ├── _topology.py │ ├── data_types.py │ ├── exceptions.py │ ├── graph_builder_opset.py │ ├── onnx_optimisation_identity.py │ ├── shape_calculator.py │ ├── tree_ensemble.py │ ├── utils.py │ ├── utils_checking.py │ ├── utils_classifier.py │ └── utils_sklearn.py ├── convert.py ├── helpers │ ├── __init__.py │ ├── integration.py │ ├── investigate.py │ ├── onnx_helper.py │ └── onnx_rare_helper.py ├── operator_converters │ ├── __init__.py │ ├── _gp_kernels.py │ ├── ada_boost.py │ ├── array_feature_extractor.py │ ├── bagging.py │ ├── binariser.py │ ├── calibrated_classifier_cv.py │ ├── cast_op.py │ ├── class_labels.py │ ├── common.py │ ├── concat_op.py │ ├── cross_decomposition.py │ ├── decision_tree.py │ ├── decomposition.py │ ├── dict_vectoriser.py │ ├── feature_hasher.py │ ├── feature_selection.py │ ├── flatten_op.py │ ├── function_transformer.py │ ├── gamma_regressor.py │ ├── gaussian_mixture.py │ ├── gaussian_process.py │ ├── gradient_boosting.py │ ├── grid_search_cv.py │ ├── id_op.py │ ├── imputer_op.py │ ├── isolation_forest.py │ ├── k_bins_discretiser.py │ ├── k_means.py │ ├── kernel_pca.py │ ├── label_binariser.py │ ├── label_encoder.py │ ├── linear_classifier.py │ ├── linear_regressor.py │ ├── local_outlier_factor.py │ ├── multilayer_perceptron.py │ ├── multioutput.py │ ├── multiply_op.py │ ├── naive_bayes.py │ ├── nearest_neighbours.py │ ├── normaliser.py │ ├── one_hot_encoder.py │ ├── one_vs_one_classifier.py │ ├── one_vs_rest_classifier.py │ ├── ordinal_encoder.py │ ├── ovr_decision_function.py │ ├── pipelines.py │ ├── polynomial_features.py │ ├── power_transformer.py │ ├── quadratic_discriminant_analysis.py │ ├── quantile_transformer.py │ ├── random_forest.py │ ├── random_projection.py │ ├── random_trees_embedding.py │ ├── ransac_regressor.py │ ├── replace_op.py │ ├── scaler_op.py │ ├── sequence.py │ ├── sgd_classifier.py │ ├── sgd_oneclass_svm.py │ ├── stacking.py │ ├── support_vector_machines.py │ ├── target_encoder.py │ ├── text_vectoriser.py │ ├── tfidf_transformer.py │ ├── tfidf_vectoriser.py │ ├── tuned_threshold_classifier.py │ ├── voting_classifier.py │ ├── voting_regressor.py │ └── zip_map.py ├── proto │ └── __init__.py ├── shape_calculators │ ├── __init__.py │ ├── array_feature_extractor.py │ ├── cast_op.py │ ├── class_labels.py │ ├── concat.py │ ├── cross_decomposition.py │ ├── dict_vectorizer.py │ ├── ensemble_shapes.py │ ├── feature_hasher.py │ ├── feature_selection.py │ ├── flatten.py │ ├── function_transformer.py │ ├── gaussian_process.py │ ├── grid_search_cv.py │ ├── identity.py │ ├── imputer.py │ ├── isolation_forest.py │ ├── k_bins_discretiser.py │ ├── k_means.py │ ├── kernel_pca.py │ ├── label_binariser.py │ ├── label_encoder.py │ ├── linear_classifier.py │ ├── linear_regressor.py │ ├── local_outlier_factor.py │ ├── mixture.py │ ├── multioutput.py │ ├── multiply.py │ ├── nearest_neighbours.py │ ├── one_hot_encoder.py │ ├── one_vs_one_classifier.py │ ├── one_vs_rest_classifier.py │ ├── ordinal_encoder.py │ ├── ovr_decision_function.py │ ├── pipelines.py │ ├── polynomial_features.py │ ├── power_transformer.py │ ├── quadratic_discriminant_analysis.py │ ├── quantile_transformer.py │ ├── random_projection.py │ ├── random_trees_embedding.py │ ├── replace_op.py │ ├── scaler.py │ ├── sequence.py │ ├── sgd_oneclass_svm.py │ ├── support_vector_machines.py │ ├── svd.py │ ├── target_encoder.py │ ├── text_vectorizer.py │ ├── tfidf_transformer.py │ ├── tuned_threshold_classifier.py │ ├── voting_classifier.py │ ├── voting_regressor.py │ └── zip_map.py ├── sklapi │ ├── __init__.py │ ├── cast_regressor.py │ ├── cast_transformer.py │ ├── register.py │ ├── replace_transformer.py │ ├── sklearn_text.py │ ├── sklearn_text_onnx.py │ ├── woe_transformer.py │ └── woe_transformer_onnx.py └── tutorial │ ├── __init__.py │ ├── benchmark.py │ └── imagenet_classes.py ├── tests ├── benchmark.py ├── datasets │ ├── small_titanic.csv │ ├── treecl.onnx │ ├── treecl2.onnx │ └── treecl3.onnx ├── test_algebra_cascade.py ├── test_algebra_complex.py ├── test_algebra_converters.py ├── test_algebra_custom_model.py ├── test_algebra_custom_model_sub_estimator.py ├── test_algebra_deprecation.py ├── test_algebra_double.py ├── test_algebra_onnx_doc.py ├── test_algebra_onnx_operator_mixin_syntax.py ├── test_algebra_onnx_operators.py ├── test_algebra_onnx_operators_if.py ├── test_algebra_onnx_operators_opset.py ├── test_algebra_onnx_operators_scan.py ├── test_algebra_onnx_operators_sparse.py ├── test_algebra_onnx_operators_sub_estimator.py ├── test_algebra_onnx_operators_wrapped.py ├── test_algebra_symbolic.py ├── test_algebra_test_helper.py ├── test_algebra_to_onnx.py ├── test_convert.py ├── test_convert_options.py ├── test_custom_transformer_ordwoe.py ├── test_custom_transformer_tsne.py ├── test_investigate.py ├── test_issues_2024.py ├── test_issues_2025.py ├── test_onnx_helper.py ├── test_onnx_rare_helper.py ├── test_onnxruntime.py ├── test_op10.py ├── test_opset13.py ├── test_optimisation.py ├── test_options.py ├── test_other_converter_library_pipelines.py ├── test_parsing_options.py ├── test_raw_name.py ├── test_scikit_pandas.py ├── test_shapes.py ├── test_sklearn_adaboost_converter.py ├── test_sklearn_array_feature_extractor.py ├── test_sklearn_bagging_converter.py ├── test_sklearn_binarizer_converter.py ├── test_sklearn_calibrated_classifier_cv_converter.py ├── test_sklearn_cast_regressor.py ├── test_sklearn_cast_transformer.py ├── test_sklearn_classifiers_extreme.py ├── test_sklearn_concat.py ├── test_sklearn_constant_predictor.py ├── test_sklearn_count_vectorizer_converter.py ├── test_sklearn_count_vectorizer_converter_bug.py ├── test_sklearn_custom_nmf.py ├── test_sklearn_decision_tree_converters.py ├── test_sklearn_dict_vectorizer_converter.py ├── test_sklearn_documentation.py ├── test_sklearn_double_tensor_type_cls.py ├── test_sklearn_double_tensor_type_reg.py ├── test_sklearn_double_tensor_type_tr.py ├── test_sklearn_feature_hasher.py ├── test_sklearn_feature_selection_converters.py ├── test_sklearn_feature_union.py ├── test_sklearn_function_transformer_converter.py ├── test_sklearn_gamma_regressor.py ├── test_sklearn_gaussian_mixture_converter.py ├── test_sklearn_gaussian_process_classifier.py ├── test_sklearn_gaussian_process_regressor.py ├── test_sklearn_glm_classifier_converter.py ├── test_sklearn_glm_regressor_converter.py ├── test_sklearn_gradient_boosting_converters.py ├── test_sklearn_grid_search_cv_converter.py ├── test_sklearn_imputer_converter.py ├── test_sklearn_isolation_forest.py ├── test_sklearn_k_bins_discretiser_converter.py ├── test_sklearn_k_means_converter.py ├── test_sklearn_kernel_pca_converter.py ├── test_sklearn_label_binariser_converter.py ├── test_sklearn_label_encoder_converter.py ├── test_sklearn_local_outlier_factor.py ├── test_sklearn_mlp_converter.py ├── test_sklearn_multi_output.py ├── test_sklearn_naive_bayes_converter.py ├── test_sklearn_nearest_neighbour_converter.py ├── test_sklearn_normalizer_converter.py ├── test_sklearn_one_hot_encoder_converter.py ├── test_sklearn_one_vs_one_classifier_converter.py ├── test_sklearn_one_vs_rest_classifier_converter.py ├── test_sklearn_ordinal_encoder.py ├── test_sklearn_passive_aggressive_classifier_converter.py ├── test_sklearn_pca_converter.py ├── test_sklearn_perceptron_converter.py ├── test_sklearn_pipeline.py ├── test_sklearn_pipeline_concat_tfidf.py ├── test_sklearn_pipeline_within_pipeline.py ├── test_sklearn_pls_regression.py ├── test_sklearn_polynomial_features_converter.py ├── test_sklearn_power_transformer.py ├── test_sklearn_quadratic_discriminant_analysis_converter.py ├── test_sklearn_quantile_transformer.py ├── test_sklearn_random_forest_converters.py ├── test_sklearn_random_projection.py ├── test_sklearn_random_trees_embedding.py ├── test_sklearn_replace_transformer.py ├── test_sklearn_scaler_converter.py ├── test_sklearn_sgd_classifier_converter.py ├── test_sklearn_sgd_oneclass_svm_converter.py ├── test_sklearn_stacking.py ├── test_sklearn_svm_converters.py ├── test_sklearn_target_encoder_converter.py ├── test_sklearn_text.py ├── test_sklearn_tfidf_transformer_converter.py ├── test_sklearn_tfidf_transformer_converter_sparse.py ├── test_sklearn_tfidf_vectorizer_converter.py ├── test_sklearn_tfidf_vectorizer_converter_char.py ├── test_sklearn_tfidf_vectorizer_converter_dataset.py ├── test_sklearn_tfidf_vectorizer_converter_pipeline.py ├── test_sklearn_tfidf_vectorizer_converter_regex.py ├── test_sklearn_truncated_svd.py ├── test_sklearn_tuned_threshold_classifier.py ├── test_sklearn_voting_classifier_converter.py ├── test_sklearn_voting_regressor_converter.py ├── test_sklearn_woe_transformer.py ├── test_supported_converters.py ├── test_topology_prune.py ├── test_utils │ ├── __init__.py │ ├── main.py │ ├── reference_implementation_afe.py │ ├── reference_implementation_helper.py │ ├── reference_implementation_ml.py │ ├── reference_implementation_svm.py │ ├── reference_implementation_text.py │ ├── reference_implementation_tree.py │ ├── reference_implementation_zipmap.py │ ├── tests_helper.py │ ├── utils_backend.py │ ├── utils_backend_onnx.py │ └── utils_backend_onnxruntime.py ├── test_utils_sklearn.py └── test_variable_names.py └── tests_onnxmltools ├── test_columns.py ├── test_issues_onnxmltools_2024.py ├── test_lightgbm.py └── test_xgboost_converters.py /.github/workflows/black-ruff.yml: -------------------------------------------------------------------------------- 1 | name: Black Format Checker 2 | on: [push, pull_request] 3 | jobs: 4 | black-format-check: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - uses: actions/checkout@v2 8 | - uses: psf/black@stable 9 | with: 10 | options: "--diff --check" 11 | src: "." 12 | ruff-format-check: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v3 16 | - uses: chartboost/ruff-action@v1 17 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL" 2 | 3 | on: 4 | push: 5 | branches: [ 'main' ] 6 | pull_request: 7 | # The branches below must be a subset of the branches above 8 | branches: [ 'main' ] 9 | schedule: 10 | - cron: '14 5 * * 6' 11 | 12 | jobs: 13 | analyze: 14 | name: Analyze 15 | runs-on: ubuntu-latest 16 | permissions: 17 | actions: read 18 | contents: read 19 | security-events: write 20 | 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | language: [ 'python' ] 25 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 26 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support 27 | 28 | steps: 29 | - name: Checkout repository 30 | uses: actions/checkout@v3 31 | 32 | # Initializes the CodeQL tools for scanning. 33 | - name: Initialize CodeQL 34 | uses: github/codeql-action/init@v2 35 | with: 36 | languages: ${{ matrix.language }} 37 | # If you wish to specify custom queries, you can do so here or in a config file. 38 | # By default, queries listed here will override any specified in a config file. 39 | # Prefix the list here with "+" to use these queries and those in the config file. 40 | 41 | # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 42 | queries: +security-and-quality 43 | 44 | 45 | # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java). 46 | # If this step fails, then you should remove it and run the build manually (see below) 47 | - name: Autobuild 48 | uses: github/codeql-action/autobuild@v2 49 | 50 | # ℹ️ Command-line programs to run using the OS shell. 51 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 52 | 53 | # If the Autobuild fails above, remove it and uncomment the following three lines. 54 | # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. 55 | 56 | # - run: | 57 | # echo "Run, Build Application using script" 58 | # ./location_of_script_within_repo/buildscript.sh 59 | 60 | - name: Perform CodeQL Analysis 61 | uses: github/codeql-action/analyze@v2 62 | with: 63 | category: "/language:${{matrix.language}}" 64 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | deploy: 20 | 21 | runs-on: ubuntu-latest 22 | permissions: 23 | id-token: write 24 | environment: release 25 | steps: 26 | - uses: actions/checkout@v4 27 | - name: Set up Python 28 | uses: actions/setup-python@v5 29 | with: 30 | python-version: '3.x' 31 | 32 | - name: Install dependencies 33 | run: | 34 | python -m pip install --upgrade pip 35 | pip install build 36 | 37 | - name: Build package 38 | run: python -m build 39 | 40 | - name: Publish package 41 | uses: pypa/gh-action-pypi-publish@release/v1 42 | with: 43 | attestations: true 44 | -------------------------------------------------------------------------------- /.github/workflows/wheels-any.yml: -------------------------------------------------------------------------------- 1 | name: Build Any Wheel 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - 'releases/**' 8 | 9 | jobs: 10 | build_wheels: 11 | name: Build wheels on ${{ matrix.os }} 12 | runs-on: ${{ matrix.os }} 13 | strategy: 14 | matrix: 15 | os: [ubuntu-latest] 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | 20 | - uses: actions/setup-python@v4 21 | with: 22 | python-version: '3.12' 23 | 24 | - name: build wheel 25 | run: python -m pip wheel . -v 26 | 27 | - name: install twine 28 | run: python -m pip install twine 29 | 30 | - name: check wheel 31 | run: python -m twine check ./skl2onnx*.whl 32 | 33 | - uses: actions/upload-artifact@v4 34 | with: 35 | path: ./skl2onnx*.whl 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Visual Studio Code files 2 | .vscode 3 | 4 | # IPython notebook checkpoints 5 | .ipynb_checkpoints 6 | 7 | # Compiled python 8 | *.pyc 9 | 10 | # setup.py intermediates 11 | .eggs 12 | *.egg-info/ 13 | dist/ 14 | build/ 15 | 16 | # PyCharm files 17 | .idea 18 | 19 | # OSX dir files 20 | .DS_Store 21 | 22 | # Windows 23 | *.bat 24 | 25 | # test generated files 26 | *.onnx 27 | *.dot* 28 | *.whl 29 | .pytest_cache 30 | .cache 31 | htmlcov 32 | coverage.xml 33 | .coverage 34 | __dump_data/ 35 | junit/ 36 | tests_dump/ 37 | skl2onnx/algebra/_cache/*.rst 38 | docs/auto_examples 39 | docs/examples/graph*.* 40 | docs/examples/*.onnx 41 | docs/examples/pipeline*.dot* 42 | docs/sg_execution_times.rst 43 | tests/TESTDUMP 44 | tests/tests_dump 45 | tests/graph.dot* 46 | docs/examples/tiny_yolov2* 47 | docs/examples/imagenet_class_index.json 48 | TESTDUMP/* 49 | htmlcov/* 50 | tests/temp_onnx_helper_load_save.onnx 51 | tests/*.new 52 | benchmarks/*.csv 53 | benchmarks/*.png 54 | tests/Operators*.md 55 | docs/examples/*.pkl 56 | tests/debug_gp.onnx 57 | tests/test*.onnx 58 | tests_onnxmltools/*.pkl 59 | tests_onnxmltools/tests/* 60 | tests_onnxmltools/tests_dump/* 61 | docs/tests/*.dot* 62 | tests/*.dot* 63 | tests/*.onnx 64 | docs/tests/*.onnx 65 | docs/examples/validator_classifier.dot.png 66 | docs/examples/validator_classifier.dot 67 | docs/examples/mixture*.* 68 | docs/examples/cast1* 69 | docs/examples/cast2* 70 | docs/auto_tutorial 71 | docs/tutorial/*.onnx 72 | docs/tutorial/*.jpg 73 | docs/tutorial/*.png 74 | docs/tutorial/*.dot 75 | docs/tutorial/catboost_info 76 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # include 2 | include *.rst 3 | recursive-include docs * 4 | recursive-include tests * 5 | include LICENSE 6 | include README.md 7 | include MANIFEST.in 8 | include requirements.txt 9 | include skl2onnx/algebra/_cache/*.rst 10 | 11 | # exclude from sdist 12 | recursive-exclude benchmarks * 13 | recursive-exclude .azure-pipelines * 14 | recursive-exclude tests/tests_dump * 15 | recursive-exclude tests_onnxmltools/tests_dump * 16 | recursive-exclude tests/test_utils/__pycache__ * 17 | recursive-exclude docs/notebooks * 18 | exclude *.onnx 19 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | sklearn-onnx 2 | Copyright (c) 2018-2023 Microsoft Corporation 3 | 4 | This product includes software developed at 5 | The LF AI & Data Foundation (https://lfaidata.foundation/). 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 |

4 | 5 | [![PyPI - Version](https://img.shields.io/pypi/v/skl2onnx.svg)](https://pypi.org/project/skl2onnx) 6 | [![Linux](https://github.com/onnx/sklearn-onnx/actions/workflows/linux-ci.yml/badge.svg)](https://github.com/onnx/sklearn-onnx/actions/workflows/linux-ci.yml) 7 | [![Windows/Macos](https://github.com/onnx/sklearn-onnx/actions/workflows/windows-macos-ci.yml/badge.svg)](https://github.com/onnx/sklearn-onnx/actions/workflows/windows-macos-ci.yml) 8 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 9 | 10 | ## Introduction 11 | *sklearn-onnx* converts [scikit-learn](https://scikit-learn.org/stable/) models to [ONNX](https://github.com/onnx/onnx). 12 | Once in the ONNX format, you can use tools like [ONNX Runtime](https://github.com/Microsoft/onnxruntime) for high performance scoring. 13 | All converters are tested with [onnxruntime](https://onnxruntime.ai/). 14 | Any external converter can be registered to convert scikit-learn pipeline 15 | including models or transformers coming from external libraries. 16 | 17 | ## Documentation 18 | Full documentation including tutorials is available at [https://onnx.ai/sklearn-onnx/](https://onnx.ai/sklearn-onnx/). 19 | [Supported scikit-learn Models](https://onnx.ai/sklearn-onnx/supported.html) 20 | Last supported opset is 21. 21 | 22 | You may also find answers in [existing issues](https://github.com/onnx/sklearn-onnx/issues?utf8=%E2%9C%93&q=is%3Aissue) 23 | or submit a new one. 24 | 25 | ## Installation 26 | You can install from [PyPi](https://pypi.org/project/skl2onnx/): 27 | ``` 28 | pip install skl2onnx 29 | ``` 30 | Or you can install from the source with the latest changes. 31 | ``` 32 | pip install git+https://github.com/onnx/sklearn-onnx.git 33 | ``` 34 | 35 | ## Getting started 36 | 37 | ```python 38 | # Train a model. 39 | import numpy as np 40 | from sklearn.datasets import load_iris 41 | from sklearn.model_selection import train_test_split 42 | from sklearn.ensemble import RandomForestClassifier 43 | 44 | iris = load_iris() 45 | X, y = iris.data, iris.target 46 | X = X.astype(np.float32) 47 | X_train, X_test, y_train, y_test = train_test_split(X, y) 48 | clr = RandomForestClassifier() 49 | clr.fit(X_train, y_train) 50 | 51 | # Convert into ONNX format. 52 | from skl2onnx import to_onnx 53 | 54 | onx = to_onnx(clr, X[:1]) 55 | with open("rf_iris.onnx", "wb") as f: 56 | f.write(onx.SerializeToString()) 57 | 58 | # Compute the prediction with onnxruntime. 59 | import onnxruntime as rt 60 | 61 | sess = rt.InferenceSession("rf_iris.onnx", providers=["CPUExecutionProvider"]) 62 | input_name = sess.get_inputs()[0].name 63 | label_name = sess.get_outputs()[0].name 64 | pred_onx = sess.run([label_name], {input_name: X_test.astype(np.float32)})[0] 65 | ``` 66 | 67 | ## Contribute 68 | We welcome contributions in the form of feedback, ideas, or code. 69 | 70 | ## License 71 | [Apache License v2.0](LICENSE) 72 | -------------------------------------------------------------------------------- /benchmarks/README.txt: -------------------------------------------------------------------------------- 1 | To run the benchmark: 2 | 3 | All benchmarks produces csv files written in subfolder *results*. 4 | Benchmark can be run the following way: 5 | 6 | :: 7 | 8 | python bench_plot_onnxruntime_linreg.py 9 | python bench_plot_onnxruntime_logreg.py 10 | python bench_plot_onnxruntime_random_forest_reg.py 11 | python bench_plot_onnxruntime_svm_reg.py 12 | 13 | In subfolder *results*, script post_graph produces 14 | graph for each of them. 15 | 16 | :: 17 | 18 | python results/post_graph.py 19 | 20 | -------------------------------------------------------------------------------- /docs/examples/Au-Salon-de-l-agriculture-la-campagne-recrute.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onnx/sklearn-onnx/eaac0e13333962a2391a33c9d5192e382b7a985d/docs/examples/Au-Salon-de-l-agriculture-la-campagne-recrute.jpg -------------------------------------------------------------------------------- /docs/examples/README.txt: -------------------------------------------------------------------------------- 1 | Gallery of examples 2 | =================== 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | -------------------------------------------------------------------------------- /docs/examples/daisy_wikipedia.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onnx/sklearn-onnx/eaac0e13333962a2391a33c9d5192e382b7a985d/docs/examples/daisy_wikipedia.jpg -------------------------------------------------------------------------------- /docs/examples/plot_backend.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | """ 5 | 6 | .. _l-example-backend-api: 7 | 8 | ONNX Runtime Backend for ONNX 9 | ============================= 10 | 11 | .. index:: backend 12 | 13 | *ONNX Runtime* extends the 14 | `onnx backend API `_ 16 | to run predictions using this runtime. 17 | Let's use the API to compute the prediction 18 | of a simple logistic regression model. 19 | """ 20 | 21 | import skl2onnx 22 | import onnxruntime 23 | import onnx 24 | import sklearn 25 | from sklearn.datasets import load_iris 26 | from sklearn.linear_model import LogisticRegression 27 | import numpy 28 | from onnxruntime import get_device 29 | import numpy as np 30 | import onnxruntime.backend as backend 31 | 32 | 33 | ####################################### 34 | # Let's create an ONNX graph first. 35 | 36 | data = load_iris() 37 | X, Y = data.data, data.target 38 | logreg = LogisticRegression(C=1e5).fit(X, Y) 39 | model = skl2onnx.to_onnx(logreg, X.astype(np.float32)) 40 | name = "logreg_iris.onnx" 41 | with open(name, "wb") as f: 42 | f.write(model.SerializeToString()) 43 | 44 | ####################################### 45 | # Let's use ONNX backend API to test it. 46 | 47 | model = onnx.load(name) 48 | rep = backend.prepare(model) 49 | x = np.array( 50 | [[-1.0, -2.0, 5.0, 6.0], [-1.0, -2.0, -3.0, -4.0], [-1.0, -2.0, 7.0, 8.0]], 51 | dtype=np.float32, 52 | ) 53 | label, proba = rep.run(x) 54 | print("label={}".format(label)) 55 | print("probabilities={}".format(proba)) 56 | 57 | ######################################## 58 | # The device depends on how the package was compiled, 59 | # GPU or CPU. 60 | print(get_device()) 61 | 62 | ######################################## 63 | # The backend can also directly load the model 64 | # without using *onnx*. 65 | 66 | rep = backend.prepare(name) 67 | x = np.array( 68 | [[-1.0, -2.0, -3.0, -4.0], [-1.0, -2.0, -3.0, -4.0], [-1.0, -2.0, -3.0, -4.0]], 69 | dtype=np.float32, 70 | ) 71 | label, proba = rep.run(x) 72 | print("label={}".format(label)) 73 | print("probabilities={}".format(proba)) 74 | 75 | ####################################### 76 | # The backend API is implemented by other frameworks 77 | # and makes it easier to switch between multiple runtimes 78 | # with the same API. 79 | 80 | ################################# 81 | # **Versions used for this example** 82 | 83 | print("numpy:", numpy.__version__) 84 | print("scikit-learn:", sklearn.__version__) 85 | print("onnx: ", onnx.__version__) 86 | print("onnxruntime: ", onnxruntime.__version__) 87 | print("skl2onnx: ", skl2onnx.__version__) 88 | -------------------------------------------------------------------------------- /docs/examples/plot_convert_decision_function.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | """ 5 | .. _l-rf-example-decision-function: 6 | 7 | Probabilities or raw scores 8 | =========================== 9 | 10 | A classifier usually returns a matrix of probabilities. 11 | By default, *sklearn-onnx* creates an ONNX graph 12 | which returns probabilities but it may skip that 13 | step and return raw scores if the model implements 14 | the method *decision_function*. Option ``'raw_scores'`` 15 | is used to change the default behaviour. Let's see 16 | that on a simple example. 17 | 18 | Train a model and convert it 19 | ++++++++++++++++++++++++++++ 20 | 21 | """ 22 | 23 | import numpy 24 | import sklearn 25 | from sklearn.datasets import load_iris 26 | from sklearn.model_selection import train_test_split 27 | import onnxruntime as rt 28 | import onnx 29 | import skl2onnx 30 | from skl2onnx.common.data_types import FloatTensorType 31 | from skl2onnx import convert_sklearn 32 | from sklearn.linear_model import LogisticRegression 33 | 34 | iris = load_iris() 35 | X, y = iris.data, iris.target 36 | X_train, X_test, y_train, y_test = train_test_split(X, y) 37 | clr = LogisticRegression(max_iter=500) 38 | clr.fit(X_train, y_train) 39 | print(clr) 40 | 41 | initial_type = [("float_input", FloatTensorType([None, 4]))] 42 | onx = convert_sklearn(clr, initial_types=initial_type, target_opset=12) 43 | 44 | ############################ 45 | # Output type 46 | # +++++++++++ 47 | # 48 | # Let's confirm the output type of the probabilities 49 | # is a list of dictionaries with onnxruntime. 50 | 51 | sess = rt.InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) 52 | res = sess.run(None, {"float_input": X_test.astype(numpy.float32)}) 53 | print("skl", clr.predict_proba(X_test[:1])) 54 | print("onnx", res[1][:2]) 55 | 56 | ################################### 57 | # Raw scores and decision_function 58 | # ++++++++++++++++++++++++++++++++ 59 | # 60 | 61 | initial_type = [("float_input", FloatTensorType([None, 4]))] 62 | options = {id(clr): {"raw_scores": True}} 63 | onx2 = convert_sklearn( 64 | clr, initial_types=initial_type, options=options, target_opset=12 65 | ) 66 | 67 | sess2 = rt.InferenceSession( 68 | onx2.SerializeToString(), providers=["CPUExecutionProvider"] 69 | ) 70 | res2 = sess2.run(None, {"float_input": X_test.astype(numpy.float32)}) 71 | print("skl", clr.decision_function(X_test[:1])) 72 | print("onnx", res2[1][:2]) 73 | 74 | ################################# 75 | # **Versions used for this example** 76 | 77 | print("numpy:", numpy.__version__) 78 | print("scikit-learn:", sklearn.__version__) 79 | print("onnx: ", onnx.__version__) 80 | print("onnxruntime: ", rt.__version__) 81 | print("skl2onnx: ", skl2onnx.__version__) 82 | -------------------------------------------------------------------------------- /docs/examples/plot_convert_model.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | """ 5 | .. _l-rf-iris-example: 6 | 7 | Train, convert and predict a model 8 | ================================== 9 | 10 | Train and deploy a model usually involves the 11 | three following steps: 12 | 13 | * train a pipeline with *scikit-learn*, 14 | * convert it into *ONNX* with *sklearn-onnx*, 15 | * predict with *onnxruntime*. 16 | 17 | Train a model 18 | +++++++++++++ 19 | 20 | A very basic example using random forest and 21 | the iris dataset. 22 | """ 23 | 24 | import skl2onnx 25 | import onnx 26 | import sklearn 27 | from sklearn.linear_model import LogisticRegression 28 | import numpy 29 | import onnxruntime as rt 30 | from skl2onnx.common.data_types import FloatTensorType 31 | from skl2onnx import convert_sklearn 32 | from sklearn.datasets import load_iris 33 | from sklearn.model_selection import train_test_split 34 | from sklearn.ensemble import RandomForestClassifier 35 | 36 | iris = load_iris() 37 | X, y = iris.data, iris.target 38 | X_train, X_test, y_train, y_test = train_test_split(X, y) 39 | clr = RandomForestClassifier() 40 | clr.fit(X_train, y_train) 41 | print(clr) 42 | 43 | ########################### 44 | # Convert a model into ONNX 45 | # +++++++++++++++++++++++++ 46 | 47 | initial_type = [("float_input", FloatTensorType([None, 4]))] 48 | onx = convert_sklearn(clr, initial_types=initial_type, target_opset=12) 49 | 50 | with open("rf_iris.onnx", "wb") as f: 51 | f.write(onx.SerializeToString()) 52 | 53 | ################################### 54 | # Compute the prediction with ONNX Runtime 55 | # ++++++++++++++++++++++++++++++++++++++++ 56 | sess = rt.InferenceSession("rf_iris.onnx", providers=["CPUExecutionProvider"]) 57 | input_name = sess.get_inputs()[0].name 58 | label_name = sess.get_outputs()[0].name 59 | pred_onx = sess.run([label_name], {input_name: X_test.astype(numpy.float32)})[0] 60 | print(pred_onx) 61 | 62 | ####################################### 63 | # Full example with a logistic regression 64 | 65 | clr = LogisticRegression() 66 | clr.fit(X_train, y_train) 67 | initial_type = [("float_input", FloatTensorType([None, X_train.shape[1]]))] 68 | onx = convert_sklearn(clr, initial_types=initial_type, target_opset=12) 69 | with open("logreg_iris.onnx", "wb") as f: 70 | f.write(onx.SerializeToString()) 71 | 72 | sess = rt.InferenceSession("logreg_iris.onnx", providers=["CPUExecutionProvider"]) 73 | input_name = sess.get_inputs()[0].name 74 | label_name = sess.get_outputs()[0].name 75 | pred_onx = sess.run([label_name], {input_name: X_test.astype(numpy.float32)})[0] 76 | print(pred_onx) 77 | 78 | 79 | ################################# 80 | # **Versions used for this example** 81 | 82 | print("numpy:", numpy.__version__) 83 | print("scikit-learn:", sklearn.__version__) 84 | print("onnx: ", onnx.__version__) 85 | print("onnxruntime: ", rt.__version__) 86 | print("skl2onnx: ", skl2onnx.__version__) 87 | -------------------------------------------------------------------------------- /docs/examples/plot_logging.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | """ 5 | .. _l-example-logging: 6 | 7 | Logging, verbose 8 | ================ 9 | 10 | The conversion of a pipeline fails if it contains an object without any 11 | associated converter. It may also fails if one of the object is mapped 12 | by a custom converter. If the error message is not explicit enough, 13 | it is possible to enable logging. 14 | 15 | Train a model 16 | +++++++++++++ 17 | 18 | A very basic example using random forest and 19 | the iris dataset. 20 | """ 21 | 22 | import logging 23 | import numpy 24 | import onnx 25 | import onnxruntime as rt 26 | import sklearn 27 | from sklearn.datasets import load_iris 28 | from sklearn.model_selection import train_test_split 29 | from sklearn.tree import DecisionTreeClassifier 30 | from skl2onnx.common.data_types import FloatTensorType 31 | from skl2onnx import convert_sklearn 32 | import skl2onnx 33 | 34 | iris = load_iris() 35 | X, y = iris.data, iris.target 36 | X_train, X_test, y_train, y_test = train_test_split(X, y) 37 | clr = DecisionTreeClassifier() 38 | clr.fit(X_train, y_train) 39 | print(clr) 40 | 41 | ########################### 42 | # Convert a model into ONNX 43 | # +++++++++++++++++++++++++ 44 | 45 | initial_type = [("float_input", FloatTensorType([None, 4]))] 46 | onx = convert_sklearn(clr, initial_types=initial_type, target_opset=12) 47 | 48 | 49 | sess = rt.InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) 50 | input_name = sess.get_inputs()[0].name 51 | label_name = sess.get_outputs()[0].name 52 | pred_onx = sess.run([label_name], {input_name: X_test.astype(numpy.float32)})[0] 53 | print(pred_onx) 54 | 55 | ######################################## 56 | # Conversion with parameter verbose 57 | # +++++++++++++++++++++++++++++++++ 58 | # 59 | # verbose is a parameter which prints messages on the standard output. 60 | # It tells which converter is called. `verbose=1` usually means what *skl2onnx* 61 | # is doing to convert a pipeline. `verbose=2+` 62 | # is reserved for information within converters. 63 | 64 | convert_sklearn(clr, initial_types=initial_type, target_opset=12, verbose=1) 65 | 66 | ######################################## 67 | # Conversion with logging 68 | # +++++++++++++++++++++++ 69 | # 70 | # This is very detailed logging. It which operators or variables 71 | # (output of converters) is processed, which node is created... 72 | # This information may be useful when a custom converter is being 73 | # implemented. 74 | 75 | logger = logging.getLogger("skl2onnx") 76 | logger.setLevel(logging.DEBUG) 77 | 78 | convert_sklearn(clr, initial_types=initial_type, target_opset=12) 79 | 80 | ########################### 81 | # And to disable it. 82 | 83 | logger.setLevel(logging.INFO) 84 | 85 | convert_sklearn(clr, initial_types=initial_type, target_opset=12) 86 | 87 | logger.setLevel(logging.WARNING) 88 | 89 | ################################# 90 | # **Versions used for this example** 91 | 92 | print("numpy:", numpy.__version__) 93 | print("scikit-learn:", sklearn.__version__) 94 | print("onnx: ", onnx.__version__) 95 | print("onnxruntime: ", rt.__version__) 96 | print("skl2onnx: ", skl2onnx.__version__) 97 | -------------------------------------------------------------------------------- /docs/examples/plot_metadata.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | """ 5 | Metadata 6 | ======== 7 | 8 | .. index:: metadata 9 | 10 | ONNX format contains metadata related to how the 11 | model was produced. It is useful when the model 12 | is deployed to production to keep track of which 13 | instance was used at a specific time. 14 | Let's see how to do that with a simple 15 | logistic regression model trained with 16 | *scikit-learn*. 17 | """ 18 | 19 | import skl2onnx 20 | import onnxruntime 21 | import sklearn 22 | import numpy 23 | from onnxruntime import InferenceSession 24 | import onnx 25 | from onnxruntime.datasets import get_example 26 | 27 | example = get_example("logreg_iris.onnx") 28 | 29 | model = onnx.load(example) 30 | 31 | print("doc_string={}".format(model.doc_string)) 32 | print("domain={}".format(model.domain)) 33 | print("ir_version={}".format(model.ir_version)) 34 | print("metadata_props={}".format(model.metadata_props)) 35 | print("model_version={}".format(model.model_version)) 36 | print("producer_name={}".format(model.producer_name)) 37 | print("producer_version={}".format(model.producer_version)) 38 | 39 | ############################# 40 | # With *ONNX Runtime*: 41 | 42 | sess = InferenceSession(example, providers=["CPUExecutionProvider"]) 43 | meta = sess.get_modelmeta() 44 | 45 | print("custom_metadata_map={}".format(meta.custom_metadata_map)) 46 | print("description={}".format(meta.description)) 47 | print("domain={}".format(meta.domain)) 48 | print("graph_name={}".format(meta.graph_name)) 49 | print("producer_name={}".format(meta.producer_name)) 50 | print("version={}".format(meta.version)) 51 | 52 | ################################# 53 | # **Versions used for this example** 54 | 55 | print("numpy:", numpy.__version__) 56 | print("scikit-learn:", sklearn.__version__) 57 | print("onnx: ", onnx.__version__) 58 | print("onnxruntime: ", onnxruntime.__version__) 59 | print("skl2onnx: ", skl2onnx.__version__) 60 | -------------------------------------------------------------------------------- /docs/examples/plot_pipeline.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | """ 5 | Draw a pipeline 6 | =============== 7 | 8 | There is no other way to look into one model stored 9 | in ONNX format than looking into its node with 10 | *onnx*. This example demonstrates 11 | how to draw a model and to retrieve it in *json* 12 | format. 13 | 14 | Retrieve a model in JSON format 15 | +++++++++++++++++++++++++++++++ 16 | 17 | That's the most simple way. 18 | """ 19 | 20 | import skl2onnx 21 | import onnxruntime 22 | import sklearn 23 | import numpy 24 | import matplotlib.pyplot as plt 25 | import os 26 | from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer 27 | from onnx import ModelProto 28 | import onnx 29 | from skl2onnx.algebra.onnx_ops import OnnxAdd, OnnxMul 30 | 31 | onnx_fct = OnnxAdd( 32 | OnnxMul("X", numpy.array([2], dtype=numpy.float32), op_version=12), 33 | numpy.array([[1, 0], [0, 1]], dtype=numpy.float32), 34 | output_names=["Y"], 35 | op_version=12, 36 | ) 37 | 38 | X = numpy.array([[4, 5], [-2, 3]], dtype=numpy.float32) 39 | model = onnx_fct.to_onnx({"X": X}, target_opset=12) 40 | print(model) 41 | 42 | filename = "example1.onnx" 43 | with open(filename, "wb") as f: 44 | f.write(model.SerializeToString()) 45 | 46 | 47 | ################################# 48 | # Draw a model with ONNX 49 | # ++++++++++++++++++++++ 50 | # We use `net_drawer.py 51 | # `_ 52 | # included in *onnx* package. 53 | # We use *onnx* to load the model 54 | # in a different way than before. 55 | 56 | 57 | model = ModelProto() 58 | with open(filename, "rb") as fid: 59 | content = fid.read() 60 | model.ParseFromString(content) 61 | 62 | ################################### 63 | # We convert it into a graph. 64 | pydot_graph = GetPydotGraph( 65 | model.graph, 66 | name=model.graph.name, 67 | rankdir="TB", 68 | node_producer=GetOpNodeProducer("docstring"), 69 | ) 70 | pydot_graph.write_dot("graph.dot") 71 | 72 | ####################################### 73 | # Then into an image 74 | os.system("dot -O -Tpng graph.dot") 75 | 76 | ################################ 77 | # Which we display... 78 | image = plt.imread("graph.dot.png") 79 | plt.imshow(image) 80 | plt.axis("off") 81 | 82 | ################################# 83 | # **Versions used for this example** 84 | 85 | print("numpy:", numpy.__version__) 86 | print("scikit-learn:", sklearn.__version__) 87 | print("onnx: ", onnx.__version__) 88 | print("onnxruntime: ", onnxruntime.__version__) 89 | print("skl2onnx: ", skl2onnx.__version__) 90 | -------------------------------------------------------------------------------- /docs/exts/github_link.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | # Source: https://github.com/scikit-learn/scikit-learn/blob/ 4 | # main/doc/sphinxext/github_link.py 5 | from operator import attrgetter 6 | import inspect 7 | import subprocess 8 | import os 9 | import sys 10 | from functools import partial 11 | 12 | REVISION_CMD = "git rev-parse --short HEAD" 13 | 14 | 15 | def _get_git_revision(): 16 | try: 17 | revision = subprocess.check_output(REVISION_CMD.split()).strip() 18 | except (subprocess.CalledProcessError, OSError): 19 | print("Failed to execute git to get revision") 20 | return None 21 | return revision.decode("utf-8") 22 | 23 | 24 | def _linkcode_resolve(domain, info, package, url_fmt, revision): 25 | """Determine a link to online source for a class/method/function 26 | This is called by sphinx.ext.linkcode 27 | An example with a long-untouched module that everyone has 28 | >>> _linkcode_resolve('py', {'module': 'tty', 29 | ... 'fullname': 'setraw'}, 30 | ... package='tty', 31 | ... url_fmt='http://hg.python.org/cpython/file/' 32 | ... '{revision}/Lib/{package}/{path}#L{lineno}', 33 | ... revision='xxxx') 34 | 'http://hg.python.org/cpython/file/xxxx/Lib/tty/tty.py#L18' 35 | """ 36 | 37 | if revision is None: 38 | return 39 | if domain not in ("py", "pyx"): 40 | return 41 | if not info.get("module") or not info.get("fullname"): 42 | return 43 | 44 | class_name = info["fullname"].split(".")[0] 45 | module = __import__(info["module"], fromlist=[class_name]) 46 | obj = attrgetter(info["fullname"])(module) 47 | 48 | # Unwrap the object to get the correct source 49 | # file in case that is wrapped by a decorator 50 | obj = inspect.unwrap(obj) 51 | 52 | try: 53 | fn = inspect.getsourcefile(obj) 54 | except Exception: 55 | fn = None 56 | if not fn: 57 | try: 58 | fn = inspect.getsourcefile(sys.modules[obj.__module__]) 59 | except Exception: 60 | fn = None 61 | if not fn: 62 | return 63 | 64 | fn = os.path.relpath(fn, start=os.path.dirname(__import__(package).__file__)) 65 | try: 66 | lineno = inspect.getsourcelines(obj)[1] 67 | except Exception: 68 | lineno = "" 69 | return url_fmt.format(revision=revision, package=package, path=fn, lineno=lineno) 70 | 71 | 72 | def make_linkcode_resolve(package, url_fmt): 73 | """Returns a linkcode_resolve function for the given URL format 74 | revision is a git commit reference (hash or name) 75 | package is the name of the root module of the package 76 | url_fmt is along the lines of ('https://github.com/USER/PROJECT/' 77 | 'blob/{revision}/{package}/' 78 | '{path}#L{lineno}') 79 | """ 80 | revision = _get_git_revision() 81 | return partial( 82 | _linkcode_resolve, revision=revision, package=package, url_fmt=url_fmt 83 | ) 84 | -------------------------------------------------------------------------------- /docs/images/woe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onnx/sklearn-onnx/eaac0e13333962a2391a33c9d5192e382b7a985d/docs/images/woe.png -------------------------------------------------------------------------------- /docs/index_tutorial.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | Tutorial 5 | ======== 6 | 7 | .. index:: tutorial 8 | 9 | The tutorial goes from a simple example which 10 | converts a pipeline to a more complex example 11 | involving operator not actually implemented in 12 | :epkg:`ONNX operators` or :epkg:`ONNX ML operators`. 13 | 14 | .. toctree:: 15 | :maxdepth: 2 16 | 17 | tutorial_1_simple 18 | tutorial_1-5_external 19 | tutorial_2_new_converter 20 | tutorial_4_advanced 21 | tutorial_2-5_extlib 22 | 23 | The tutorial was tested with following version: 24 | 25 | .. runpython:: 26 | :showcode: 27 | 28 | try: 29 | import catboost 30 | except Exception as e: 31 | print("Unable to import catboost due to", e) 32 | catboost = None 33 | import numpy 34 | import scipy 35 | import sklearn 36 | import lightgbm 37 | import onnx 38 | import onnxmltools 39 | import onnxruntime 40 | import xgboost 41 | import skl2onnx 42 | 43 | mods = [numpy, scipy, sklearn, lightgbm, xgboost, catboost, 44 | onnx, onnxmltools, onnxruntime, 45 | skl2onnx] 46 | mods = [(m.__name__, m.__version__) for m in mods if m is not None] 47 | mx = max(len(_[0]) for _ in mods) + 1 48 | for name, vers in sorted(mods): 49 | print("%s%s%s" % (name, " " * (mx - len(name)), vers)) 50 | -------------------------------------------------------------------------------- /docs/logo_main.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onnx/sklearn-onnx/eaac0e13333962a2391a33c9d5192e382b7a985d/docs/logo_main.png -------------------------------------------------------------------------------- /docs/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onnx/sklearn-onnx/eaac0e13333962a2391a33c9d5192e382b7a985d/docs/pipeline.png -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | autopep8 2 | catboost 3 | category_encoders 4 | coverage 5 | flake8 6 | furo 7 | joblib 8 | lightgbm; sys_platform != 'darwin' 9 | loky 10 | matplotlib 11 | mlinsights>=0.3.631 12 | nbsphinx 13 | onnx 14 | onnx-array-api 15 | onnxmltools 16 | onnxruntime 17 | pillow 18 | py-spy 19 | pandas 20 | pydot 21 | pyinstrument 22 | pyod 23 | pytest 24 | pytest-cov 25 | skl2onnx 26 | sphinx 27 | sphinxcontrib-blockdiag 28 | sphinx-gallery 29 | sphinx-runpython 30 | tabulate 31 | tqdm 32 | wheel 33 | xgboost 34 | -------------------------------------------------------------------------------- /docs/supported.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | ============================= 5 | Supported scikit-learn Models 6 | ============================= 7 | 8 | *skl2onnx* currently can convert the following list 9 | of models for *skl2onnx* :skl2onnxversion:`v`. They 10 | were tested using *onnxruntime* :skl2onnxversion:`rt`. 11 | All the following classes overloads the following methods 12 | such as :class:`OnnxSklearnPipeline` does. They wrap existing 13 | *scikit-learn* classes by dynamically creating a new one 14 | which inherits from :class:`OnnxOperatorMixin` which 15 | implements *to_onnx* methods. 16 | 17 | .. _l-converter-list: 18 | 19 | Covered Converters 20 | ================== 21 | 22 | .. covered-sklearn-ops:: 23 | 24 | Converters Documentation 25 | ======================== 26 | 27 | .. supported-sklearn-ops:: 28 | 29 | Pipeline 30 | ======== 31 | 32 | .. autoclass:: skl2onnx.algebra.sklearn_ops.OnnxSklearnPipeline 33 | :members: to_onnx, to_onnx_operator, onnx_parser, onnx_shape_calculator, onnx_converter 34 | 35 | .. autoclass:: skl2onnx.algebra.sklearn_ops.OnnxSklearnColumnTransformer 36 | :members: to_onnx, to_onnx_operator, onnx_parser, onnx_shape_calculator, onnx_converter 37 | 38 | .. autoclass:: skl2onnx.algebra.sklearn_ops.OnnxSklearnFeatureUnion 39 | :members: to_onnx, to_onnx_operator, onnx_parser, onnx_shape_calculator, onnx_converter 40 | 41 | Available ONNX operators 42 | ======================== 43 | 44 | *skl2onnx* maps every ONNX operators into a class 45 | easy to insert into a graph. These operators get 46 | dynamically added and the list depends on the installed 47 | *ONNX* package. The documentation for these operators 48 | can be found on github: `ONNX Operators.md 49 | `_ 50 | and `ONNX-ML Operators 51 | `_. 52 | Associated to `onnxruntime `_, 53 | the mapping makes it easier to easily check the output 54 | of the *ONNX* operators on any data as shown 55 | in example :ref:`l-onnx-operators`. 56 | 57 | .. supported-onnx-ops:: 58 | -------------------------------------------------------------------------------- /docs/tests/test_utils_benchmark.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | """ 4 | @brief test log(time=3s) 5 | """ 6 | 7 | import unittest 8 | import numpy 9 | from skl2onnx.tutorial import measure_time 10 | 11 | 12 | class TestMeasureTime(unittest.TestCase): 13 | def test_vector_count(self): 14 | def fct(): 15 | X = numpy.ones((1000, 5)) 16 | return X 17 | 18 | res = measure_time("fct", context={"fct": fct}, div_by_number=False, number=100) 19 | self.assertIn("average", res) 20 | res = measure_time("fct", context={"fct": fct}, div_by_number=True, number=100) 21 | self.assertIn("average", res) 22 | res = measure_time("fct", context={"fct": fct}, div_by_number=True, number=1000) 23 | self.assertIn("average", res) 24 | 25 | 26 | if __name__ == "__main__": 27 | unittest.main() 28 | -------------------------------------------------------------------------------- /docs/tests/test_utils_classes.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | """ 4 | @brief test log(time=3s) 5 | """ 6 | 7 | import unittest 8 | from skl2onnx.tutorial.imagenet_classes import class_names 9 | 10 | 11 | class TestUtilsClasses(unittest.TestCase): 12 | def test_classes(self): 13 | cl = class_names 14 | self.assertIsInstance(cl, dict) 15 | self.assertEqual(len(cl), 1000) 16 | 17 | 18 | if __name__ == "__main__": 19 | unittest.main() 20 | -------------------------------------------------------------------------------- /docs/tutorial/README.txt: -------------------------------------------------------------------------------- 1 | Examples 2 | ======== 3 | -------------------------------------------------------------------------------- /docs/tutorial/plot_ngrams.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | """ 4 | .. _example-ngrams: 5 | 6 | Tricky issue when converting CountVectorizer or TfidfVectorizer 7 | =============================================================== 8 | 9 | This issue is described at `scikit-learn/issues/13733 10 | `_. 11 | If a CountVectorizer or a TfidfVectorizer produces a token with a space, 12 | skl2onnx cannot know if it a bi-grams or a unigram with a space. 13 | 14 | A simple example impossible to convert 15 | ++++++++++++++++++++++++++++++++++++++ 16 | """ 17 | 18 | import pprint 19 | import numpy 20 | from numpy.testing import assert_almost_equal 21 | from onnxruntime import InferenceSession 22 | from sklearn.feature_extraction.text import TfidfVectorizer 23 | from skl2onnx import to_onnx 24 | from skl2onnx.sklapi import TraceableTfidfVectorizer 25 | import skl2onnx.sklapi.register # noqa: F401 26 | 27 | corpus = numpy.array( 28 | [ 29 | "This is the first document.", 30 | "This document is the second document.", 31 | "Is this the first document?", 32 | "", 33 | ] 34 | ).reshape((4,)) 35 | 36 | pattern = r"\b[a-z ]{1,10}\b" 37 | mod1 = TfidfVectorizer(ngram_range=(1, 2), token_pattern=pattern) 38 | mod1.fit(corpus) 39 | 40 | 41 | ###################################### 42 | # Unigrams and bi-grams are placed into the following container 43 | # which maps it to its column index. 44 | 45 | pprint.pprint(mod1.vocabulary_) 46 | 47 | 48 | #################################### 49 | # Conversion. 50 | 51 | try: 52 | to_onnx(mod1, corpus) 53 | except RuntimeError as e: 54 | print(e) 55 | 56 | 57 | ####################################### 58 | # TraceableTfidfVectorizer 59 | # ++++++++++++++++++++++++ 60 | # 61 | # Class :class:`TraceableTfidfVectorizer` is equivalent to 62 | # :class:`sklearn.feature_extraction.text.TfidfVectorizer` 63 | # but stores the unigrams and bi-grams of the vocabulary with tuple 64 | # instead of concatenating every piece into a string. 65 | 66 | 67 | mod2 = TraceableTfidfVectorizer(ngram_range=(1, 2), token_pattern=pattern) 68 | mod2.fit(corpus) 69 | 70 | pprint.pprint(mod2.vocabulary_) 71 | 72 | ####################################### 73 | # Let's check it produces the same results. 74 | 75 | assert_almost_equal(mod1.transform(corpus).todense(), mod2.transform(corpus).todense()) 76 | 77 | #################################### 78 | # Conversion. Line `import skl2onnx.sklapi.register` 79 | # was added to register the converters associated to these 80 | # new class. By default, only converters for scikit-learn are 81 | # declared. 82 | 83 | onx = to_onnx(mod2, corpus) 84 | sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) 85 | got = sess.run(None, {"X": corpus}) 86 | 87 | ################################### 88 | # Let's check if there are discrepancies... 89 | 90 | assert_almost_equal(mod2.transform(corpus).todense(), got[0]) 91 | -------------------------------------------------------------------------------- /docs/tutorial_1-5_external.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-License-Identifier: Apache-2.0 2 | 3 | Using converters from other libraries 4 | ===================================== 5 | 6 | Before starting writing our own converter, 7 | we can use some available in other libraries 8 | than :epkg:`sklearn-onnx`. :epkg:`onnxmltools` implements 9 | converters for :epkg:`xgboost` and :epkg:`LightGBM`. 10 | Following examples show how to use the conveter when the 11 | model are part of a pipeline. 12 | 13 | .. toctree:: 14 | :maxdepth: 1 15 | 16 | auto_tutorial/plot_gexternal_lightgbm 17 | auto_tutorial/plot_gexternal_lightgbm_reg 18 | auto_tutorial/plot_gexternal_xgboost 19 | auto_tutorial/plot_gexternal_catboost 20 | -------------------------------------------------------------------------------- /docs/tutorial_1_simple.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | The easy case 5 | ============= 6 | 7 | The easy case is when the machine learned model 8 | can be converter into ONNX with a converting library 9 | without writing any specific code. That means that a converter 10 | exists for the model or each piece of the model, 11 | the converter produces an ONNX graph where every node 12 | is part of the existing ONNX specifications, and the runtime 13 | used to compute the predictions implements every node 14 | used in the ONNX graph. 15 | 16 | .. toctree:: 17 | :maxdepth: 1 18 | 19 | auto_tutorial/plot_abegin_convert_pipeline 20 | auto_tutorial/plot_bbegin_measure_time 21 | auto_tutorial/plot_cbegin_opset 22 | auto_tutorial/plot_dbegin_options 23 | auto_tutorial/plot_dbegin_options_zipmap 24 | auto_tutorial/plot_dbegin_options_list 25 | auto_tutorial/plot_ebegin_float_double 26 | auto_tutorial/plot_fbegin_investigate 27 | auto_tutorial/plot_gbegin_cst 28 | auto_tutorial/plot_gbegin_dataframe 29 | auto_tutorial/plot_gconverting 30 | -------------------------------------------------------------------------------- /docs/tutorial_2-5_extlib.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-License-Identifier: Apache-2.0 2 | 3 | Write converters for other libraries 4 | ==================================== 5 | 6 | *sklearn-onnx* only converts models from *scikit-learn*. It 7 | implements a mechanism to register converters from other libraries. 8 | Converters for models from other libraries will not be added to 9 | *sklearn-onnx*. Every library has its own maintenance cycle and 10 | it would become difficult to maintain a package having too many 11 | dependencies. Following examples were added to show how to 12 | develop converters for new libraries. 13 | 14 | .. toctree:: 15 | :maxdepth: 1 16 | 17 | auto_tutorial/plot_wext_pyod_forest 18 | -------------------------------------------------------------------------------- /docs/tutorial_2_new_converter.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-License-Identifier: Apache-2.0 2 | 3 | A custom converter for a custom model 4 | ===================================== 5 | 6 | When :epkg:`sklearn-onnx` converts a :epkg:`scikit-learn` 7 | pipeline, it looks into every transformer and predictor 8 | and fetches the associated converter. The resulting 9 | ONNX graph combines the outcome of every converter 10 | in a single graph. If a model does not have its converter, 11 | it displays an error message telling it misses a converter. 12 | 13 | .. runpython:: 14 | :showcode: 15 | 16 | import numpy 17 | from sklearn.linear_model import LogisticRegression 18 | from skl2onnx import to_onnx 19 | 20 | 21 | class MyLogisticRegression(LogisticRegression): 22 | pass 23 | 24 | 25 | X = numpy.array([[0, 0.1]]) 26 | try: 27 | to_onnx(MyLogisticRegression(), X) 28 | except Exception as e: 29 | print(e) 30 | 31 | Following sections show how to create a custom converter. 32 | It assumes this new converter is not meant to be added to 33 | this package but only to be registered and used when converting 34 | a pipeline. To to contribute and add a converter 35 | for a :epkg:`scikit-learn` model, the logic is still the same, 36 | only the converter registration changes. `PR 737 37 | `_ can be used as 38 | an example. 39 | 40 | .. toctree:: 41 | :maxdepth: 1 42 | 43 | auto_tutorial/plot_icustom_converter 44 | auto_tutorial/plot_jcustom_syntax 45 | auto_tutorial/plot_jfunction_transformer 46 | auto_tutorial/plot_kcustom_converter_wrapper 47 | auto_tutorial/plot_lcustom_options 48 | auto_tutorial/plot_mcustom_parser 49 | -------------------------------------------------------------------------------- /docs/tutorial_4_advanced.rst: -------------------------------------------------------------------------------- 1 | .. SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | Advanced scenarios 5 | ================== 6 | 7 | Unexpected discrepencies may appear. This a list of examples 8 | with issues and resolved issues. 9 | 10 | .. toctree:: 11 | :maxdepth: 1 12 | 13 | auto_tutorial/plot_ngrams 14 | auto_tutorial/plot_usparse_xgboost 15 | auto_tutorial/plot_woe_transformer 16 | auto_tutorial/plot_output_onnx_single_probability 17 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # tests 2 | black 3 | jinja2 4 | onnxruntime-extensions 5 | onnxscript 6 | pandas 7 | py-cpuinfo 8 | pybind11 9 | pytest 10 | pytest-cov 11 | ruff 12 | wheel 13 | 14 | # docs/examples 15 | xgboost 16 | lightgbm; sys_platform != 'darwin' 17 | matplotlib 18 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | onnx>=1.2.1 2 | scikit-learn>=1.1 3 | -------------------------------------------------------------------------------- /skl2onnx/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | """ 4 | Main entry point to the converter from the *scikit-learn* to *onnx*. 5 | """ 6 | 7 | __version__ = "1.19.1" 8 | __author__ = "Microsoft" 9 | __producer__ = "skl2onnx" 10 | __producer_version__ = __version__ 11 | __domain__ = "ai.onnx" 12 | __model_version__ = 0 13 | __max_supported_opset__ = 21 # Converters are tested up to this version. 14 | 15 | 16 | from .convert import convert_sklearn, to_onnx, wrap_as_onnx_mixin 17 | from ._supported_operators import update_registered_converter, get_model_alias 18 | from ._parse import update_registered_parser 19 | from .proto import get_latest_tested_opset_version 20 | 21 | 22 | def supported_converters(from_sklearn=False): 23 | """ 24 | Returns the list of supported converters. 25 | To find the converter associated to a specific model, 26 | the library gets the name of the model class, 27 | adds ``'Sklearn'`` as a prefix and retrieves 28 | the associated converter if available. 29 | 30 | :param from_sklearn: every supported model is mapped to converter 31 | by a name prefixed with ``'Sklearn'``, the prefix is removed 32 | if this parameter is False but the function only returns converters 33 | whose name is prefixed by ``'Sklearn'`` 34 | :return: list of supported models as string 35 | """ 36 | from .common._registration import _converter_pool 37 | 38 | # The two following lines populates the list of supported converters. 39 | from . import shape_calculators 40 | from . import operator_converters 41 | 42 | names = sorted(_converter_pool.keys()) 43 | if from_sklearn: 44 | return [_[7:] for _ in names if _.startswith("Sklearn")] 45 | return list(names) 46 | -------------------------------------------------------------------------------- /skl2onnx/__main__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | import sys 3 | from textwrap import dedent 4 | 5 | 6 | def _help(): 7 | print( 8 | dedent( 9 | """ 10 | python -m skl2onnx [command] 11 | 12 | command is: 13 | 14 | setup generate rst documentation for every ONNX operator 15 | before building the package""" 16 | ) 17 | ) 18 | 19 | 20 | def _setup(): 21 | from skl2onnx.algebra.onnx_ops import dynamic_class_creation 22 | 23 | dynamic_class_creation(True) 24 | 25 | 26 | def main(argv): 27 | if len(argv) <= 1 or "--help" in argv: 28 | _help() 29 | return 30 | 31 | if "setup" in argv: 32 | print("generate rst documentation for every ONNX operator") 33 | _setup() 34 | return 35 | 36 | _help() 37 | 38 | 39 | if __name__ == "__main__": 40 | main(sys.argv) 41 | -------------------------------------------------------------------------------- /skl2onnx/algebra/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from .onnx_operator import OnnxOperator 5 | -------------------------------------------------------------------------------- /skl2onnx/algebra/custom_ops.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | from .onnx_operator import OnnxOperator 4 | 5 | 6 | class OnnxCDist(OnnxOperator): 7 | """ 8 | Defines a custom operator not defined by ONNX 9 | specifications but in onnxruntime. 10 | """ 11 | 12 | since_version = 1 13 | expected_inputs = [("X", "T"), ("Y", "T")] 14 | expected_outputs = [("dist", "T")] 15 | input_range = [2, 2] 16 | output_range = [1, 1] 17 | is_deprecated = False 18 | domain = "com.microsoft" 19 | operator_name = "CDist" 20 | past_version = {} 21 | 22 | def __init__(self, X, Y, metric="sqeuclidean", op_version=None, **kwargs): 23 | """ 24 | :param X: array or OnnxOperatorMixin 25 | :param Y: array or OnnxOperatorMixin 26 | :param metric: distance type 27 | :param dtype: *np.float32* or *np.float64* 28 | :param op_version: opset version 29 | :param kwargs: addition parameter 30 | """ 31 | OnnxOperator.__init__( 32 | self, X, Y, metric=metric, op_version=op_version, **kwargs 33 | ) 34 | 35 | 36 | class OnnxSolve(OnnxOperator): 37 | """ 38 | Defines a custom operator not defined by ONNX 39 | specifications but in onnxruntime. 40 | """ 41 | 42 | since_version = 1 43 | expected_inputs = [("A", "T"), ("Y", "T")] 44 | expected_outputs = [("X", "T")] 45 | input_range = [2, 2] 46 | output_range = [1, 1] 47 | is_deprecated = False 48 | domain = "com.microsoft" 49 | operator_name = "Solve" 50 | past_version = {} 51 | 52 | def __init__(self, A, Y, lower=False, transposed=False, op_version=None, **kwargs): 53 | """ 54 | :param A: array or OnnxOperatorMixin 55 | :param Y: array or OnnxOperatorMixin 56 | :param lower: see :epkg:`solve` 57 | :param transposed: see :epkg:`solve` 58 | :param dtype: *np.float32* or *np.float64* 59 | :param op_version: opset version 60 | :param kwargs: additional parameters 61 | """ 62 | OnnxOperator.__init__( 63 | self, 64 | A, 65 | Y, 66 | lower=lower, 67 | transposed=transposed, 68 | op_version=op_version, 69 | **kwargs, 70 | ) 71 | -------------------------------------------------------------------------------- /skl2onnx/algebra/onnx_subgraph_operator_mixin.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | from .onnx_operator_mixin import OnnxOperatorMixin 4 | 5 | 6 | class OnnxSubGraphOperatorMixin(OnnxOperatorMixin): 7 | """ 8 | :class:`OnnxOperatorMixin` for converters. 9 | """ 10 | -------------------------------------------------------------------------------- /skl2onnx/common/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | from .exceptions import MissingShapeCalculator, MissingConverter 4 | -------------------------------------------------------------------------------- /skl2onnx/common/exceptions.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | """ 4 | Common errors. 5 | """ 6 | 7 | _missing_converter = """ 8 | It usually means the pipeline being converted contains a 9 | transformer or a predictor with no corresponding converter 10 | implemented in sklearn-onnx. If the converted is implemented 11 | in another library, you need to register 12 | the converted so that it can be used by sklearn-onnx (function 13 | update_registered_converter). If the model is not yet covered 14 | by sklearn-onnx, you may raise an issue to 15 | https://github.com/onnx/sklearn-onnx/issues 16 | to get the converter implemented or even contribute to the 17 | project. If the model is a custom model, a new converter must 18 | be implemented. Examples can be found in the gallery. 19 | """ 20 | 21 | 22 | class MissingShapeCalculator(RuntimeError): 23 | """ 24 | Raised when there is no registered shape calculator 25 | for a machine learning operator. 26 | """ 27 | 28 | def __init__(self, msg): 29 | super().__init__(msg + _missing_converter) 30 | 31 | 32 | class MissingConverter(RuntimeError): 33 | """ 34 | Raised when there is no registered converter 35 | for a machine learning operator. If the model is 36 | part of scikit-learn, you may raise an issue at 37 | https://github.com/onnx/sklearn-onnx/issues. 38 | """ 39 | 40 | def __init__(self, msg): 41 | super().__init__(msg + _missing_converter) 42 | -------------------------------------------------------------------------------- /skl2onnx/common/utils_checking.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from inspect import signature 5 | from collections import OrderedDict 6 | 7 | 8 | def check_signature(fct, reference, skip=None): 9 | """ 10 | Checks that two functions have the same signature 11 | (same parameter names). 12 | Raises an exception otherwise. 13 | """ 14 | 15 | def select_parameters(pars): 16 | new_pars = OrderedDict() 17 | for i, (name, p) in enumerate(pars.items()): 18 | if ( 19 | i >= 3 20 | and name in ("op_type", "op_domain", "op_version") 21 | and p.default is not None 22 | ): 23 | # Parameters op_type and op_domain are skipped. 24 | continue 25 | new_pars[name] = p 26 | return new_pars 27 | 28 | sig = signature(fct) 29 | sig_ref = signature(reference) 30 | fct_pars = select_parameters(sig.parameters) 31 | ref_pars = select_parameters(sig_ref.parameters) 32 | if len(fct_pars) != len(ref_pars): 33 | raise TypeError( 34 | "Function '{}' must have {} parameters but has {}." 35 | "".format(fct.__name__, len(ref_pars), len(fct_pars)) 36 | ) 37 | for i, (a, b) in enumerate(zip(fct_pars, ref_pars)): 38 | if a != b and skip is not None and b not in skip and a not in skip: 39 | raise NameError( 40 | "Parameter name mismatch at position {}." 41 | "Function '{}' has '{}' but '{}' is expected." 42 | "".format(i + 1, fct.__name__, a, b) 43 | ) 44 | -------------------------------------------------------------------------------- /skl2onnx/helpers/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from .investigate import collect_intermediate_steps, compare_objects 5 | from .investigate import enumerate_pipeline_models 6 | from .integration import add_onnx_graph 7 | -------------------------------------------------------------------------------- /skl2onnx/operator_converters/array_feature_extractor.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..proto import onnx_proto 5 | from ..common._registration import register_converter 6 | from ..common._topology import Scope, Operator 7 | from ..common._container import ModelComponentContainer 8 | 9 | 10 | def convert_sklearn_array_feature_extractor( 11 | scope: Scope, operator: Operator, container: ModelComponentContainer 12 | ): 13 | """ 14 | Extracts a subset of columns. This is used by *ColumnTransformer*. 15 | """ 16 | column_indices_name = scope.get_unique_variable_name("column_indices") 17 | 18 | for i, ind in enumerate(operator.column_indices): 19 | assert isinstance(ind, int), ( 20 | "Column {0}:'{1}' indices must be specified " 21 | "as integers. This error may happen when " 22 | "column names are used to define a " 23 | "ColumnTransformer. Column name in input data " 24 | "do not necessarily match input variables " 25 | "defined for the ONNX model." 26 | ).format(i, ind) 27 | container.add_initializer( 28 | column_indices_name, 29 | onnx_proto.TensorProto.INT64, 30 | [len(operator.column_indices)], 31 | operator.column_indices, 32 | ) 33 | 34 | container.add_node( 35 | "ArrayFeatureExtractor", 36 | [operator.inputs[0].full_name, column_indices_name], 37 | operator.outputs[0].full_name, 38 | name=scope.get_unique_operator_name("ArrayFeatureExtractor"), 39 | op_domain="ai.onnx.ml", 40 | ) 41 | 42 | 43 | register_converter( 44 | "SklearnArrayFeatureExtractor", convert_sklearn_array_feature_extractor 45 | ) 46 | -------------------------------------------------------------------------------- /skl2onnx/operator_converters/binariser.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..proto import onnx_proto 5 | from ..common.data_types import DoubleTensorType 6 | from ..common._registration import register_converter 7 | from ..common._topology import Scope, Operator 8 | from ..common._container import ModelComponentContainer 9 | from .common import concatenate_variables 10 | 11 | 12 | def convert_sklearn_binarizer( 13 | scope: Scope, operator: Operator, container: ModelComponentContainer 14 | ): 15 | feature_name = concatenate_variables(scope, operator.inputs, container) 16 | 17 | if isinstance(operator.inputs[0].type, DoubleTensorType): 18 | name0 = scope.get_unique_variable_name("cst0") 19 | name1 = scope.get_unique_variable_name("cst1") 20 | thres = scope.get_unique_variable_name("th") 21 | container.add_initializer(name0, onnx_proto.TensorProto.DOUBLE, [], [0.0]) 22 | container.add_initializer(name1, onnx_proto.TensorProto.DOUBLE, [], [1.0]) 23 | container.add_initializer( 24 | thres, 25 | onnx_proto.TensorProto.DOUBLE, 26 | [], 27 | [float(operator.raw_operator.threshold)], 28 | ) 29 | binbool = scope.get_unique_variable_name("binbool") 30 | container.add_node( 31 | "Less", 32 | [feature_name, thres], 33 | binbool, 34 | name=scope.get_unique_operator_name("Less"), 35 | ) 36 | container.add_node( 37 | "Where", [binbool, name0, name1], operator.output_full_names, name="Where" 38 | ) 39 | return 40 | 41 | op_type = "Binarizer" 42 | attrs = { 43 | "name": scope.get_unique_operator_name(op_type), 44 | "threshold": float(operator.raw_operator.threshold), 45 | } 46 | container.add_node( 47 | op_type, 48 | feature_name, 49 | operator.output_full_names, 50 | op_domain="ai.onnx.ml", 51 | **attrs, 52 | ) 53 | 54 | 55 | register_converter("SklearnBinarizer", convert_sklearn_binarizer) 56 | -------------------------------------------------------------------------------- /skl2onnx/operator_converters/cast_op.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._apply_operation import apply_cast 5 | from ..common._registration import register_converter 6 | from ..common._topology import Scope, Operator 7 | from ..common._container import ModelComponentContainer 8 | from .._supported_operators import sklearn_operator_name_map 9 | 10 | 11 | def convert_sklearn_cast( 12 | scope: Scope, operator: Operator, container: ModelComponentContainer 13 | ): 14 | inp = operator.inputs[0] 15 | exptype = operator.outputs[0] 16 | res = exptype.type.to_onnx_type() 17 | et = res.tensor_type.elem_type 18 | apply_cast(scope, inp.full_name, exptype.full_name, container, to=et) 19 | 20 | 21 | def convert_sklearn_cast_regressor( 22 | scope: Scope, operator: Operator, container: ModelComponentContainer 23 | ): 24 | op = operator.raw_operator 25 | estimator = op.estimator 26 | 27 | op_type = sklearn_operator_name_map[type(estimator)] 28 | this_operator = scope.declare_local_operator(op_type, estimator) 29 | this_operator.inputs = operator.inputs 30 | 31 | cls = operator.inputs[0].type.__class__ 32 | var_name = scope.declare_local_variable("cast_est", cls()) 33 | this_operator.outputs.append(var_name) 34 | var_name = var_name.onnx_name 35 | 36 | exptype = operator.outputs[0] 37 | res = exptype.type.to_onnx_type() 38 | et = res.tensor_type.elem_type 39 | apply_cast(scope, var_name, exptype.full_name, container, to=et) 40 | 41 | 42 | register_converter("SklearnCastTransformer", convert_sklearn_cast) 43 | register_converter("SklearnCastRegressor", convert_sklearn_cast_regressor) 44 | register_converter("SklearnCast", convert_sklearn_cast) 45 | -------------------------------------------------------------------------------- /skl2onnx/operator_converters/common.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._apply_operation import apply_cast 5 | from ..common.data_types import ( 6 | Int64TensorType, 7 | FloatTensorType, 8 | DoubleTensorType, 9 | StringTensorType, 10 | guess_proto_type, 11 | ) 12 | 13 | 14 | def concatenate_variables(scope, variables, container, main_type=None): 15 | """ 16 | This function allocate operators to from a float tensor by concatenating 17 | all input variables. Notice that if all integer inputs would be converted 18 | to floats before concatenation. 19 | """ 20 | if main_type is None: 21 | main_type = variables[0].type.__class__ 22 | 23 | # Check if it's possible to concatenate those inputs. 24 | type_set = {type(variable.type) for variable in variables} 25 | number_type_set = { 26 | FloatTensorType, 27 | Int64TensorType, 28 | DoubleTensorType, 29 | StringTensorType, 30 | } 31 | if any(itype not in number_type_set for itype in type_set): 32 | raise RuntimeError( 33 | "Numerical tensor(s) and string tensor(s) cannot be concatenated." 34 | ) 35 | # input variables' names we want to concatenate 36 | input_names = [] 37 | # dimensions of the variables that is going to be concatenated 38 | input_dims = [] 39 | 40 | # Collect input variable names and do cast if needed 41 | for variable in variables: 42 | if not isinstance(variable.type, main_type): 43 | proto_type = guess_proto_type(main_type()) 44 | new_name = scope.get_unique_variable_name("cast") 45 | apply_cast(scope, variable.full_name, new_name, container, to=proto_type) 46 | input_names.append(new_name) 47 | else: 48 | input_names.append(variable.full_name) 49 | # We assume input variables' shape are [1, C_1], ..., [1, C_n], 50 | # if there are n inputs. 51 | input_dims.append(variable.type.shape[1]) 52 | 53 | if len(input_names) == 1: 54 | # No need to concatenate tensors if there is only one input 55 | return input_names[0] 56 | 57 | # To combine all inputs, we need a FeatureVectorizer 58 | op_type = "FeatureVectorizer" 59 | attrs = { 60 | "name": scope.get_unique_operator_name(op_type), 61 | "inputdimensions": input_dims, 62 | } 63 | # Create a variable name to capture feature vectorizer's output 64 | # Set up our FeatureVectorizer 65 | concatenated_name = scope.get_unique_variable_name("concatenated") 66 | container.add_node( 67 | op_type, input_names, concatenated_name, op_domain="ai.onnx.ml", **attrs 68 | ) 69 | if main_type == FloatTensorType: 70 | return concatenated_name 71 | # Cast output as FeatureVectorizer always produces float32. 72 | concatenated_name_cast = scope.get_unique_variable_name("concatenated_cast") 73 | container.add_node( 74 | "CastLike", [concatenated_name, input_names[0]], concatenated_name_cast 75 | ) 76 | 77 | return concatenated_name_cast 78 | -------------------------------------------------------------------------------- /skl2onnx/operator_converters/concat_op.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._apply_operation import apply_concat, apply_cast 5 | from ..common._registration import register_converter 6 | from ..common._topology import Scope, Operator 7 | from ..common._container import ModelComponentContainer 8 | 9 | 10 | def convert_sklearn_concat( 11 | scope: Scope, operator: Operator, container: ModelComponentContainer 12 | ): 13 | exptype = operator.outputs[0].type 14 | new_inputs = [] 15 | for inp in operator.inputs: 16 | if inp.type.__class__ is exptype.__class__: 17 | new_inputs.append(inp.full_name) 18 | continue 19 | name = scope.get_unique_variable_name("{}_cast".format(inp.full_name)) 20 | res = exptype.to_onnx_type() 21 | et = res.tensor_type.elem_type 22 | apply_cast(scope, inp.full_name, name, container, to=et) 23 | new_inputs.append(name) 24 | 25 | apply_concat(scope, new_inputs, operator.outputs[0].full_name, container, axis=1) 26 | 27 | 28 | register_converter("SklearnConcat", convert_sklearn_concat) 29 | -------------------------------------------------------------------------------- /skl2onnx/operator_converters/cross_decomposition.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | import numpy as np 4 | from ..proto import onnx_proto 5 | from ..common._registration import register_converter 6 | from ..common._topology import Scope, Operator 7 | from ..common._container import ModelComponentContainer 8 | from ..common.data_types import Int64TensorType, guess_numpy_type, guess_proto_type 9 | from ..algebra.onnx_ops import OnnxAdd, OnnxCast, OnnxDiv, OnnxMatMul, OnnxSub 10 | 11 | 12 | def _skl150() -> bool: 13 | import sklearn 14 | import packaging.version as pv 15 | 16 | return pv.Version(sklearn.__version__) >= pv.Version("1.5.0") 17 | 18 | 19 | def convert_pls_regression( 20 | scope: Scope, operator: Operator, container: ModelComponentContainer 21 | ): 22 | X = operator.inputs[0] 23 | op = operator.raw_operator 24 | opv = container.target_opset 25 | dtype = guess_numpy_type(X.type) 26 | if dtype != np.float64: 27 | dtype = np.float32 28 | proto_dtype = guess_proto_type(operator.inputs[0].type) 29 | if proto_dtype != onnx_proto.TensorProto.DOUBLE: 30 | proto_dtype = onnx_proto.TensorProto.FLOAT 31 | 32 | if isinstance(X.type, Int64TensorType): 33 | X = OnnxCast(X, to=proto_dtype, op_version=opv) 34 | 35 | coefs = op.x_mean_ if hasattr(op, "x_mean_") else op._x_mean 36 | std = op.x_std_ if hasattr(op, "x_std_") else op._x_std 37 | if hasattr(op, "intercept_") and _skl150(): 38 | # scikit-learn==1.5.0 39 | # https://github.com/scikit-learn/scikit-learn/pull/28612 40 | ym = op.intercept_ 41 | centered_x = OnnxSub(X, coefs.astype(dtype), op_version=opv) 42 | coefs = op.coef_.T.astype(dtype) 43 | dot = OnnxMatMul(centered_x, coefs, op_version=opv) 44 | else: 45 | ym = op.y_mean_ if hasattr(op, "x_mean_") else op._y_mean 46 | 47 | norm_x = OnnxDiv( 48 | OnnxSub(X, coefs.astype(dtype), op_version=opv), 49 | std.astype(dtype), 50 | op_version=opv, 51 | ) 52 | if hasattr(op, "set_predict_request"): 53 | # new in 1.3 54 | coefs = op.coef_.T.astype(dtype) 55 | else: 56 | coefs = op.coef_.astype(dtype) 57 | dot = OnnxMatMul(norm_x, coefs, op_version=opv) 58 | 59 | pred = OnnxAdd(dot, ym.astype(dtype), op_version=opv, output_names=operator.outputs) 60 | pred.add_to(scope, container) 61 | 62 | 63 | register_converter("SklearnPLSRegression", convert_pls_regression) 64 | -------------------------------------------------------------------------------- /skl2onnx/operator_converters/dict_vectoriser.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | import numbers 5 | from ..common._registration import register_converter 6 | from ..common._topology import Scope, Operator 7 | from ..common._container import ModelComponentContainer 8 | 9 | 10 | def convert_sklearn_dict_vectorizer( 11 | scope: Scope, operator: Operator, container: ModelComponentContainer 12 | ): 13 | """ 14 | When a *DictVectorizer* converts numbers into strings, 15 | scikit-learn adds a separator to disambiguate strings 16 | and still outputs floats. The method *predict* 17 | contains the following lines: 18 | 19 | :: 20 | 21 | if isinstance(v, str): 22 | f = "%s%s%s" % (f, self.separator, v) 23 | v = 1 24 | 25 | This cannot be implemented in ONNX. The converter 26 | raises an exception in that case. 27 | """ 28 | op_type = "DictVectorizer" 29 | op = operator.raw_operator 30 | attrs = {"name": scope.get_unique_operator_name(op_type)} 31 | if all(isinstance(feature_name, str) for feature_name in op.feature_names_): 32 | # all strings, scikit-learn does the following: 33 | new_cats = [] 34 | unique_cats = set() 35 | nbsep = 0 36 | for i in op.feature_names_: 37 | if op.separator in i: 38 | nbsep += 1 39 | if i in unique_cats: 40 | raise RuntimeError("Duplicated category '{}'.".format(i)) 41 | unique_cats.add(i) 42 | new_cats.append(i) 43 | if nbsep >= len(new_cats): 44 | raise RuntimeError( 45 | "All categories contain a separator '{}'. " 46 | "This case is not supported by the converter. " 47 | "The mapping must map to numbers not string.".format(op.separator) 48 | ) 49 | attrs["string_vocabulary"] = new_cats 50 | elif all( 51 | isinstance(feature_name, numbers.Integral) for feature_name in op.feature_names_ 52 | ): 53 | attrs["int64_vocabulary"] = list( # noqa: C400 54 | int(i) for i in op.feature_names_ 55 | ) 56 | else: 57 | raise ValueError("Keys must be all integers or all strings.") 58 | 59 | container.add_node( 60 | op_type, 61 | operator.input_full_names, 62 | operator.output_full_names, 63 | op_domain="ai.onnx.ml", 64 | **attrs, 65 | ) 66 | 67 | 68 | register_converter("SklearnDictVectorizer", convert_sklearn_dict_vectorizer) 69 | -------------------------------------------------------------------------------- /skl2onnx/operator_converters/feature_selection.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..proto import onnx_proto 5 | from ..common._registration import register_converter 6 | from ..common._topology import Scope, Operator 7 | from ..common._container import ModelComponentContainer 8 | 9 | 10 | def convert_sklearn_feature_selection( 11 | scope: Scope, operator: Operator, container: ModelComponentContainer 12 | ): 13 | op = operator.raw_operator 14 | # Get indices of the features selected 15 | index = op.get_support(indices=True) 16 | if len(index) == 0: 17 | raise RuntimeError( 18 | "Model '{}' did not select any feature. " 19 | "This model cannot be converted into ONNX." 20 | "".format(op.__class__.__name__) 21 | ) 22 | output_name = operator.outputs[0].full_name 23 | if index.any(): 24 | column_indices_name = scope.get_unique_variable_name("column_indices") 25 | 26 | container.add_initializer( 27 | column_indices_name, onnx_proto.TensorProto.INT64, [len(index)], index 28 | ) 29 | 30 | container.add_node( 31 | "ArrayFeatureExtractor", 32 | [operator.inputs[0].full_name, column_indices_name], 33 | output_name, 34 | op_domain="ai.onnx.ml", 35 | name=scope.get_unique_operator_name("ArrayFeatureExtractor"), 36 | ) 37 | else: 38 | container.add_node( 39 | "ConstantOfShape", operator.inputs[0].full_name, output_name, op_version=9 40 | ) 41 | 42 | 43 | register_converter("SklearnGenericUnivariateSelect", convert_sklearn_feature_selection) 44 | register_converter("SklearnRFE", convert_sklearn_feature_selection) 45 | register_converter("SklearnRFECV", convert_sklearn_feature_selection) 46 | register_converter("SklearnSelectFdr", convert_sklearn_feature_selection) 47 | register_converter("SklearnSelectFpr", convert_sklearn_feature_selection) 48 | register_converter("SklearnSelectFromModel", convert_sklearn_feature_selection) 49 | register_converter("SklearnSelectFwe", convert_sklearn_feature_selection) 50 | register_converter("SklearnSelectKBest", convert_sklearn_feature_selection) 51 | register_converter("SklearnSelectPercentile", convert_sklearn_feature_selection) 52 | register_converter("SklearnVarianceThreshold", convert_sklearn_feature_selection) 53 | -------------------------------------------------------------------------------- /skl2onnx/operator_converters/flatten_op.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | from ..common._registration import register_converter 4 | from ..common._topology import Scope, Operator 5 | from ..common._container import ModelComponentContainer 6 | 7 | 8 | def convert_sklearn_flatten( 9 | scope: Scope, operator: Operator, container: ModelComponentContainer 10 | ): 11 | name = scope.get_unique_operator_name("Flatten") 12 | container.add_node( 13 | "Flatten", 14 | operator.inputs[0].full_name, 15 | operator.outputs[0].full_name, 16 | name=name, 17 | axis=1, 18 | ) 19 | 20 | 21 | register_converter("SklearnFlatten", convert_sklearn_flatten) 22 | -------------------------------------------------------------------------------- /skl2onnx/operator_converters/function_transformer.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_converter 5 | from ..common._apply_operation import apply_concat, apply_identity 6 | from ..common._topology import Scope, Operator 7 | from ..common._container import ModelComponentContainer 8 | 9 | 10 | def convert_sklearn_function_transformer( 11 | scope: Scope, operator: Operator, container: ModelComponentContainer 12 | ): 13 | op = operator.raw_operator 14 | if op.func is not None: 15 | raise RuntimeError( 16 | "FunctionTransformer is not supported unless the " 17 | "transform function is None (= identity). " 18 | "You may raise an issue at " 19 | "https://github.com/onnx/sklearn-onnx/issues." 20 | ) 21 | if len(operator.inputs) == 1: 22 | apply_identity( 23 | scope, 24 | operator.inputs[0].full_name, 25 | operator.outputs[0].full_name, 26 | container, 27 | ) 28 | else: 29 | apply_concat( 30 | scope, 31 | [i.full_name for i in operator.inputs], 32 | operator.outputs[0].full_name, 33 | container, 34 | axis=1, 35 | ) 36 | 37 | 38 | register_converter("SklearnFunctionTransformer", convert_sklearn_function_transformer) 39 | -------------------------------------------------------------------------------- /skl2onnx/operator_converters/grid_search_cv.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | from ..common._apply_operation import apply_identity 4 | from ..common._registration import register_converter 5 | from ..common._topology import Scope, Operator 6 | from ..common._container import ModelComponentContainer 7 | from .._supported_operators import sklearn_operator_name_map 8 | 9 | 10 | def convert_sklearn_grid_search_cv( 11 | scope: Scope, operator: Operator, container: ModelComponentContainer 12 | ): 13 | """ 14 | Converter for scikit-learn's GridSearchCV. 15 | """ 16 | opts = scope.get_options(operator.raw_operator) 17 | grid_search_op = operator.raw_operator 18 | best_estimator = grid_search_op.best_estimator_ 19 | op_type = sklearn_operator_name_map[type(best_estimator)] 20 | grid_search_operator = scope.declare_local_operator(op_type, best_estimator) 21 | container.add_options(id(best_estimator), opts) 22 | scope.add_options(id(best_estimator), opts) 23 | grid_search_operator.inputs = operator.inputs 24 | 25 | for _i, o in enumerate(operator.outputs): 26 | v = scope.declare_local_variable(o.onnx_name, type=o.type) 27 | grid_search_operator.outputs.append(v) 28 | apply_identity(scope, v.full_name, o.full_name, container) 29 | 30 | 31 | register_converter( 32 | "SklearnGridSearchCV", convert_sklearn_grid_search_cv, options="passthrough" 33 | ) 34 | -------------------------------------------------------------------------------- /skl2onnx/operator_converters/id_op.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._apply_operation import apply_identity 5 | from ..common._registration import register_converter 6 | from ..common._topology import Scope, Operator 7 | from ..common._container import ModelComponentContainer 8 | 9 | 10 | def convert_sklearn_identity( 11 | scope: Scope, operator: Operator, container: ModelComponentContainer 12 | ): 13 | apply_identity( 14 | scope, 15 | operator.inputs[0].full_name, 16 | operator.outputs[0].full_name, 17 | container, 18 | operator_name=scope.get_unique_operator_name("CIdentity"), 19 | ) 20 | 21 | 22 | register_converter("SklearnIdentity", convert_sklearn_identity) 23 | -------------------------------------------------------------------------------- /skl2onnx/operator_converters/label_encoder.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | import numpy as np 5 | from ..common._registration import register_converter 6 | from ..common._topology import Scope, Operator 7 | from ..common._container import ModelComponentContainer 8 | 9 | 10 | def convert_sklearn_label_encoder( 11 | scope: Scope, operator: Operator, container: ModelComponentContainer 12 | ): 13 | op = operator.raw_operator 14 | op_type = "LabelEncoder" 15 | attrs = {"name": scope.get_unique_operator_name(op_type)} 16 | classes = op.classes_ 17 | if np.issubdtype(classes.dtype, np.floating): 18 | attrs["keys_floats"] = classes 19 | elif np.issubdtype(classes.dtype, np.signedinteger) or classes.dtype == np.bool_: 20 | attrs["keys_int64s"] = [int(i) for i in classes] 21 | else: 22 | attrs["keys_strings"] = np.array([s.encode("utf-8") for s in classes]) 23 | attrs["values_int64s"] = np.arange(len(classes)) 24 | 25 | cop = container.target_opset_any_domain("ai.onnx.ml") 26 | if cop is not None and cop < 2: 27 | raise RuntimeError( 28 | "LabelEncoder requires at least opset 2 for domain 'ai.onnx.ml' " 29 | "not {}".format(cop) 30 | ) 31 | 32 | container.add_node( 33 | op_type, 34 | operator.input_full_names, 35 | operator.output_full_names, 36 | op_domain="ai.onnx.ml", 37 | op_version=2, 38 | **attrs, 39 | ) 40 | 41 | 42 | register_converter("SklearnLabelEncoder", convert_sklearn_label_encoder) 43 | -------------------------------------------------------------------------------- /skl2onnx/operator_converters/multiply_op.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._apply_operation import apply_mul 5 | from ..common._registration import register_converter 6 | from ..common._topology import Scope, Operator 7 | from ..common._container import ModelComponentContainer 8 | from ..common.data_types import guess_proto_type 9 | 10 | 11 | def convert_sklearn_multiply( 12 | scope: Scope, operator: Operator, container: ModelComponentContainer 13 | ): 14 | for input, output in zip(operator.inputs, operator.outputs): 15 | operand_name = scope.get_unique_variable_name("operand") 16 | 17 | container.add_initializer( 18 | operand_name, guess_proto_type(input.type), [], [operator.operand] 19 | ) 20 | 21 | apply_mul( 22 | scope, 23 | [input.full_name, operand_name], 24 | output.full_name, 25 | container, 26 | ) 27 | 28 | 29 | register_converter("SklearnMultiply", convert_sklearn_multiply) 30 | -------------------------------------------------------------------------------- /skl2onnx/operator_converters/normaliser.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_converter 5 | from ..common._topology import Scope, Operator 6 | from ..common._container import ModelComponentContainer 7 | from ..common._apply_operation import apply_normalizer 8 | from ..common.data_types import DoubleTensorType 9 | from .common import concatenate_variables 10 | 11 | 12 | def convert_sklearn_normalizer( 13 | scope: Scope, operator: Operator, container: ModelComponentContainer 14 | ): 15 | if len(operator.inputs) > 1: 16 | # If there are multiple input tensors, 17 | # we combine them using a FeatureVectorizer 18 | feature_name = concatenate_variables(scope, operator.inputs, container) 19 | else: 20 | # No concatenation is needed, we just use the first variable's name 21 | feature_name = operator.inputs[0].full_name 22 | op = operator.raw_operator 23 | norm_map = {"max": "MAX", "l1": "L1", "l2": "L2"} 24 | if op.norm in norm_map: 25 | norm = norm_map[op.norm] 26 | else: 27 | raise RuntimeError( 28 | "Invalid norm '%s'. You may raise an issue" 29 | "at https://github.com/onnx/sklearn-onnx/" 30 | "issues." % op.norm 31 | ) 32 | use_float = type(operator.inputs[0].type) not in (DoubleTensorType,) 33 | apply_normalizer( 34 | scope, 35 | feature_name, 36 | operator.outputs[0].full_name, 37 | container, 38 | norm=norm, 39 | use_float=use_float, 40 | ) 41 | 42 | 43 | register_converter("SklearnNormalizer", convert_sklearn_normalizer) 44 | -------------------------------------------------------------------------------- /skl2onnx/operator_converters/pipelines.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | from sklearn.base import is_classifier 4 | from sklearn.pipeline import Pipeline 5 | from ..common._registration import register_converter 6 | from ..common._topology import Scope, Operator 7 | from ..common._container import ModelComponentContainer 8 | from ..common._apply_operation import apply_cast 9 | from ..common.data_types import guess_proto_type 10 | from .._parse import _parse_sklearn 11 | 12 | 13 | def convert_pipeline( 14 | scope: Scope, operator: Operator, container: ModelComponentContainer 15 | ): 16 | model = operator.raw_operator 17 | inputs = operator.inputs 18 | for step in model.steps: 19 | step_model = step[1] 20 | if is_classifier(step_model) or isinstance(step_model, Pipeline): 21 | scope.add_options(id(step_model), options={"zipmap": False}) 22 | container.add_options(id(step_model), options={"zipmap": False}) 23 | outputs = _parse_sklearn(scope, step_model, inputs, custom_parsers=None) 24 | inputs = outputs 25 | if len(outputs) != len(operator.outputs): 26 | raise RuntimeError( 27 | "Mismatch between pipeline output %d and " 28 | "last step outputs %d." % (len(outputs), len(operator.outputs)) 29 | ) 30 | for fr, to in zip(outputs, operator.outputs): 31 | if isinstance(to.type, type(fr.type)): 32 | container.add_node( 33 | "Identity", 34 | fr.full_name, 35 | to.full_name, 36 | name=scope.get_unique_operator_name("Id" + operator.onnx_name), 37 | ) 38 | else: 39 | # If Pipeline output types are different with last stage output type 40 | apply_cast( 41 | scope, 42 | fr.full_name, 43 | to.full_name, 44 | container, 45 | operator_name=scope.get_unique_operator_name( 46 | "Cast" + operator.onnx_name 47 | ), 48 | to=guess_proto_type(to.type), 49 | ) 50 | 51 | 52 | def convert_feature_union( 53 | scope: Scope, operator: Operator, container: ModelComponentContainer 54 | ): 55 | raise NotImplementedError( 56 | "This converter not needed so far. It is usually handled during parsing." 57 | ) 58 | 59 | 60 | def convert_column_transformer( 61 | scope: Scope, operator: Operator, container: ModelComponentContainer 62 | ): 63 | raise NotImplementedError( 64 | "This converter not needed so far. It is usually handled during parsing." 65 | ) 66 | 67 | 68 | register_converter( 69 | "SklearnPipeline", 70 | convert_pipeline, 71 | options={ 72 | "zipmap": [True, False, "columns"], 73 | "nocl": [True, False], 74 | "output_class_labels": [False, True], 75 | "raw_scores": [True, False], 76 | }, 77 | ) 78 | register_converter("SklearnFeatureUnion", convert_feature_union) 79 | register_converter("SklearnColumnTransformer", convert_column_transformer) 80 | -------------------------------------------------------------------------------- /skl2onnx/operator_converters/random_projection.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | import numpy as np 4 | from ..common._registration import register_converter 5 | from ..common.data_types import guess_numpy_type 6 | from ..common._topology import Scope, Operator 7 | from ..common._container import ModelComponentContainer 8 | from ..algebra.onnx_ops import OnnxMatMul 9 | 10 | 11 | def convert_random_projection( 12 | scope: Scope, operator: Operator, container: ModelComponentContainer 13 | ): 14 | """Converter for PowerTransformer""" 15 | op_in = operator.inputs[0] 16 | op_out = operator.outputs[0].full_name 17 | op = operator.raw_operator 18 | opv = container.target_opset 19 | dtype = guess_numpy_type(operator.inputs[0].type) 20 | if dtype != np.float64: 21 | dtype = np.float32 22 | 23 | y = OnnxMatMul( 24 | op_in, op.components_.T.astype(dtype), op_version=opv, output_names=[op_out] 25 | ) 26 | y.add_to(scope, container) 27 | 28 | 29 | register_converter("SklearnGaussianRandomProjection", convert_random_projection) 30 | -------------------------------------------------------------------------------- /skl2onnx/operator_converters/random_trees_embedding.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | import numpy as np 4 | from ..common._registration import register_converter 5 | from ..common._topology import Scope, Operator 6 | from ..common._container import ModelComponentContainer 7 | from ..algebra.onnx_operator import OnnxSubEstimator 8 | from ..algebra.onnx_ops import OnnxIdentity, OnnxConcat, OnnxReshape 9 | 10 | 11 | def convert_sklearn_random_tree_embedding( 12 | scope: Scope, operator: Operator, container: ModelComponentContainer 13 | ): 14 | X = operator.inputs[0] 15 | out = operator.outputs 16 | op = operator.raw_operator 17 | opv = container.target_opset 18 | 19 | if op.sparse_output: 20 | raise RuntimeError( 21 | "The converter cannot convert the model with sparse outputs." 22 | ) 23 | 24 | outputs = [] 25 | for est in op.estimators_: 26 | leave = OnnxSubEstimator( 27 | est, X, op_version=opv, options={"decision_leaf": True} 28 | ) 29 | outputs.append( 30 | OnnxReshape(leave[1], np.array([-1, 1], dtype=np.int64), op_version=opv) 31 | ) 32 | merged = OnnxConcat(*outputs, axis=1, op_version=opv) 33 | ohe = OnnxSubEstimator(op.one_hot_encoder_, merged, op_version=opv) 34 | y = OnnxIdentity(ohe, op_version=opv, output_names=out) 35 | y.add_to(scope, container) 36 | 37 | 38 | register_converter("SklearnRandomTreesEmbedding", convert_sklearn_random_tree_embedding) 39 | -------------------------------------------------------------------------------- /skl2onnx/operator_converters/ransac_regressor.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from .._supported_operators import sklearn_operator_name_map 5 | from ..common._apply_operation import apply_identity 6 | from ..common._registration import register_converter 7 | from ..common._topology import Scope, Operator 8 | from ..common._container import ModelComponentContainer 9 | 10 | 11 | def convert_sklearn_ransac_regressor( 12 | scope: Scope, operator: Operator, container: ModelComponentContainer 13 | ): 14 | """ 15 | Converter for RANSACRegressor. 16 | """ 17 | ransac_op = operator.raw_operator 18 | op_type = sklearn_operator_name_map[type(ransac_op.estimator_)] 19 | this_operator = scope.declare_local_operator(op_type, ransac_op.estimator_) 20 | this_operator.inputs = operator.inputs 21 | label_name = scope.declare_local_variable( 22 | "label", operator.inputs[0].type.__class__() 23 | ) 24 | this_operator.outputs.append(label_name) 25 | apply_identity( 26 | scope, label_name.full_name, operator.outputs[0].full_name, container 27 | ) 28 | 29 | 30 | register_converter("SklearnRANSACRegressor", convert_sklearn_ransac_regressor) 31 | -------------------------------------------------------------------------------- /skl2onnx/operator_converters/replace_op.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_converter 5 | from ..common._topology import Scope, Operator 6 | from ..common._container import ModelComponentContainer 7 | from ..common.data_types import guess_proto_type 8 | 9 | 10 | def convert_sklearn_replace_transformer( 11 | scope: Scope, operator: Operator, container: ModelComponentContainer 12 | ): 13 | op = operator.raw_operator 14 | input_name = operator.inputs[0].full_name 15 | output_name = operator.outputs[0].full_name 16 | 17 | proto_dtype = guess_proto_type(operator.inputs[0].type) 18 | 19 | cst_nan_name = scope.get_unique_variable_name("nan_name") 20 | container.add_initializer(cst_nan_name, proto_dtype, [1], [op.to_value]) 21 | cst_zero_name = scope.get_unique_variable_name("zero_name") 22 | container.add_initializer(cst_zero_name, proto_dtype, [1], [op.from_value]) 23 | 24 | mask_name = scope.get_unique_variable_name("mask_name") 25 | container.add_node( 26 | "Equal", 27 | [input_name, cst_zero_name], 28 | mask_name, 29 | name=scope.get_unique_operator_name("Equal"), 30 | ) 31 | 32 | container.add_node( 33 | "Where", 34 | [mask_name, cst_nan_name, input_name], 35 | output_name, 36 | name=scope.get_unique_operator_name("Where"), 37 | ) 38 | 39 | 40 | register_converter("SklearnReplaceTransformer", convert_sklearn_replace_transformer) 41 | -------------------------------------------------------------------------------- /skl2onnx/operator_converters/sequence.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | from ..proto import onnx_proto 4 | from ..common._registration import register_converter 5 | from ..common._topology import Scope, Operator 6 | from ..common._container import ModelComponentContainer 7 | 8 | 9 | def convert_sklearn_sequence_at( 10 | scope: Scope, operator: Operator, container: ModelComponentContainer 11 | ): 12 | i_index = operator.index 13 | index_name = scope.get_unique_variable_name("seq_at%d" % i_index) 14 | container.add_initializer(index_name, onnx_proto.TensorProto.INT64, [], [i_index]) 15 | container.add_node( 16 | "SequenceAt", 17 | [operator.inputs[0].full_name, index_name], 18 | operator.outputs[0].full_name, 19 | name=scope.get_unique_operator_name("SequenceAt%d" % i_index), 20 | ) 21 | 22 | 23 | def convert_sklearn_sequence_construct( 24 | scope: Scope, operator: Operator, container: ModelComponentContainer 25 | ): 26 | container.add_node( 27 | "SequenceConstruct", 28 | [i.full_name for i in operator.inputs], 29 | operator.outputs[0].full_name, 30 | name=scope.get_unique_operator_name("SequenceConstruct"), 31 | ) 32 | 33 | 34 | register_converter("SklearnSequenceAt", convert_sklearn_sequence_at) 35 | register_converter("SklearnSequenceConstruct", convert_sklearn_sequence_construct) 36 | -------------------------------------------------------------------------------- /skl2onnx/operator_converters/sgd_oneclass_svm.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._apply_operation import apply_cast, apply_sub 5 | from ..common.data_types import BooleanTensorType, Int64TensorType, guess_proto_type 6 | from ..common._registration import register_converter 7 | from ..common._topology import Scope, Operator 8 | from ..common._container import ModelComponentContainer 9 | from ..proto import onnx_proto 10 | 11 | 12 | def convert_sklearn_sgd_oneclass_svm( 13 | scope: Scope, operator: Operator, container: ModelComponentContainer 14 | ): 15 | input_name = operator.inputs[0].full_name 16 | output_names = operator.output_full_names 17 | model = operator.raw_operator 18 | coef = model.coef_.T 19 | offset = model.offset_ 20 | 21 | proto_dtype = guess_proto_type(operator.inputs[0].type) 22 | if proto_dtype != onnx_proto.TensorProto.DOUBLE: 23 | proto_dtype = onnx_proto.TensorProto.FLOAT 24 | 25 | if isinstance(operator.inputs[0].type, (BooleanTensorType, Int64TensorType)): 26 | cast_input_name = scope.get_unique_variable_name("cast_input") 27 | apply_cast( 28 | scope, operator.input_full_names, cast_input_name, container, to=proto_dtype 29 | ) 30 | input_name = cast_input_name 31 | 32 | coef_name = scope.get_unique_variable_name("coef") 33 | container.add_initializer(coef_name, proto_dtype, coef.shape, coef.ravel()) 34 | 35 | offset_name = scope.get_unique_variable_name("offset") 36 | container.add_initializer(offset_name, proto_dtype, offset.shape, offset) 37 | 38 | matmul_result_name = scope.get_unique_variable_name("matmul_result") 39 | container.add_node( 40 | "MatMul", 41 | [input_name, coef_name], 42 | matmul_result_name, 43 | name=scope.get_unique_operator_name("MatMul"), 44 | ) 45 | 46 | apply_sub( 47 | scope, 48 | [matmul_result_name, offset_name], 49 | output_names[1], 50 | container, 51 | broadcast=0, 52 | ) 53 | 54 | pred = scope.get_unique_variable_name("class_prediction") 55 | container.add_node("Sign", output_names[1], pred, op_version=9) 56 | apply_cast(scope, pred, output_names[0], container, to=onnx_proto.TensorProto.INT64) 57 | 58 | 59 | register_converter("SklearnSGDOneClassSVM", convert_sklearn_sgd_oneclass_svm) 60 | -------------------------------------------------------------------------------- /skl2onnx/operator_converters/tfidf_vectoriser.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | from onnx import onnx_pb as onnx_proto 4 | from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer 5 | from ..common._apply_operation import apply_identity 6 | from ..common.data_types import FloatTensorType, DoubleTensorType, guess_proto_type 7 | from ..common._registration import register_converter 8 | from ..common._topology import Scope, Operator 9 | from ..common._container import ModelComponentContainer 10 | from .._supported_operators import sklearn_operator_name_map 11 | 12 | 13 | def convert_sklearn_tfidf_vectoriser( 14 | scope: Scope, operator: Operator, container: ModelComponentContainer 15 | ): 16 | """ 17 | Converter for scikit-learn's TfidfVectoriser. 18 | """ 19 | tfidf_op = operator.raw_operator 20 | op_type = sklearn_operator_name_map[CountVectorizer] 21 | cv_operator = scope.declare_local_operator(op_type, tfidf_op) 22 | cv_operator.inputs = operator.inputs 23 | columns = max(operator.raw_operator.vocabulary_.values()) + 1 24 | proto_dtype = guess_proto_type(operator.inputs[0].type) 25 | if proto_dtype != onnx_proto.TensorProto.DOUBLE: 26 | proto_dtype = onnx_proto.TensorProto.FLOAT 27 | if proto_dtype == onnx_proto.TensorProto.FLOAT: 28 | clr = FloatTensorType 29 | elif proto_dtype == onnx_proto.TensorProto.DOUBLE: 30 | clr = DoubleTensorType 31 | else: 32 | raise RuntimeError( 33 | "Unexpected dtype '{}'. Float or double expected.".format(proto_dtype) 34 | ) 35 | cv_output_name = scope.declare_local_variable( 36 | "count_vec_output", clr([None, columns]) 37 | ) 38 | cv_operator.outputs.append(cv_output_name) 39 | 40 | op_type = sklearn_operator_name_map[TfidfTransformer] 41 | tfidf_operator = scope.declare_local_operator(op_type, tfidf_op) 42 | tfidf_operator.inputs.append(cv_output_name) 43 | tfidf_output_name = scope.declare_local_variable("tfidf_output", clr()) 44 | tfidf_operator.outputs.append(tfidf_output_name) 45 | 46 | apply_identity( 47 | scope, tfidf_output_name.full_name, operator.outputs[0].full_name, container 48 | ) 49 | 50 | 51 | register_converter( 52 | "SklearnTfidfVectorizer", 53 | convert_sklearn_tfidf_vectoriser, 54 | options={ 55 | "tokenexp": None, 56 | "separators": None, 57 | "nan": [True, False], 58 | "keep_empty_string": [True, False], 59 | "locale": None, 60 | }, 61 | ) 62 | -------------------------------------------------------------------------------- /skl2onnx/operator_converters/tuned_threshold_classifier.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | from ..common._registration import register_converter 4 | from ..common._topology import Scope, Operator 5 | from ..common._container import ModelComponentContainer 6 | from ..common.data_types import Int64TensorType 7 | from .._supported_operators import sklearn_operator_name_map 8 | 9 | 10 | def convert_sklearn_tuned_threshold_classifier( 11 | scope: Scope, operator: Operator, container: ModelComponentContainer 12 | ): 13 | estimator = operator.raw_operator.estimator_ 14 | op_type = sklearn_operator_name_map[type(estimator)] 15 | 16 | this_operator = scope.declare_local_operator(op_type, estimator) 17 | this_operator.inputs = operator.inputs 18 | 19 | label_name = scope.declare_local_variable("label_tuned", Int64TensorType()) 20 | prob_name = scope.declare_local_variable( 21 | "proba_tuned", operator.outputs[1].type.__class__() 22 | ) 23 | this_operator.outputs.append(label_name) 24 | this_operator.outputs.append(prob_name) 25 | 26 | container.add_node( 27 | "Identity", [label_name.onnx_name], [operator.outputs[0].full_name] 28 | ) 29 | container.add_node( 30 | "Identity", [prob_name.onnx_name], [operator.outputs[1].full_name] 31 | ) 32 | 33 | 34 | register_converter( 35 | "SklearnTunedThresholdClassifierCV", 36 | convert_sklearn_tuned_threshold_classifier, 37 | options={ 38 | "zipmap": [True, False, "columns"], 39 | "output_class_labels": [False, True], 40 | "nocl": [True, False], 41 | }, 42 | ) 43 | -------------------------------------------------------------------------------- /skl2onnx/operator_converters/voting_regressor.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_converter 5 | from ..common._topology import Scope, Operator 6 | from ..common._container import ModelComponentContainer 7 | from ..common._apply_operation import apply_mul 8 | from ..common.data_types import guess_proto_type 9 | from .._supported_operators import sklearn_operator_name_map 10 | 11 | 12 | def convert_voting_regressor( 13 | scope: Scope, operator: Operator, container: ModelComponentContainer 14 | ): 15 | """ 16 | Converts a *VotingRegressor* into *ONNX* format. 17 | """ 18 | op = operator.raw_operator 19 | proto_dtype = guess_proto_type(operator.outputs[0].type) 20 | 21 | vars_names = [] 22 | for i, estimator in enumerate(op.estimators_): 23 | if estimator is None: 24 | continue 25 | 26 | op_type = sklearn_operator_name_map[type(estimator)] 27 | 28 | this_operator = scope.declare_local_operator(op_type, estimator) 29 | this_operator.inputs = operator.inputs 30 | 31 | var_name = scope.declare_local_variable( 32 | "var_%d" % i, operator.outputs[0].type.__class__() 33 | ) 34 | this_operator.outputs.append(var_name) 35 | var_name = var_name.onnx_name 36 | 37 | if op.weights is not None: 38 | val = op.weights[i] / op.weights.sum() 39 | else: 40 | val = 1.0 / len(op.estimators_) 41 | 42 | weights_name = scope.get_unique_variable_name("w%d" % i) 43 | container.add_initializer(weights_name, proto_dtype, [1], [val]) 44 | wvar_name = scope.get_unique_variable_name("wvar_%d" % i) 45 | apply_mul(scope, [var_name, weights_name], wvar_name, container, broadcast=1) 46 | 47 | flat_name = scope.get_unique_variable_name("fvar_%d" % i) 48 | container.add_node("Flatten", wvar_name, flat_name) 49 | vars_names.append(flat_name) 50 | 51 | container.add_node( 52 | "Sum", 53 | vars_names, 54 | operator.outputs[0].full_name, 55 | name=scope.get_unique_operator_name("Sum"), 56 | ) 57 | 58 | 59 | register_converter("SklearnVotingRegressor", convert_voting_regressor) 60 | -------------------------------------------------------------------------------- /skl2onnx/proto/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | # Rather than using ONNX protobuf definition throughout our codebase, 5 | # we import ONNX protobuf definition here so that we can conduct quick 6 | # fixes by overwriting ONNX functions without changing any lines 7 | # elsewhere. 8 | from onnx import onnx_pb as onnx_proto 9 | from onnx import defs 10 | 11 | # Overwrite the make_tensor defined in onnx.helper because of a bug 12 | # (string tensor get assigned twice) 13 | from onnx.onnx_pb import TensorProto, ValueInfoProto 14 | 15 | try: # noqa: SIM105 16 | from onnx.onnx_pb import SparseTensorProto 17 | except ImportError: 18 | # onnx is too old. 19 | pass 20 | 21 | 22 | def get_opset_number_from_onnx(): 23 | """ 24 | Returns the latest opset version supported 25 | by the *onnx* package. 26 | """ 27 | return defs.onnx_opset_version() 28 | 29 | 30 | def get_latest_tested_opset_version(): 31 | """ 32 | This module relies on *onnxruntime* to test every 33 | converter. The function returns the most recent 34 | target opset tested with *onnxruntime* or the opset 35 | version specified by *onnx* package if this one is lower 36 | (return by `onnx.defs.onnx_opset_version()`). 37 | """ 38 | from .. import __max_supported_opset__ 39 | 40 | return min(__max_supported_opset__, get_opset_number_from_onnx()) 41 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/array_feature_extractor.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | from ..common.utils import check_input_and_output_numbers 6 | 7 | 8 | def calculate_sklearn_array_feature_extractor(operator): 9 | check_input_and_output_numbers(operator, output_count_range=1) 10 | i = operator.inputs[0] 11 | N = i.get_first_dimension() 12 | C = len(operator.column_indices) 13 | operator.outputs[0].type = i.type.__class__([N, C]) 14 | 15 | 16 | register_shape_calculator( 17 | "SklearnArrayFeatureExtractor", calculate_sklearn_array_feature_extractor 18 | ) 19 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/cast_op.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | from ..common.utils import check_input_and_output_numbers 6 | from ..common.data_types import _guess_numpy_type 7 | from ..common.shape_calculator import calculate_linear_regressor_output_shapes 8 | 9 | 10 | def calculate_sklearn_cast(operator): 11 | check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) 12 | 13 | 14 | def calculate_sklearn_cast_transformer(operator): 15 | check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) 16 | op = operator.raw_operator 17 | otype = _guess_numpy_type(op.dtype, operator.inputs[0].type.shape) 18 | operator.outputs[0].type = otype 19 | 20 | 21 | register_shape_calculator("SklearnCast", calculate_sklearn_cast) 22 | register_shape_calculator("SklearnCastTransformer", calculate_sklearn_cast_transformer) 23 | register_shape_calculator( 24 | "SklearnCastRegressor", calculate_linear_regressor_output_shapes 25 | ) 26 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/class_labels.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | from ..common.utils import check_input_and_output_numbers 6 | 7 | 8 | def calculate_sklearn_class_labels(operator): 9 | check_input_and_output_numbers(operator, output_count_range=1) 10 | 11 | 12 | register_shape_calculator("SklearnClassLabels", calculate_sklearn_class_labels) 13 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/cross_decomposition.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | from ..common.data_types import FloatTensorType, Int64TensorType, DoubleTensorType 6 | from ..common.utils import check_input_and_output_numbers, check_input_and_output_types 7 | 8 | 9 | def calculate_pls_regression_output_shapes(operator): 10 | check_input_and_output_numbers(operator, input_count_range=1) 11 | check_input_and_output_types( 12 | operator, good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType] 13 | ) 14 | 15 | if len(operator.inputs[0].type.shape) != 2: 16 | raise RuntimeError("Input must be a [N, C]-tensor") 17 | 18 | op = operator.raw_operator 19 | cls_type = operator.inputs[0].type.__class__ 20 | if cls_type != DoubleTensorType: 21 | cls_type = FloatTensorType 22 | N = operator.inputs[0].get_first_dimension() 23 | operator.outputs[0].type = cls_type([N, op.coef_.shape[1]]) 24 | 25 | 26 | register_shape_calculator( 27 | "SklearnPLSRegression", calculate_pls_regression_output_shapes 28 | ) 29 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/dict_vectorizer.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | from ..common.utils import check_input_and_output_numbers 6 | 7 | 8 | def calculate_sklearn_dict_vectorizer_output_shapes(operator): 9 | """ 10 | Allowed input/output patterns are 11 | 1. Map ---> [1, C] 12 | 13 | C is the total number of allowed keys in the input dictionary. 14 | """ 15 | check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) 16 | C = len(operator.raw_operator.feature_names_) 17 | operator.outputs[0].type.shape = [None, C] 18 | 19 | 20 | register_shape_calculator( 21 | "SklearnDictVectorizer", calculate_sklearn_dict_vectorizer_output_shapes 22 | ) 23 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/feature_hasher.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | import numpy as np 4 | from ..common.data_types import ( 5 | StringTensorType, 6 | Int64TensorType, 7 | FloatTensorType, 8 | DoubleTensorType, 9 | ) 10 | from ..common._registration import register_shape_calculator 11 | from ..common.utils import check_input_and_output_numbers 12 | from ..common.utils import check_input_and_output_types 13 | 14 | 15 | def calculate_sklearn_feature_hasher(operator): 16 | check_input_and_output_numbers(operator, output_count_range=1) 17 | check_input_and_output_types( 18 | operator, good_input_types=[StringTensorType, Int64TensorType] 19 | ) 20 | 21 | N = operator.inputs[0].get_first_dimension() 22 | model = operator.raw_operator 23 | shape = [N, model.n_features] 24 | if model.dtype == np.float32: 25 | operator.outputs[0].type = FloatTensorType(shape=shape) 26 | elif model.dtype == np.float64: 27 | operator.outputs[0].type = DoubleTensorType(shape=shape) 28 | elif model.dtype in (np.int32, np.uint32, np.int64): 29 | operator.outputs[0].type = Int64TensorType(shape=shape) 30 | else: 31 | raise RuntimeError( 32 | f"Converter is not implemented for FeatureHasher.dtype={model.dtype}." 33 | ) 34 | 35 | 36 | register_shape_calculator("SklearnFeatureHasher", calculate_sklearn_feature_hasher) 37 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/feature_selection.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | from ..common.utils import check_input_and_output_numbers 6 | 7 | 8 | def calculate_sklearn_select(operator): 9 | check_input_and_output_numbers(operator, output_count_range=1) 10 | i = operator.inputs[0] 11 | N = i.get_first_dimension() 12 | C = operator.raw_operator.get_support().sum() 13 | operator.outputs[0].type = i.type.__class__([N, C]) 14 | 15 | 16 | register_shape_calculator("SklearnGenericUnivariateSelect", calculate_sklearn_select) 17 | register_shape_calculator("SklearnRFE", calculate_sklearn_select) 18 | register_shape_calculator("SklearnRFECV", calculate_sklearn_select) 19 | register_shape_calculator("SklearnSelectFdr", calculate_sklearn_select) 20 | register_shape_calculator("SklearnSelectFpr", calculate_sklearn_select) 21 | register_shape_calculator("SklearnSelectFromModel", calculate_sklearn_select) 22 | register_shape_calculator("SklearnSelectFwe", calculate_sklearn_select) 23 | register_shape_calculator("SklearnSelectKBest", calculate_sklearn_select) 24 | register_shape_calculator("SklearnSelectPercentile", calculate_sklearn_select) 25 | register_shape_calculator("SklearnVarianceThreshold", calculate_sklearn_select) 26 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/flatten.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | from ..common.data_types import FloatType, Int64Type, StringType, TensorType 6 | from ..common.utils import check_input_and_output_numbers 7 | 8 | 9 | def calculate_sklearn_flatten(operator): 10 | check_input_and_output_numbers(operator, output_count_range=1, input_count_range=1) 11 | i = operator.inputs[0] 12 | N = i.get_first_dimension() 13 | if isinstance(i.type, TensorType): 14 | if i.type.shape[1] is None: 15 | C = None 16 | else: 17 | C = i.type.shape[1] 18 | elif isinstance(i.type, (Int64Type, FloatType, StringType)): 19 | C = 1 20 | else: 21 | C = None 22 | if C is None: 23 | operator.outputs[0].type.shape = [N, C] 24 | else: 25 | operator.outputs[0].type.shape = [N * C] 26 | 27 | 28 | register_shape_calculator("SklearnFlatten", calculate_sklearn_flatten) 29 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/function_transformer.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | import copy 5 | from ..common._registration import register_shape_calculator 6 | 7 | 8 | def calculate_sklearn_function_transformer_output_shapes(operator): 9 | """ 10 | This operator is used only to merge columns in a pipeline. 11 | Only identity function is supported. 12 | """ 13 | if operator.raw_operator.func is not None: 14 | raise RuntimeError( 15 | "FunctionTransformer is not supported unless the " 16 | "transform function is None (= identity). " 17 | "You may raise an issue at " 18 | "https://github.com/onnx/sklearn-onnx/issues." 19 | ) 20 | N = operator.inputs[0].get_first_dimension() 21 | C = 0 22 | for variable in operator.inputs: 23 | if variable.type.shape[1] is not None: 24 | C += variable.type.shape[1] 25 | else: 26 | C = None 27 | break 28 | 29 | operator.outputs[0].type = copy.deepcopy(operator.inputs[0].type) 30 | operator.outputs[0].type.shape = [N, C] 31 | 32 | 33 | register_shape_calculator( 34 | "SklearnFunctionTransformer", calculate_sklearn_function_transformer_output_shapes 35 | ) 36 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/gaussian_process.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | from ..common.shape_calculator import calculate_linear_classifier_output_shapes 6 | from ..common.data_types import FloatTensorType, DoubleTensorType 7 | from ..common.utils import check_input_and_output_types 8 | 9 | 10 | def calculate_sklearn_gaussian_process_regressor_shape(operator): 11 | check_input_and_output_types( 12 | operator, 13 | good_input_types=[FloatTensorType, DoubleTensorType], 14 | good_output_types=[FloatTensorType, DoubleTensorType], 15 | ) 16 | if len(operator.inputs) != 1: 17 | raise RuntimeError( 18 | "Only one input vector is allowed for GaussianProcessRegressor." 19 | ) 20 | if len(operator.outputs) not in (1, 2): 21 | raise RuntimeError("One output is expected for GaussianProcessRegressor.") 22 | 23 | variable = operator.inputs[0] 24 | 25 | N = variable.get_first_dimension() 26 | op = operator.raw_operator 27 | 28 | # Output 1 is mean 29 | # Output 2 is cov or std 30 | if hasattr(op, "y_train_") and op.y_train_ is not None: 31 | dim = 1 if len(op.y_train_.shape) == 1 else op.y_train_.shape[1] 32 | else: 33 | dim = 1 34 | operator.outputs[0].type.shape = [N, dim] 35 | 36 | 37 | register_shape_calculator( 38 | "SklearnGaussianProcessRegressor", 39 | calculate_sklearn_gaussian_process_regressor_shape, 40 | ) 41 | register_shape_calculator( 42 | "SklearnGaussianProcessClassifier", calculate_linear_classifier_output_shapes 43 | ) 44 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/grid_search_cv.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | import logging 4 | from ..common._registration import register_shape_calculator, get_shape_calculator 5 | from .._supported_operators import sklearn_operator_name_map 6 | 7 | 8 | def convert_sklearn_grid_search_cv(operator): 9 | grid_search_op = operator.raw_operator 10 | best_estimator = grid_search_op.best_estimator_ 11 | name = sklearn_operator_name_map.get(type(best_estimator), None) 12 | if name is None: 13 | logger = logging.getLogger("skl2onnx") 14 | logger.warning( 15 | "[convert_sklearn_grid_search_cv] failed to find alias " 16 | "to model type %r.", 17 | type(best_estimator), 18 | ) 19 | return 20 | op = operator.new_raw_operator(best_estimator, name) 21 | shape_calc = get_shape_calculator(name) 22 | shape_calc(op) 23 | operator.outputs = op.outputs 24 | 25 | 26 | register_shape_calculator("SklearnGridSearchCV", convert_sklearn_grid_search_cv) 27 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/identity.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | from ..common.utils import check_input_and_output_numbers 6 | 7 | 8 | def calculate_sklearn_identity(operator): 9 | check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) 10 | operator.outputs[0].type = operator.inputs[0].type 11 | 12 | 13 | register_shape_calculator("SklearnIdentity", calculate_sklearn_identity) 14 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/imputer.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | from ..common.data_types import ( 6 | FloatTensorType, 7 | Int64TensorType, 8 | DoubleTensorType, 9 | StringTensorType, 10 | ) 11 | from ..common.utils import check_input_and_output_numbers 12 | from ..common.utils import check_input_and_output_types 13 | 14 | 15 | def calculate_sklearn_imputer_output_shapes(operator): 16 | """ 17 | Allowed input/output patterns are 18 | 1. [N, C_1], ..., [N, C_n] ---> [N, C_1 + ... + C_n] 19 | 20 | It's possible to receive multiple inputs so we need to concatenate 21 | them along C-axis. The produced tensor's shape is used as the 22 | output shape. 23 | """ 24 | check_input_and_output_numbers( 25 | operator, input_count_range=[1, None], output_count_range=1 26 | ) 27 | check_input_and_output_types( 28 | operator, 29 | good_input_types=[ 30 | FloatTensorType, 31 | Int64TensorType, 32 | DoubleTensorType, 33 | StringTensorType, 34 | ], 35 | ) 36 | output = operator.outputs[0] 37 | for variable in operator.inputs: 38 | if not isinstance(variable.type, type(output.type)): 39 | raise RuntimeError( 40 | "Inputs and outputs should have the same type " 41 | "%r != %r." % (type(variable.type), type(output.type)) 42 | ) 43 | 44 | N = operator.inputs[0].get_first_dimension() 45 | C = 0 46 | for variable in operator.inputs: 47 | if variable.type.shape[1] is not None: 48 | C += variable.type.shape[1] 49 | else: 50 | C = None 51 | break 52 | 53 | output.type.shape = [N, C] 54 | 55 | 56 | register_shape_calculator("SklearnImputer", calculate_sklearn_imputer_output_shapes) 57 | register_shape_calculator( 58 | "SklearnSimpleImputer", calculate_sklearn_imputer_output_shapes 59 | ) 60 | register_shape_calculator("SklearnBinarizer", calculate_sklearn_imputer_output_shapes) 61 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/isolation_forest.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | from ..common._registration import register_shape_calculator 4 | from ..common.data_types import Int64TensorType 5 | 6 | 7 | def calculate_isolation_forest_output_shapes(operator): 8 | N = operator.inputs[0].get_first_dimension() 9 | operator.outputs[0].type = Int64TensorType([N, 1]) 10 | operator.outputs[1].type.shape = [N, 1] 11 | 12 | 13 | register_shape_calculator( 14 | "SklearnIsolationForest", calculate_isolation_forest_output_shapes 15 | ) 16 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/k_bins_discretiser.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common.data_types import FloatTensorType, Int64TensorType, DoubleTensorType 5 | from ..common._registration import register_shape_calculator 6 | from ..common.utils import check_input_and_output_numbers 7 | from ..common.utils import check_input_and_output_types 8 | 9 | 10 | def calculate_sklearn_k_bins_discretiser(operator): 11 | check_input_and_output_numbers(operator, output_count_range=1) 12 | check_input_and_output_types( 13 | operator, good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType] 14 | ) 15 | 16 | M = operator.inputs[0].get_first_dimension() 17 | model = operator.raw_operator 18 | N = len(model.n_bins_) if model.encode == "ordinal" else sum(model.n_bins_) 19 | operator.outputs[0].type.shape = [M, N] 20 | 21 | 22 | register_shape_calculator( 23 | "SklearnKBinsDiscretizer", calculate_sklearn_k_bins_discretiser 24 | ) 25 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/k_means.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | from ..common.data_types import FloatTensorType, Int64TensorType, DoubleTensorType 6 | from ..common.utils import check_input_and_output_types 7 | 8 | 9 | def calculate_sklearn_kmeans_output_shapes(operator): 10 | check_input_and_output_types( 11 | operator, 12 | good_input_types=[Int64TensorType, FloatTensorType, DoubleTensorType], 13 | good_output_types=[Int64TensorType, FloatTensorType, DoubleTensorType], 14 | ) 15 | if len(operator.inputs) != 1: 16 | raise RuntimeError("Only one input vector is allowed for KMeans.") 17 | if len(operator.outputs) != 2: 18 | raise RuntimeError("Two outputs are expected for KMeans.") 19 | 20 | variable = operator.inputs[0] 21 | N = variable.get_first_dimension() 22 | op = operator.raw_operator 23 | operator.outputs[0].type.shape = [N] 24 | operator.outputs[1].type.shape = [N, op.n_clusters] 25 | 26 | 27 | register_shape_calculator("SklearnKMeans", calculate_sklearn_kmeans_output_shapes) 28 | register_shape_calculator( 29 | "SklearnMiniBatchKMeans", calculate_sklearn_kmeans_output_shapes 30 | ) 31 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/kernel_pca.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | from ..common.data_types import FloatTensorType, DoubleTensorType 6 | from ..common.utils import check_input_and_output_numbers, check_input_and_output_types 7 | 8 | 9 | def calculate_sklearn_kernel_pca_output_shapes(operator): 10 | check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) 11 | check_input_and_output_types( 12 | operator, 13 | good_input_types=[FloatTensorType, DoubleTensorType], 14 | good_output_types=[FloatTensorType, DoubleTensorType], 15 | ) 16 | N = operator.inputs[0].get_first_dimension() 17 | op = operator.raw_operator 18 | lbd = op.eigenvalues_ if hasattr(op, "eigenvalues_") else op.lambdas_ 19 | C = lbd.shape[0] 20 | operator.outputs[0].type.shape = [N, C] 21 | 22 | 23 | def calculate_sklearn_kernel_centerer_output_shapes(operator): 24 | check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) 25 | check_input_and_output_types( 26 | operator, 27 | good_input_types=[FloatTensorType, DoubleTensorType], 28 | good_output_types=[FloatTensorType, DoubleTensorType], 29 | ) 30 | N = operator.inputs[0].get_first_dimension() 31 | C = operator.raw_operator.K_fit_rows_.shape[0] 32 | operator.outputs[0].type.shape = [N, C] 33 | 34 | 35 | register_shape_calculator( 36 | "SklearnKernelCenterer", calculate_sklearn_kernel_centerer_output_shapes 37 | ) 38 | register_shape_calculator( 39 | "SklearnKernelPCA", calculate_sklearn_kernel_pca_output_shapes 40 | ) 41 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/label_binariser.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | from ..common.data_types import Int64TensorType, StringTensorType 6 | from ..common.utils import check_input_and_output_numbers 7 | from ..common.utils import check_input_and_output_types 8 | 9 | 10 | def calculate_sklearn_label_binariser_output_shapes(operator): 11 | check_input_and_output_numbers(operator, output_count_range=1) 12 | check_input_and_output_types( 13 | operator, good_input_types=[Int64TensorType, StringTensorType] 14 | ) 15 | 16 | N = operator.inputs[0].get_first_dimension() 17 | operator.outputs[0].type = Int64TensorType([N, len(operator.raw_operator.classes_)]) 18 | 19 | 20 | register_shape_calculator( 21 | "SklearnLabelBinarizer", calculate_sklearn_label_binariser_output_shapes 22 | ) 23 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/label_encoder.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | import copy 5 | from ..common._registration import register_shape_calculator 6 | from ..common.data_types import FloatTensorType 7 | from ..common.data_types import Int64TensorType, StringTensorType 8 | from ..common.utils import check_input_and_output_numbers 9 | from ..common.utils import check_input_and_output_types 10 | 11 | 12 | def calculate_sklearn_label_encoder_output_shapes(operator): 13 | """ 14 | This function just copy the input shape to the output because label 15 | encoder only alters input features' values, not their shape. 16 | """ 17 | check_input_and_output_numbers(operator, output_count_range=1) 18 | check_input_and_output_types( 19 | operator, good_input_types=[FloatTensorType, Int64TensorType, StringTensorType] 20 | ) 21 | 22 | input_shape = copy.deepcopy(operator.inputs[0].type.shape) 23 | operator.outputs[0].type = Int64TensorType(copy.deepcopy(input_shape)) 24 | 25 | 26 | register_shape_calculator( 27 | "SklearnLabelEncoder", calculate_sklearn_label_encoder_output_shapes 28 | ) 29 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/linear_classifier.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | from ..common.shape_calculator import calculate_linear_classifier_output_shapes 6 | 7 | 8 | register_shape_calculator( 9 | "SklearnLinearClassifier", calculate_linear_classifier_output_shapes 10 | ) 11 | register_shape_calculator("SklearnLinearSVC", calculate_linear_classifier_output_shapes) 12 | register_shape_calculator( 13 | "SklearnAdaBoostClassifier", calculate_linear_classifier_output_shapes 14 | ) 15 | register_shape_calculator( 16 | "SklearnBaggingClassifier", calculate_linear_classifier_output_shapes 17 | ) 18 | register_shape_calculator( 19 | "SklearnBernoulliNB", calculate_linear_classifier_output_shapes 20 | ) 21 | register_shape_calculator( 22 | "SklearnCategoricalNB", calculate_linear_classifier_output_shapes 23 | ) 24 | register_shape_calculator( 25 | "SklearnComplementNB", calculate_linear_classifier_output_shapes 26 | ) 27 | register_shape_calculator( 28 | "SklearnGaussianNB", calculate_linear_classifier_output_shapes 29 | ) 30 | register_shape_calculator( 31 | "SklearnMultinomialNB", calculate_linear_classifier_output_shapes 32 | ) 33 | register_shape_calculator( 34 | "SklearnCalibratedClassifierCV", calculate_linear_classifier_output_shapes 35 | ) 36 | register_shape_calculator( 37 | "SklearnMLPClassifier", calculate_linear_classifier_output_shapes 38 | ) 39 | register_shape_calculator( 40 | "SklearnSGDClassifier", calculate_linear_classifier_output_shapes 41 | ) 42 | register_shape_calculator( 43 | "SklearnStackingClassifier", calculate_linear_classifier_output_shapes 44 | ) 45 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/linear_regressor.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | from ..common.utils import check_input_and_output_numbers, check_input_and_output_types 6 | from ..common.shape_calculator import calculate_linear_regressor_output_shapes 7 | from ..common.data_types import ( 8 | BooleanTensorType, 9 | DoubleTensorType, 10 | FloatTensorType, 11 | Int64TensorType, 12 | ) 13 | 14 | 15 | def calculate_bayesian_ridge_output_shapes(operator): 16 | """ 17 | Allowed input/output patterns are 18 | 1. [N, C] ---> [N, 1] 19 | 20 | This operator produces a scalar prediction for every example in a 21 | batch. If the input batch size is N, the output shape may be 22 | [N, 1]. 23 | """ 24 | check_input_and_output_numbers( 25 | operator, input_count_range=1, output_count_range=[1, 2] 26 | ) 27 | check_input_and_output_types( 28 | operator, 29 | good_input_types=[ 30 | BooleanTensorType, 31 | DoubleTensorType, 32 | FloatTensorType, 33 | Int64TensorType, 34 | ], 35 | ) 36 | 37 | inp0 = operator.inputs[0].type 38 | if isinstance(inp0, (FloatTensorType, DoubleTensorType)): 39 | cls_type = inp0.__class__ 40 | else: 41 | cls_type = FloatTensorType 42 | 43 | N = operator.inputs[0].get_first_dimension() 44 | if ( 45 | hasattr(operator.raw_operator, "coef_") 46 | and len(operator.raw_operator.coef_.shape) > 1 47 | ): 48 | operator.outputs[0].type = cls_type([N, operator.raw_operator.coef_.shape[1]]) 49 | else: 50 | operator.outputs[0].type = cls_type([N, 1]) 51 | 52 | if len(operator.inputs) == 2: 53 | # option return_std is True 54 | operator.outputs[1].type = cls_type([N, 1]) 55 | 56 | 57 | register_shape_calculator( 58 | "SklearnAdaBoostRegressor", calculate_linear_regressor_output_shapes 59 | ) 60 | register_shape_calculator( 61 | "SklearnBaggingRegressor", calculate_linear_regressor_output_shapes 62 | ) 63 | register_shape_calculator( 64 | "SklearnBayesianRidge", calculate_bayesian_ridge_output_shapes 65 | ) 66 | register_shape_calculator( 67 | "SklearnLinearRegressor", calculate_linear_regressor_output_shapes 68 | ) 69 | register_shape_calculator("SklearnLinearSVR", calculate_linear_regressor_output_shapes) 70 | register_shape_calculator( 71 | "SklearnMLPRegressor", calculate_linear_regressor_output_shapes 72 | ) 73 | register_shape_calculator( 74 | "SklearnPoissonRegressor", calculate_linear_regressor_output_shapes 75 | ) 76 | register_shape_calculator( 77 | "SklearnRANSACRegressor", calculate_linear_regressor_output_shapes 78 | ) 79 | register_shape_calculator( 80 | "SklearnStackingRegressor", calculate_linear_regressor_output_shapes 81 | ) 82 | register_shape_calculator( 83 | "SklearnTweedieRegressor", calculate_linear_regressor_output_shapes 84 | ) 85 | register_shape_calculator( 86 | "SklearnGammaRegressor", calculate_linear_regressor_output_shapes 87 | ) 88 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/local_outlier_factor.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | from ..common._registration import register_shape_calculator 4 | from ..common.data_types import Int64TensorType 5 | 6 | 7 | def calculate_local_outlier_factor_output_shapes(operator): 8 | N = operator.inputs[0].get_first_dimension() 9 | operator.outputs[0].type = Int64TensorType([N, 1]) 10 | operator.outputs[1].type.shape = [N, 1] 11 | 12 | 13 | register_shape_calculator( 14 | "SklearnLocalOutlierFactor", calculate_local_outlier_factor_output_shapes 15 | ) 16 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/mixture.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | from ..common.data_types import FloatTensorType, Int64TensorType, DoubleTensorType 6 | from ..common.utils import check_input_and_output_numbers, check_input_and_output_types 7 | 8 | 9 | def calculate_gaussian_mixture_output_shapes(operator): 10 | check_input_and_output_numbers( 11 | operator, input_count_range=1, output_count_range=[2, 3] 12 | ) 13 | check_input_and_output_types( 14 | operator, good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType] 15 | ) 16 | 17 | if len(operator.inputs[0].type.shape) != 2: 18 | raise RuntimeError("Input must be a [N, C]-tensor") 19 | 20 | op = operator.raw_operator 21 | N = operator.inputs[0].get_first_dimension() 22 | operator.outputs[0].type = Int64TensorType([N, 1]) 23 | operator.outputs[1].type.shape = [N, op.n_components] 24 | if len(operator.outputs) > 2: 25 | operator.outputs[2].type.shape = [N, 1] 26 | 27 | 28 | register_shape_calculator( 29 | "SklearnGaussianMixture", calculate_gaussian_mixture_output_shapes 30 | ) 31 | register_shape_calculator( 32 | "SklearnBayesianGaussianMixture", calculate_gaussian_mixture_output_shapes 33 | ) 34 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/multioutput.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | from ..common.utils import check_input_and_output_numbers 6 | from ..common.data_types import SequenceType 7 | 8 | _stack = [] 9 | 10 | 11 | def multioutput_regressor_shape_calculator(operator): 12 | """Shape calculator for MultiOutputRegressor""" 13 | check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) 14 | i = operator.inputs[0] 15 | o = operator.outputs[0] 16 | N = i.get_first_dimension() 17 | C = len(operator.raw_operator.estimators_) 18 | o.type = o.type.__class__([N, C]) 19 | 20 | 21 | def multioutput_classifier_shape_calculator(operator): 22 | """Shape calculator for MultiOutputClassifier""" 23 | check_input_and_output_numbers(operator, input_count_range=1, output_count_range=2) 24 | if not isinstance(operator.outputs[1].type, SequenceType): 25 | raise RuntimeError( 26 | "Probabilites should be a sequence not %r." % operator.outputs[1].type 27 | ) 28 | i = operator.inputs[0] 29 | outputs = operator.outputs 30 | N = i.get_first_dimension() 31 | C = len(operator.raw_operator.estimators_) 32 | outputs[0].type.shape = [N, C] 33 | 34 | 35 | register_shape_calculator( 36 | "SklearnMultiOutputRegressor", multioutput_regressor_shape_calculator 37 | ) 38 | register_shape_calculator( 39 | "SklearnMultiOutputClassifier", multioutput_classifier_shape_calculator 40 | ) 41 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/multiply.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | import copy 3 | 4 | from ..common._registration import register_shape_calculator 5 | 6 | 7 | def calculate_sklearn_multiply(operator): 8 | for variable, output in zip(operator.inputs, operator.outputs): 9 | output.type = copy.copy(variable.type) 10 | 11 | 12 | register_shape_calculator("SklearnMultiply", calculate_sklearn_multiply) 13 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/one_hot_encoder.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | import numpy as np 5 | from ..common._registration import register_shape_calculator 6 | from ..common.data_types import FloatTensorType, Int64TensorType 7 | 8 | 9 | def calculate_sklearn_one_hot_encoder_output_shapes(operator): 10 | op = operator.raw_operator 11 | categories_len = 0 12 | for index, categories in enumerate(op.categories_): 13 | if hasattr(op, "drop_idx_") and op.drop_idx_ is not None: 14 | categories = categories[np.arange(len(categories)) != op.drop_idx_[index]] 15 | categories_len += len(categories) 16 | instances = operator.inputs[0].get_first_dimension() 17 | if np.issubdtype(op.dtype, np.signedinteger): 18 | operator.outputs[0].type = Int64TensorType([instances, categories_len]) 19 | else: 20 | operator.outputs[0].type = FloatTensorType([instances, categories_len]) 21 | 22 | 23 | register_shape_calculator( 24 | "SklearnOneHotEncoder", calculate_sklearn_one_hot_encoder_output_shapes 25 | ) 26 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/one_vs_one_classifier.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | from ..common._registration import register_shape_calculator 4 | from ..common.shape_calculator import calculate_linear_classifier_output_shapes 5 | 6 | 7 | register_shape_calculator( 8 | "SklearnOneVsOneClassifier", calculate_linear_classifier_output_shapes 9 | ) 10 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/one_vs_rest_classifier.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | from ..common._registration import register_shape_calculator 4 | from ..common.data_types import Int64TensorType 5 | from ..common.shape_calculator import calculate_linear_classifier_output_shapes 6 | 7 | 8 | def calculate_constant_predictor_output_shapes(operator): 9 | N = operator.inputs[0].get_first_dimension() 10 | operator.outputs[0].type = Int64TensorType([N]) 11 | operator.outputs[1].type.shape = [N, 2] 12 | 13 | 14 | register_shape_calculator( 15 | "Sklearn_ConstantPredictor", calculate_constant_predictor_output_shapes 16 | ) 17 | 18 | register_shape_calculator( 19 | "SklearnOneVsRestClassifier", calculate_linear_classifier_output_shapes 20 | ) 21 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/ordinal_encoder.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | import numpy as np 5 | from ..common._registration import register_shape_calculator 6 | from ..common.data_types import Int64TensorType, FloatTensorType 7 | 8 | 9 | def calculate_sklearn_ordinal_encoder_output_shapes(operator): 10 | ordinal_op = operator.raw_operator 11 | op_features = sum(list(map(lambda x: x.type.shape[1], operator.inputs))) 12 | if np.issubdtype(ordinal_op.dtype, np.floating): 13 | operator.outputs[0].type = FloatTensorType( 14 | [operator.inputs[0].get_first_dimension(), op_features] 15 | ) 16 | else: 17 | operator.outputs[0].type = Int64TensorType( 18 | [operator.inputs[0].get_first_dimension(), op_features] 19 | ) 20 | 21 | 22 | register_shape_calculator( 23 | "SklearnOrdinalEncoder", calculate_sklearn_ordinal_encoder_output_shapes 24 | ) 25 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/ovr_decision_function.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | 6 | 7 | def calculate_sklearn_ovr_decision_function(operator): 8 | N = operator.inputs[0].get_first_dimension() 9 | operator.outputs[0].type = operator.inputs[0].type.__class__( 10 | [N, len(operator.raw_operator.classes_)] 11 | ) 12 | 13 | 14 | register_shape_calculator( 15 | "SklearnOVRDecisionFunction", calculate_sklearn_ovr_decision_function 16 | ) 17 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/pipelines.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | from ..common._registration import register_shape_calculator 4 | 5 | 6 | def pipeline_shape_calculator(operator): 7 | pass 8 | 9 | 10 | def feature_union_shape_calculator(operator): 11 | pass 12 | 13 | 14 | def column_transformer_shape_calculator(operator): 15 | pass 16 | 17 | 18 | register_shape_calculator("SklearnPipeline", pipeline_shape_calculator) 19 | register_shape_calculator("SklearnFeatureUnion", feature_union_shape_calculator) 20 | register_shape_calculator( 21 | "SklearnColumnTransformer", column_transformer_shape_calculator 22 | ) 23 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/polynomial_features.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | import copy 5 | from ..common.data_types import FloatTensorType, Int64TensorType, DoubleTensorType 6 | from ..common._registration import register_shape_calculator 7 | from ..common.utils import check_input_and_output_numbers 8 | from ..common.utils import check_input_and_output_types 9 | 10 | 11 | def calculate_sklearn_polynomial_features(operator): 12 | check_input_and_output_numbers(operator, output_count_range=1) 13 | check_input_and_output_types( 14 | operator, good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType] 15 | ) 16 | 17 | N = operator.inputs[0].get_first_dimension() 18 | model = operator.raw_operator 19 | operator.outputs[0].type = copy.deepcopy(operator.inputs[0].type) 20 | operator.outputs[0].type.shape = [N, model.n_output_features_] 21 | 22 | 23 | register_shape_calculator( 24 | "SklearnPolynomialFeatures", calculate_sklearn_polynomial_features 25 | ) 26 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/power_transformer.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | from ..common.data_types import FloatTensorType 6 | 7 | 8 | def powertransformer_shape_calculator(operator): 9 | """Shape calculator for PowerTransformer""" 10 | inputs = operator.inputs[0] 11 | output = operator.outputs[0] 12 | n, c = inputs.type.shape 13 | output.type = FloatTensorType([n, c]) 14 | 15 | 16 | register_shape_calculator("SklearnPowerTransformer", powertransformer_shape_calculator) 17 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/quadratic_discriminant_analysis.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | from ..common._registration import register_shape_calculator 4 | from ..common.data_types import Int64TensorType, StringTensorType 5 | 6 | 7 | def calculate_quadratic_discriminant_analysis_shapes(operator): 8 | classes = operator.raw_operator.classes_ 9 | if all((isinstance(s, str)) for s in classes): 10 | label_tensor_type = StringTensorType 11 | else: 12 | label_tensor_type = Int64TensorType 13 | 14 | n_clasess = len(classes) 15 | operator.outputs[0].type = label_tensor_type([1, None]) 16 | operator.outputs[1].type.shape = [None, n_clasess] 17 | 18 | 19 | register_shape_calculator( 20 | "SklearnQuadraticDiscriminantAnalysis", 21 | calculate_quadratic_discriminant_analysis_shapes, 22 | ) 23 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/quantile_transformer.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | import copy 4 | from ..common._registration import register_shape_calculator 5 | from ..common.utils import check_input_and_output_numbers, check_input_and_output_types 6 | from ..common.data_types import FloatTensorType, Int64TensorType, DoubleTensorType 7 | 8 | 9 | def quantile_transformer_shape_calculator(operator): 10 | """Shape calculator for QuantileTransformer""" 11 | check_input_and_output_numbers(operator, output_count_range=1) 12 | check_input_and_output_types( 13 | operator, good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType] 14 | ) 15 | 16 | N = operator.inputs[0].get_first_dimension() 17 | model = operator.raw_operator 18 | operator.outputs[0].type = copy.deepcopy(operator.inputs[0].type) 19 | operator.outputs[0].type.shape = [N, model.quantiles_.shape[1]] 20 | 21 | 22 | register_shape_calculator( 23 | "SklearnQuantileTransformer", quantile_transformer_shape_calculator 24 | ) 25 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/random_projection.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | 6 | 7 | def random_projection_shape_calculator(operator): 8 | """Shape calculator for PowerTransformer""" 9 | inputs = operator.inputs[0] 10 | op = operator.raw_operator 11 | n = inputs.get_first_dimension() 12 | c = op.components_.shape[0] 13 | operator.outputs[0].type.shape = [n, c] 14 | 15 | 16 | register_shape_calculator( 17 | "SklearnGaussianRandomProjection", random_projection_shape_calculator 18 | ) 19 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/random_trees_embedding.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | import numpy as np 5 | from ..common._registration import register_shape_calculator 6 | from ..common.data_types import FloatTensorType, Int64TensorType 7 | 8 | 9 | def calculate_sklearn_random_trees_embedding_output_shapes(operator): 10 | op = operator.raw_operator.one_hot_encoder_ 11 | categories_len = 0 12 | for index, categories in enumerate(op.categories_): 13 | if hasattr(op, "drop_idx_") and op.drop_idx_ is not None: 14 | categories = categories[np.arange(len(categories)) != op.drop_idx_[index]] 15 | categories_len += len(categories) 16 | instances = operator.inputs[0].get_first_dimension() 17 | if np.issubdtype(op.dtype, np.signedinteger): 18 | operator.outputs[0].type = Int64TensorType([instances, categories_len]) 19 | else: 20 | operator.outputs[0].type = FloatTensorType([instances, categories_len]) 21 | 22 | 23 | register_shape_calculator( 24 | "SklearnRandomTreesEmbedding", 25 | calculate_sklearn_random_trees_embedding_output_shapes, 26 | ) 27 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/replace_op.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | from ..common.utils import check_input_and_output_numbers 6 | 7 | 8 | def calculate_sklearn_replace_transformer(operator): 9 | check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) 10 | operator.outputs[0].type = operator.inputs[0].type 11 | 12 | 13 | register_shape_calculator( 14 | "SklearnReplaceTransformer", calculate_sklearn_replace_transformer 15 | ) 16 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/scaler.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | import numbers 5 | from ..common._registration import register_shape_calculator 6 | from ..common.data_types import FloatTensorType, Int64TensorType, DoubleTensorType 7 | from ..common.utils import check_input_and_output_numbers 8 | from ..common.utils import check_input_and_output_types 9 | 10 | 11 | def calculate_sklearn_scaler_output_shapes(operator): 12 | """ 13 | Allowed input/output patterns are 14 | 1. [N, C_1], ..., [N, C_n] ---> [N, C_1 + ... + C_n] 15 | 16 | Similar to imputer, this operator can take multiple input feature 17 | tensors and concatenate them along C-axis. 18 | """ 19 | check_input_and_output_numbers( 20 | operator, input_count_range=[1, None], output_count_range=1 21 | ) 22 | check_input_and_output_types( 23 | operator, 24 | good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType], 25 | good_output_types=[FloatTensorType, DoubleTensorType], 26 | ) 27 | # Inputs: multiple float- and integer-tensors 28 | # Output: one float tensor 29 | for variable in operator.inputs: 30 | if len({variable.get_first_dimension() for variable in operator.inputs}) > 1: 31 | raise RuntimeError("Batch size must be identical across inputs.") 32 | 33 | N = operator.inputs[0].get_first_dimension() 34 | C = 0 35 | for variable in operator.inputs: 36 | c = variable.get_second_dimension() 37 | if isinstance(c, numbers.Integral): 38 | C += c 39 | else: 40 | C = None 41 | break 42 | 43 | operator.outputs[0].type.shape = [N, C] 44 | 45 | 46 | register_shape_calculator("SklearnRobustScaler", calculate_sklearn_scaler_output_shapes) 47 | register_shape_calculator("SklearnScaler", calculate_sklearn_scaler_output_shapes) 48 | register_shape_calculator("SklearnNormalizer", calculate_sklearn_scaler_output_shapes) 49 | register_shape_calculator("SklearnMinMaxScaler", calculate_sklearn_scaler_output_shapes) 50 | register_shape_calculator("SklearnMaxAbsScaler", calculate_sklearn_scaler_output_shapes) 51 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/sequence.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | from ..common._registration import register_shape_calculator 4 | 5 | 6 | def calculate_sklearn_sequence_at(operator): 7 | pass 8 | 9 | 10 | def calculate_sklearn_sequence_construct(operator): 11 | pass 12 | 13 | 14 | register_shape_calculator("SklearnSequenceAt", calculate_sklearn_sequence_at) 15 | register_shape_calculator( 16 | "SklearnSequenceConstruct", calculate_sklearn_sequence_construct 17 | ) 18 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/sgd_oneclass_svm.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | from ..common._registration import register_shape_calculator 4 | from ..common.data_types import Int64TensorType 5 | 6 | 7 | def calculate_sgd_oneclass_svm_output_shapes(operator): 8 | N = operator.inputs[0].get_first_dimension() 9 | operator.outputs[0].type = Int64TensorType( 10 | [ 11 | N, 12 | ] 13 | ) 14 | operator.outputs[1].type.shape = [ 15 | N, 16 | ] 17 | 18 | 19 | register_shape_calculator( 20 | "SklearnSGDOneClassSVM", calculate_sgd_oneclass_svm_output_shapes 21 | ) 22 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/svd.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | from ..common.data_types import FloatTensorType, Int64TensorType, DoubleTensorType 6 | from ..common.utils import check_input_and_output_numbers 7 | from ..common.utils import check_input_and_output_types 8 | 9 | 10 | def calculate_sklearn_truncated_svd_output_shapes(operator): 11 | """ 12 | Allowed input/output patterns are 13 | 1. [N, C] ---> [N, K] 14 | 15 | Transform feature dimension from C to K 16 | """ 17 | check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) 18 | check_input_and_output_types( 19 | operator, 20 | good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType], 21 | good_output_types=[FloatTensorType, DoubleTensorType], 22 | ) 23 | 24 | if len(operator.inputs[0].type.shape) != 2: 25 | raise RuntimeError("Only 2-D tensor(s) can be input(s).") 26 | 27 | cls_type = operator.inputs[0].type.__class__ 28 | if cls_type != DoubleTensorType: 29 | cls_type = FloatTensorType 30 | N = operator.inputs[0].get_first_dimension() 31 | K = ( 32 | operator.raw_operator.n_components 33 | if operator.type == "SklearnTruncatedSVD" 34 | else operator.raw_operator.n_components_ 35 | ) 36 | 37 | operator.outputs[0].type = cls_type([N, K]) 38 | 39 | 40 | register_shape_calculator( 41 | "SklearnIncrementalPCA", calculate_sklearn_truncated_svd_output_shapes 42 | ) 43 | register_shape_calculator("SklearnPCA", calculate_sklearn_truncated_svd_output_shapes) 44 | register_shape_calculator( 45 | "SklearnTruncatedSVD", calculate_sklearn_truncated_svd_output_shapes 46 | ) 47 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/target_encoder.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | from ..common.data_types import FloatTensorType 6 | from ..common.data_types import Int64TensorType, StringTensorType 7 | from ..common.utils import check_input_and_output_numbers 8 | from ..common.utils import check_input_and_output_types 9 | 10 | 11 | def calculate_sklearn_target_encoder_output_shapes(operator): 12 | """ 13 | This function just copy the input shape to the output because target 14 | encoder only alters input features' values, not their shape. 15 | """ 16 | check_input_and_output_numbers(operator, output_count_range=1) 17 | check_input_and_output_types( 18 | operator, good_input_types=[FloatTensorType, Int64TensorType, StringTensorType] 19 | ) 20 | 21 | N = operator.inputs[0].get_first_dimension() 22 | shape = [N, len(operator.raw_operator.categories_)] 23 | 24 | operator.outputs[0].type = FloatTensorType(shape=shape) 25 | 26 | 27 | register_shape_calculator( 28 | "SklearnTargetEncoder", calculate_sklearn_target_encoder_output_shapes 29 | ) 30 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/text_vectorizer.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | from ..common.utils import check_input_and_output_numbers 6 | 7 | 8 | def calculate_sklearn_text_vectorizer_output_shapes(operator): 9 | """ 10 | Allowed input/output patterns are 11 | 1. Map ---> [1, C] 12 | 13 | C is the total number of allowed keys in the input dictionary. 14 | """ 15 | check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) 16 | 17 | C = max(operator.raw_operator.vocabulary_.values()) + 1 18 | operator.outputs[0].type.shape = [None, C] 19 | 20 | 21 | register_shape_calculator( 22 | "SklearnCountVectorizer", calculate_sklearn_text_vectorizer_output_shapes 23 | ) 24 | register_shape_calculator( 25 | "SklearnTfidfVectorizer", calculate_sklearn_text_vectorizer_output_shapes 26 | ) 27 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/tfidf_transformer.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | from ..common.utils import check_input_and_output_numbers 6 | 7 | 8 | def calculate_sklearn_tfidf_transformer_output_shapes(operator): 9 | check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1) 10 | C = operator.inputs[0].type.shape[1] 11 | operator.outputs[0].type.shape = [1, C] 12 | 13 | 14 | register_shape_calculator( 15 | "SklearnTfidfTransformer", calculate_sklearn_tfidf_transformer_output_shapes 16 | ) 17 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/tuned_threshold_classifier.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | from ..common._registration import register_shape_calculator 4 | from ..common.utils import check_input_and_output_numbers 5 | from ..common.shape_calculator import _infer_linear_classifier_output_types 6 | 7 | 8 | def tuned_threshold_classifier_shape_calculator(operator): 9 | check_input_and_output_numbers(operator, output_count_range=2) 10 | 11 | _infer_linear_classifier_output_types(operator) 12 | 13 | 14 | register_shape_calculator( 15 | "SklearnTunedThresholdClassifierCV", tuned_threshold_classifier_shape_calculator 16 | ) 17 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/voting_classifier.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | from ..common._registration import register_shape_calculator 4 | from ..common.utils import check_input_and_output_numbers 5 | from ..common.shape_calculator import _infer_linear_classifier_output_types 6 | 7 | 8 | def voting_classifier_shape_calculator(operator): 9 | check_input_and_output_numbers(operator, output_count_range=2) 10 | 11 | _infer_linear_classifier_output_types(operator) 12 | 13 | 14 | register_shape_calculator("SklearnVotingClassifier", voting_classifier_shape_calculator) 15 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/voting_regressor.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | from ..common.utils import check_input_and_output_numbers 6 | from ..common.shape_calculator import _infer_linear_regressor_output_types 7 | 8 | 9 | def voting_regressor_shape_calculator(operator): 10 | check_input_and_output_numbers(operator, output_count_range=1) 11 | return _infer_linear_regressor_output_types(operator) 12 | 13 | 14 | register_shape_calculator("SklearnVotingRegressor", voting_regressor_shape_calculator) 15 | -------------------------------------------------------------------------------- /skl2onnx/shape_calculators/zip_map.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from ..common._registration import register_shape_calculator 5 | 6 | 7 | def calculate_sklearn_zipmap(operator): 8 | if len(operator.inputs) != len(operator.outputs) or len(operator.inputs) not in ( 9 | 1, 10 | 2, 11 | ): 12 | raise RuntimeError( 13 | "SklearnZipMap expects the same number of inputs and outputs." 14 | ) 15 | if len(operator.inputs) == 2: 16 | operator.outputs[0].type = operator.inputs[0].type.__class__( 17 | operator.inputs[0].type.shape 18 | ) 19 | if operator.outputs[1].type is not None: 20 | operator.outputs[1].type.element_type.value_type = operator.inputs[ 21 | 1 22 | ].type.__class__([]) 23 | 24 | 25 | def calculate_sklearn_zipmap_columns(operator): 26 | N = operator.inputs[0].get_first_dimension() 27 | operator.outputs[0].type = operator.inputs[0].type.__class__( 28 | operator.inputs[0].type.shape 29 | ) 30 | for i in range(1, len(operator.outputs)): 31 | operator.outputs[i].type.shape = [N] 32 | 33 | 34 | register_shape_calculator("SklearnZipMap", calculate_sklearn_zipmap) 35 | register_shape_calculator("SklearnZipMapColumns", calculate_sklearn_zipmap_columns) 36 | -------------------------------------------------------------------------------- /skl2onnx/sklapi/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from .cast_transformer import CastTransformer 5 | from .cast_regressor import CastRegressor 6 | from .replace_transformer import ReplaceTransformer 7 | from .sklearn_text import TraceableCountVectorizer, TraceableTfidfVectorizer 8 | from .woe_transformer import WOETransformer 9 | -------------------------------------------------------------------------------- /skl2onnx/sklapi/cast_regressor.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | import numpy as np 4 | from sklearn.base import RegressorMixin, BaseEstimator 5 | 6 | try: 7 | from sklearn.utils.validation import _deprecate_positional_args 8 | except ImportError: 9 | 10 | def _deprecate_positional_args(x): 11 | return x 12 | 13 | 14 | class CastRegressor(RegressorMixin, BaseEstimator): 15 | """ 16 | Cast predictions into a specific types. 17 | This should be used to minimize the conversion 18 | of a pipeline using float32 instead of double 19 | when onnx do not support double. 20 | 21 | Parameters 22 | ---------- 23 | estimator : regressor 24 | wrapped regressor 25 | dtype : numpy type, 26 | output are cast into that type 27 | """ 28 | 29 | @_deprecate_positional_args 30 | def __init__(self, estimator, *, dtype=np.float32): 31 | self.dtype = dtype 32 | self.estimator = estimator 33 | 34 | def _cast(self, a, name): 35 | try: 36 | a2 = a.astype(self.dtype) 37 | except ValueError as e: 38 | raise ValueError( 39 | "Unable to cast {} from {} into {}.".format(name, a.dtype, self.dtype) 40 | ) from e 41 | return a2 42 | 43 | def fit(self, X, y=None, sample_weight=None): 44 | """ 45 | Does nothing except checking *dtype* may be applied. 46 | """ 47 | self.estimator.fit(X, y=y, sample_weight=sample_weight) 48 | return self 49 | 50 | def predict(self, X, y=None): 51 | """ 52 | Predicts and casts the prediction. 53 | """ 54 | return self._cast(self.estimator.predict(X), "predict(X)") 55 | 56 | def decision_function(self, X, y=None): 57 | """ 58 | Calls *decision_function* and casts the outputs. 59 | """ 60 | if not hasattr(self.estimator, "decision_function"): 61 | raise AttributeError( 62 | "%r object has no attribute 'decision_function'." 63 | % self.estimator.__class__.__name__ 64 | ) 65 | return self._cast(self.estimator.decision_function(X), "decision_function(X)") 66 | -------------------------------------------------------------------------------- /skl2onnx/sklapi/cast_transformer.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | import numpy as np 4 | from sklearn.base import TransformerMixin, BaseEstimator 5 | 6 | try: 7 | from sklearn.utils.validation import _deprecate_positional_args 8 | except ImportError: 9 | 10 | def _deprecate_positional_args(x): 11 | return x 12 | 13 | 14 | class CastTransformer(TransformerMixin, BaseEstimator): 15 | """ 16 | Cast features into a specific types. 17 | This should be used to minimize the conversion 18 | of a pipeline using float32 instead of double. 19 | 20 | Parameters 21 | ---------- 22 | dtype : numpy type, 23 | output are cast into that type 24 | """ 25 | 26 | @_deprecate_positional_args 27 | def __init__(self, *, dtype=np.float32): 28 | self.dtype = dtype 29 | 30 | def _cast(self, a, name): 31 | if not isinstance(a, np.ndarray): 32 | if hasattr(a, "values") and hasattr(a, "iloc"): 33 | # dataframe 34 | a = a.values 35 | elif not hasattr(a, "astype"): 36 | raise TypeError("{} must be a numpy array or a dataframe.".format(name)) 37 | try: 38 | a2 = a.astype(self.dtype) 39 | except ValueError as e: 40 | raise ValueError( 41 | "Unable to cast {} from {} into {}.".format(name, a.dtype, self.dtype) 42 | ) from e 43 | return a2 44 | 45 | def fit(self, X, y=None, sample_weight=None): 46 | """ 47 | Does nothing except checking *dtype* may be applied. 48 | """ 49 | self._cast(X, "X") 50 | return self 51 | 52 | def transform(self, X, y=None): 53 | """ 54 | Casts array X. 55 | """ 56 | return self._cast(X, "X") 57 | -------------------------------------------------------------------------------- /skl2onnx/sklapi/register.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | from .sklearn_text_onnx import register as register_text 4 | from .woe_transformer_onnx import register as register_woe 5 | 6 | 7 | register_text() 8 | register_woe() 9 | -------------------------------------------------------------------------------- /skl2onnx/sklapi/replace_transformer.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | import numpy as np 4 | from sklearn.base import TransformerMixin, BaseEstimator 5 | 6 | try: 7 | from sklearn.utils.validation import _deprecate_positional_args 8 | except ImportError: 9 | 10 | def _deprecate_positional_args(x): 11 | return x 12 | 13 | 14 | class ReplaceTransformer(TransformerMixin, BaseEstimator): 15 | """ 16 | Replaces a value by another one. 17 | It can be used to replace 0 by nan. 18 | 19 | Parameters 20 | ---------- 21 | from_value : value to replace 22 | to_value : new value 23 | dtype: dtype of replaced values 24 | """ 25 | 26 | @_deprecate_positional_args 27 | def __init__(self, *, from_value=0, to_value=np.nan, dtype=np.float32): 28 | BaseEstimator.__init__(self) 29 | self.dtype = dtype 30 | self.from_value = from_value 31 | self.to_value = to_value 32 | 33 | def _replace(self, a): 34 | if hasattr(a, "todense"): 35 | if np.isnan(self.to_value) and self.from_value == 0: 36 | # implicit 37 | return a 38 | raise RuntimeError( 39 | "Unable to replace 0 by nan one value by another in sparse matrix." 40 | ) 41 | return np.where(a == self.from_value, self.to_value, a) 42 | 43 | def fit(self, X, y=None, sample_weight=None): 44 | """ 45 | Does nothing except checking *dtype* may be applied. 46 | """ 47 | self._replace(X) 48 | return self 49 | 50 | def transform(self, X, y=None): 51 | """ 52 | Casts array X. 53 | """ 54 | return self._replace(X) 55 | -------------------------------------------------------------------------------- /skl2onnx/sklapi/sklearn_text_onnx.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | from .. import update_registered_converter 4 | from ..shape_calculators.text_vectorizer import ( 5 | calculate_sklearn_text_vectorizer_output_shapes, 6 | ) 7 | from ..operator_converters.text_vectoriser import convert_sklearn_text_vectorizer 8 | from ..operator_converters.tfidf_vectoriser import convert_sklearn_tfidf_vectoriser 9 | from .sklearn_text import TraceableCountVectorizer, TraceableTfidfVectorizer 10 | 11 | 12 | def register(): 13 | """Register converter for TraceableCountVectorizer, 14 | TraceableTfidfVectorizer.""" 15 | update_registered_converter( 16 | TraceableCountVectorizer, 17 | "Skl2onnxTraceableCountVectorizer", 18 | calculate_sklearn_text_vectorizer_output_shapes, 19 | convert_sklearn_text_vectorizer, 20 | options={ 21 | "tokenexp": None, 22 | "separators": None, 23 | "nan": [True, False], 24 | "keep_empty_string": [True, False], 25 | "locale": None, 26 | }, 27 | ) 28 | 29 | update_registered_converter( 30 | TraceableTfidfVectorizer, 31 | "Skl2onnxTraceableTfidfVectorizer", 32 | calculate_sklearn_text_vectorizer_output_shapes, 33 | convert_sklearn_tfidf_vectoriser, 34 | options={ 35 | "tokenexp": None, 36 | "separators": None, 37 | "nan": [True, False], 38 | "keep_empty_string": [True, False], 39 | "locale": None, 40 | }, 41 | ) 42 | -------------------------------------------------------------------------------- /skl2onnx/tutorial/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | """ 4 | Shortcuts to *tutorial*. 5 | """ 6 | 7 | from .benchmark import measure_time 8 | -------------------------------------------------------------------------------- /skl2onnx/tutorial/benchmark.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | """ 4 | Tools to help benchmarking. 5 | """ 6 | 7 | from timeit import Timer 8 | import numpy 9 | 10 | 11 | def measure_time(stmt, context, repeat=10, number=50, div_by_number=False): 12 | """ 13 | Measures a statement and returns the results as a dictionary. 14 | 15 | :param stmt: string 16 | :param context: variable to know in a dictionary 17 | :param repeat: average over *repeat* experiment 18 | :param number: number of executions in one row 19 | :param div_by_number: divide by the number of executions 20 | :return: dictionary 21 | 22 | .. runpython:: 23 | :showcode: 24 | 25 | from skl2onnx.tutorial import measure_time 26 | from math import cos 27 | 28 | res = measure_time("cos(x)", context=dict(cos=cos, x=5.)) 29 | print(res) 30 | 31 | See `Timer.repeat `_ 33 | for a better understanding of parameter *repeat* and *number*. 34 | The function returns a duration corresponding to 35 | *number* times the execution of the main statement. 36 | """ 37 | tim = Timer(stmt, globals=context) 38 | res = numpy.array(tim.repeat(repeat=repeat, number=number)) 39 | if div_by_number: 40 | res /= number 41 | mean = numpy.mean(res) 42 | dev = numpy.mean(res**2) 43 | dev = (dev - mean**2) ** 0.5 44 | mes = dict( 45 | average=mean, 46 | deviation=dev, 47 | min_exec=numpy.min(res), 48 | max_exec=numpy.max(res), 49 | repeat=repeat, 50 | number=number, 51 | ) 52 | return mes 53 | -------------------------------------------------------------------------------- /tests/benchmark.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | """ 4 | You can run this file with to get a report on every tested model conversion. 5 | 6 | :: 7 | 8 | python -u tests/benchmark.py 9 | 10 | Folder contains the model to compare implemented as unit tests. 11 | """ 12 | 13 | import os 14 | import sys 15 | import unittest 16 | import warnings 17 | 18 | 19 | def run_all_tests(folder=None, verbose=True): 20 | """ 21 | Runs all unit tests or unit tests specific to one library. 22 | The tests produce a series of files dumped into ``folder`` 23 | which can be later used to tests a backend (or a runtime). 24 | 25 | :param folder: where to put the dumped files 26 | :param verbose: verbose 27 | """ 28 | if folder is None: 29 | folder = "TESTDUMP" 30 | os.environ["ONNXTESTDUMP"] = folder 31 | os.environ["ONNXTESTDUMPERROR"] = "1" 32 | os.environ["ONNXTESTBENCHMARK"] = "1" 33 | 34 | if verbose: 35 | print("[benchmark] look into '{0}'".format(folder)) 36 | 37 | try: 38 | import onnxmltools # noqa: F401 39 | except ImportError: 40 | warnings.warn("Cannot import onnxmltools. Some tests won't work.") 41 | 42 | this = os.path.abspath(os.path.dirname(__file__)) 43 | subs = [this] 44 | loader = unittest.TestLoader() 45 | suites = [] 46 | 47 | for sub in subs: 48 | fold = os.path.join(this, sub) 49 | if not os.path.exists(fold): 50 | raise FileNotFoundError("Unable to find '{0}'".format(fold)) 51 | 52 | # ts = loader.discover(fold) 53 | sys.path.append(fold) 54 | names = [_ for _ in os.listdir(fold) if _.startswith("test")] 55 | for name in names: 56 | name = os.path.splitext(name)[0] 57 | ts = loader.loadTestsFromName(name) 58 | suites.append(ts) 59 | index = sys.path.index(fold) 60 | del sys.path[index] 61 | 62 | with warnings.catch_warnings(): 63 | warnings.filterwarnings(category=DeprecationWarning, action="ignore") 64 | warnings.filterwarnings(category=FutureWarning, action="ignore") 65 | runner = unittest.TextTestRunner() 66 | for tsi, ts in enumerate(suites): 67 | for k in ts: 68 | try: 69 | for t in k: 70 | print(t.__class__.__name__) 71 | break 72 | except TypeError as e: 73 | raise RuntimeError("Unable to run test '{}'.".format(ts)) from e 74 | runner.run(ts) 75 | 76 | from test_utils.tests_helper import make_report_backend 77 | 78 | df = make_report_backend(folder, as_df=True) 79 | 80 | from pandas import set_option 81 | 82 | set_option("display.max_columns", None) 83 | set_option("display.max_rows", None) 84 | exfile = os.path.join(folder, "report_backend.xlsx") 85 | df.to_excel(exfile) 86 | if verbose: 87 | print("[benchmark] wrote report in '{0}'".format(exfile)) 88 | return df 89 | 90 | 91 | if __name__ == "__main__": 92 | folder = None if len(sys.argv) < 2 else sys.argv[1] 93 | run_all_tests(folder=folder) 94 | -------------------------------------------------------------------------------- /tests/datasets/treecl.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onnx/sklearn-onnx/eaac0e13333962a2391a33c9d5192e382b7a985d/tests/datasets/treecl.onnx -------------------------------------------------------------------------------- /tests/datasets/treecl2.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onnx/sklearn-onnx/eaac0e13333962a2391a33c9d5192e382b7a985d/tests/datasets/treecl2.onnx -------------------------------------------------------------------------------- /tests/datasets/treecl3.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onnx/sklearn-onnx/eaac0e13333962a2391a33c9d5192e382b7a985d/tests/datasets/treecl3.onnx -------------------------------------------------------------------------------- /tests/test_algebra_complex.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from numpy.testing import assert_almost_equal 4 | from onnxruntime import InferenceSession 5 | 6 | try: 7 | from onnxruntime.capi.onnxruntime_pybind11_state import ( 8 | InvalidGraph, 9 | Fail, 10 | InvalidArgument, 11 | ) 12 | except ImportError: 13 | InvalidGraph = RuntimeError 14 | InvalidArgument = RuntimeError 15 | Fail = RuntimeError 16 | from skl2onnx.common.data_types import Complex64TensorType, Complex128TensorType 17 | from skl2onnx.algebra.onnx_ops import OnnxAdd 18 | from test_utils import TARGET_OPSET 19 | 20 | 21 | class TestAlgebraComplex(unittest.TestCase): 22 | @unittest.skipIf(Complex64TensorType is None, reason="not available") 23 | @unittest.skipIf(TARGET_OPSET < 13, reason="not implemented") 24 | def test_complex(self): 25 | for dt, var, pr in ( 26 | (np.complex64, Complex64TensorType, 14), 27 | (np.complex128, Complex128TensorType, 15), 28 | ): 29 | X = np.array([[1 - 2j, -12j], [-1 - 2j, 1 + 2j]]).astype(dt) 30 | 31 | for opv in range(10, 20): 32 | if opv > TARGET_OPSET: 33 | continue 34 | with self.subTest(dt=dt, opset=opv): 35 | out = OnnxAdd( 36 | "X", 37 | np.array([1 + 2j], dtype=dt), 38 | output_names=["Y"], 39 | op_version=opv, 40 | ) 41 | onx = out.to_onnx( 42 | [("X", var((None, 2)))], 43 | outputs=[("Y", var())], 44 | target_opset=opv, 45 | ) 46 | self.assertIn("elem_type: %d" % pr, str(onx)) 47 | 48 | try: 49 | ort = InferenceSession( 50 | onx.SerializeToString(), providers=["CPUExecutionProvider"] 51 | ) 52 | except InvalidGraph as e: 53 | if "Type Error: Type 'tensor(complex" in str(e): 54 | continue 55 | raise e 56 | assert ort is not None 57 | got = ort.run(None, {"X": X})[0] 58 | assert_almost_equal(X + np.array([1 + 2j]), got) 59 | 60 | 61 | if __name__ == "__main__": 62 | unittest.main() 63 | -------------------------------------------------------------------------------- /tests/test_algebra_double.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | import unittest 4 | import packaging.version as pv 5 | import numpy 6 | from numpy.testing import assert_almost_equal 7 | from skl2onnx.algebra.onnx_ops import OnnxMatMul, OnnxSub 8 | import onnxruntime 9 | from onnxruntime import InferenceSession 10 | from test_utils import TARGET_OPSET 11 | 12 | 13 | class TestAlgebraDouble(unittest.TestCase): 14 | @unittest.skipIf(TARGET_OPSET < 10, reason="not available") 15 | @unittest.skipIf( 16 | pv.Version(onnxruntime.__version__) <= pv.Version("0.4.0"), 17 | reason="Sub(7) not available", 18 | ) 19 | def test_algebra_converter(self): 20 | coef = numpy.array([[1, 2], [3, 4]], dtype=numpy.float64) 21 | intercept = 1 22 | X_test = numpy.array([[1, -2], [3, -4]], dtype=numpy.float64) 23 | 24 | onnx_fct = OnnxSub( 25 | OnnxMatMul("X", coef, op_version=TARGET_OPSET), 26 | numpy.array([intercept], dtype=numpy.float64), 27 | output_names=["Y"], 28 | op_version=TARGET_OPSET, 29 | ) 30 | onnx_model = onnx_fct.to_onnx({"X": X_test}, target_opset=TARGET_OPSET) 31 | 32 | sess = InferenceSession( 33 | onnx_model.SerializeToString(), providers=["CPUExecutionProvider"] 34 | ) 35 | ort_pred = sess.run(None, {"X": X_test})[0] 36 | assert_almost_equal(ort_pred, numpy.array([[-6.0, -7.0], [-10.0, -11.0]])) 37 | 38 | 39 | if __name__ == "__main__": 40 | unittest.main() 41 | -------------------------------------------------------------------------------- /tests/test_algebra_onnx_doc.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | import unittest 4 | import sys 5 | import numpy as np 6 | from numpy.testing import assert_almost_equal 7 | import onnx 8 | from skl2onnx.algebra.onnx_ops import dynamic_class_creation 9 | from skl2onnx.algebra.automation import get_rst_doc_sklearn 10 | from test_utils import TARGET_OPSET 11 | 12 | 13 | class TestAlgebraOnnxDoc(unittest.TestCase): 14 | def setUp(self): 15 | self._algebra = dynamic_class_creation() 16 | 17 | def predict_with_onnxruntime(self, model_def, *inputs): 18 | import onnxruntime as ort 19 | 20 | sess = ort.InferenceSession( 21 | model_def.SerializeToString(), providers=["CPUExecutionProvider"] 22 | ) 23 | names = [i.name for i in sess.get_inputs()] 24 | input = {name: input for name, input in zip(names, inputs)} 25 | res = sess.run(None, input) 26 | names = [o.name for o in sess.get_outputs()] 27 | return {name: output for name, output in zip(names, res)} 28 | 29 | @unittest.skipIf(TARGET_OPSET < 10, reason="not available") 30 | def test_transpose2(self): 31 | from skl2onnx.algebra.onnx_ops import OnnxTranspose 32 | 33 | node = OnnxTranspose( 34 | OnnxTranspose("X", perm=[1, 0, 2], op_version=TARGET_OPSET), 35 | perm=[1, 0, 2], 36 | output_names=["Y"], 37 | op_version=TARGET_OPSET, 38 | ) 39 | X = np.arange(2 * 3 * 4).reshape((2, 3, 4)).astype(np.float32) 40 | 41 | model_def = node.to_onnx({"X": X}) 42 | onnx.checker.check_model(model_def) 43 | res = self.predict_with_onnxruntime(model_def, X) 44 | assert_almost_equal(res["Y"], X) 45 | 46 | @unittest.skipIf( 47 | sys.platform.startswith("win"), reason="onnx schema are incorrect on Windows" 48 | ) 49 | @unittest.skipIf(TARGET_OPSET <= 20, reason="not available") 50 | def test_doc_sklearn(self): 51 | rst = get_rst_doc_sklearn() 52 | assert ( 53 | ".. _l-sklops-OnnxSklearnBernoulliNB:" in rst 54 | ), f"Unable to find a substring in {rst}" 55 | 56 | 57 | if __name__ == "__main__": 58 | unittest.main(verbosity=2) 59 | -------------------------------------------------------------------------------- /tests/test_algebra_onnx_operators_opset.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | import unittest 4 | import numpy as np 5 | from numpy.testing import assert_almost_equal 6 | import onnx 7 | import onnxruntime as ort 8 | from skl2onnx.algebra.onnx_ops import OnnxPad 9 | 10 | 11 | class TestOnnxOperatorsOpset(unittest.TestCase): 12 | @unittest.skipIf(onnx.defs.onnx_opset_version() < 10, "irrelevant") 13 | def test_pad_opset_10(self): 14 | pad = OnnxPad( 15 | "X", 16 | output_names=["Y"], 17 | mode="constant", 18 | value=1.5, 19 | pads=[0, 1, 0, 1], 20 | op_version=2, 21 | ) 22 | 23 | X = np.array([[0, 1]], dtype=np.float32) 24 | model_def = pad.to_onnx({"X": X}, target_opset=10) 25 | onnx.checker.check_model(model_def) 26 | 27 | def predict_with_onnxruntime(model_def, *inputs): 28 | sess = ort.InferenceSession( 29 | model_def.SerializeToString(), providers=["CPUExecutionProvider"] 30 | ) 31 | names = [i.name for i in sess.get_inputs()] 32 | dinputs = {name: input for name, input in zip(names, inputs)} 33 | res = sess.run(None, dinputs) 34 | names = [o.name for o in sess.get_outputs()] 35 | return {name: output for name, output in zip(names, res)} 36 | 37 | Y = predict_with_onnxruntime(model_def, X) 38 | assert_almost_equal(np.array([[1.5, 0.0, 1.0, 1.5]], dtype=np.float32), Y["Y"]) 39 | 40 | 41 | if __name__ == "__main__": 42 | unittest.main() 43 | -------------------------------------------------------------------------------- /tests/test_issues_2025.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | import unittest 3 | from sklearn.utils._testing import ignore_warnings 4 | from sklearn.exceptions import ConvergenceWarning 5 | 6 | 7 | class TestInvestigate2025(unittest.TestCase): 8 | @ignore_warnings(category=(ConvergenceWarning, FutureWarning)) 9 | def test_issue_1161_gaussian(self): 10 | # https://github.com/onnx/sklearn-onnx/issues/1161 11 | import numpy as np 12 | from sklearn.gaussian_process import GaussianProcessRegressor 13 | from sklearn.gaussian_process.kernels import WhiteKernel 14 | from skl2onnx import convert_sklearn 15 | from skl2onnx.common.data_types import FloatTensorType 16 | 17 | # Generate sample data 18 | X = np.array([[1], [3], [5], [6], [7], [8], [10], [12], [14], [15]]) 19 | y = np.array([3, 2, 7, 8, 7, 6, 9, 11, 10, 12]) 20 | 21 | # Define the kernel 22 | kernel = WhiteKernel() 23 | 24 | # Create and train the Gaussian Process Regressor 25 | gpr = GaussianProcessRegressor( 26 | kernel=kernel, n_restarts_optimizer=10, alpha=1e-2 27 | ) 28 | gpr.fit(X, y) 29 | 30 | # Convert the trained model to ONNX format 31 | initial_type = [("float_input", FloatTensorType([None, 1]))] 32 | onnx_model = convert_sklearn( 33 | gpr, 34 | initial_types=initial_type, 35 | options={GaussianProcessRegressor: {"return_std": True}}, 36 | ) 37 | self.assertTrue(onnx_model is not None) 38 | 39 | 40 | if __name__ == "__main__": 41 | unittest.main(verbosity=2) 42 | -------------------------------------------------------------------------------- /tests/test_onnx_rare_helper.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | """ 4 | Tests on functions in *onnx_helper*. 5 | """ 6 | 7 | import unittest 8 | from sklearn.datasets import load_iris 9 | from sklearn.cluster import KMeans 10 | from sklearn.neighbors import NearestNeighbors 11 | from onnx.defs import onnx_opset_version 12 | from skl2onnx import convert_sklearn 13 | from skl2onnx.common.data_types import FloatTensorType 14 | from skl2onnx.helpers.onnx_rare_helper import upgrade_opset_number 15 | from test_utils import TARGET_OPSET 16 | 17 | 18 | class TestOnnxRareHelper(unittest.TestCase): 19 | def test_kmeans_upgrade(self): 20 | data = load_iris() 21 | X = data.data 22 | model = KMeans(n_clusters=3) 23 | model.fit(X) 24 | model_onnx = convert_sklearn( 25 | model, "kmeans", [("input", FloatTensorType([None, 4]))], target_opset=7 26 | ) 27 | model8 = upgrade_opset_number(model_onnx, 8) 28 | assert "version: 8" in str(model8) 29 | 30 | @unittest.skipIf(onnx_opset_version() < 11, reason="Needs opset >= 11") 31 | def test_knn_upgrade(self): 32 | iris = load_iris() 33 | X, _ = iris.data, iris.target 34 | 35 | clr = NearestNeighbors(n_neighbors=3, radius=None) 36 | clr.fit(X) 37 | 38 | model_onnx = convert_sklearn( 39 | clr, "up", [("input", FloatTensorType([None, 4]))], target_opset=9 40 | ) 41 | try: 42 | upgrade_opset_number(model_onnx, 8) 43 | raise AssertionError() 44 | except RuntimeError: 45 | pass 46 | try: 47 | upgrade_opset_number(model_onnx, TARGET_OPSET) 48 | except RuntimeError as e: 49 | assert "was updated" in str(e) 50 | 51 | 52 | if __name__ == "__main__": 53 | unittest.main() 54 | -------------------------------------------------------------------------------- /tests/test_raw_name.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy 4 | import onnxruntime as rt 5 | from numpy.testing import assert_almost_equal 6 | from skl2onnx import convert_sklearn 7 | from skl2onnx.common.data_types import FloatTensorType 8 | from sklearn.datasets import load_iris 9 | from sklearn.linear_model import LogisticRegression 10 | from test_utils import TARGET_OPSET 11 | 12 | 13 | class RawNameTest(unittest.TestCase): 14 | _raw_names = ( 15 | "float_input", 16 | "float_input--", 17 | "float_input(", 18 | "float_input)", 19 | ) 20 | 21 | @staticmethod 22 | def _load_data(): 23 | iris = load_iris() 24 | return iris.data[:, :2], iris.target 25 | 26 | @staticmethod 27 | def _train_model(X, y): 28 | return LogisticRegression().fit(X, y) 29 | 30 | @staticmethod 31 | def _get_initial_types(X, raw_name): 32 | return [(raw_name, FloatTensorType([None, X.shape[1]]))] 33 | 34 | @staticmethod 35 | def _predict(clr_onnx, X): 36 | sess = rt.InferenceSession( 37 | clr_onnx.SerializeToString(), providers=["CPUExecutionProvider"] 38 | ) 39 | input_name = sess.get_inputs()[0].name 40 | label_name = sess.get_outputs()[0].name 41 | return sess.run([label_name], {input_name: X.astype(numpy.float32)})[0] 42 | 43 | def test_raw_name(self): 44 | """ 45 | Assert that input raw names do not break the compilation 46 | of the graph and that the ONNX model still produces 47 | correct predictions. 48 | """ 49 | X, y = self._load_data() 50 | clr = self._train_model(X, y) 51 | pred = clr.predict(X) 52 | for raw_name in self._raw_names: 53 | with self.subTest(raw_name=raw_name): 54 | clr_onnx = convert_sklearn( 55 | clr, 56 | initial_types=self._get_initial_types(X, raw_name), 57 | target_opset=TARGET_OPSET, 58 | ) 59 | pred_onnx = self._predict(clr_onnx, X) 60 | assert_almost_equal(pred, pred_onnx) 61 | 62 | 63 | if __name__ == "__main__": 64 | unittest.main() 65 | -------------------------------------------------------------------------------- /tests/test_scikit_pandas.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | """ 4 | Tests scikit-learn's binarizer converter. 5 | """ 6 | 7 | import unittest 8 | import pandas 9 | from sklearn.preprocessing import StandardScaler, MinMaxScaler 10 | 11 | from skl2onnx.common.data_types import FloatTensorType 12 | from skl2onnx import convert_sklearn 13 | 14 | 15 | def has_scikit_pandas(): 16 | try: 17 | import sklearn_pandas # noqa: F401 18 | 19 | return True 20 | except ImportError: 21 | return False 22 | 23 | 24 | def dataframe_mapper_shape_calculator(operator): 25 | if len(operator.inputs) == 1: 26 | raise RuntimeError("DataFrameMapper has no associated parser.") 27 | 28 | 29 | class TestOtherLibrariesInPipelineScikitPandas(unittest.TestCase): 30 | @unittest.skipIf(not has_scikit_pandas(), reason="scikit-pandas not installed") 31 | def test_scikit_pandas(self): 32 | from sklearn_pandas import DataFrameMapper 33 | 34 | df = pandas.DataFrame( 35 | { 36 | "feat1": [1, 2, 3, 4, 5, 6], 37 | "feat2": [1.0, 2.0, 3.0, 2.0, 3.0, 4.0], 38 | } 39 | ) 40 | 41 | mapper = DataFrameMapper( 42 | [ 43 | (["feat1", "feat2"], StandardScaler()), 44 | (["feat1", "feat2"], MinMaxScaler()), 45 | ] 46 | ) 47 | 48 | try: 49 | convert_sklearn( 50 | mapper, 51 | "predictable_tsne", 52 | [("input", FloatTensorType([None, df.shape[1]]))], 53 | custom_shape_calculators={ 54 | DataFrameMapper: dataframe_mapper_shape_calculator 55 | }, 56 | ) 57 | except RuntimeError as e: 58 | assert "DataFrameMapper has no associated parser." in str(e) 59 | 60 | 61 | if __name__ == "__main__": 62 | unittest.main() 63 | -------------------------------------------------------------------------------- /tests/test_sklearn_binarizer_converter.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | """ 4 | Tests scikit-learn's binarizer converter. 5 | """ 6 | 7 | import unittest 8 | import numpy as np 9 | from sklearn.preprocessing import Binarizer 10 | from skl2onnx import convert_sklearn 11 | from skl2onnx.common.data_types import FloatTensorType 12 | from test_utils import dump_data_and_model, TARGET_OPSET 13 | 14 | 15 | class TestSklearnBinarizer(unittest.TestCase): 16 | def test_model_binarizer(self): 17 | data = np.array( 18 | [[1.0, -1.0, 2.0], [2.0, 0.0, 0.0], [0.0, 1.0, -1.0]], dtype=np.float32 19 | ) 20 | model = Binarizer(threshold=0.5) 21 | model.fit(data) 22 | model_onnx = convert_sklearn( 23 | model, 24 | "scikit-learn binarizer", 25 | [("input", FloatTensorType(data.shape))], 26 | target_opset=TARGET_OPSET, 27 | ) 28 | self.assertTrue(model_onnx is not None) 29 | dump_data_and_model( 30 | data, model, model_onnx, basename="SklearnBinarizer-SkipDim1" 31 | ) 32 | 33 | 34 | if __name__ == "__main__": 35 | unittest.main() 36 | -------------------------------------------------------------------------------- /tests/test_sklearn_classifiers_extreme.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | import unittest 4 | import numpy as np 5 | 6 | try: 7 | from onnx.reference import ReferenceEvaluator 8 | except ImportError: 9 | ReferenceEvaluator = None 10 | from sklearn.tree import DecisionTreeClassifier 11 | from onnxruntime import InferenceSession 12 | from skl2onnx import to_onnx 13 | from test_utils import TARGET_OPSET 14 | 15 | 16 | class TestSklearnClassifiersExtreme(unittest.TestCase): 17 | def test_one_training_class(self): 18 | x = np.eye(4, dtype=np.float32) 19 | y = np.array([5, 5, 5, 5], dtype=np.int64) 20 | 21 | cl = DecisionTreeClassifier() 22 | cl = cl.fit(x, y) 23 | 24 | expected = [cl.predict(x), cl.predict_proba(x)] 25 | onx = to_onnx(cl, x, target_opset=TARGET_OPSET, options={"zipmap": False}) 26 | 27 | for cls in [ 28 | ( 29 | (lambda onx: ReferenceEvaluator(onx, verbose=0)) 30 | if ReferenceEvaluator is not None 31 | else None 32 | ), 33 | lambda onx: InferenceSession( 34 | onx.SerializeToString(), providers=["CPUExecutionProvider"] 35 | ), 36 | ]: 37 | if cls is None: 38 | continue 39 | sess = cls(onx) 40 | res = sess.run(None, {"X": x}) 41 | self.assertEqual(len(res), len(expected)) 42 | for e, g in zip(expected, res): 43 | self.assertEqual(e.tolist(), g.tolist()) 44 | 45 | 46 | if __name__ == "__main__": 47 | unittest.main(verbosity=2) 48 | -------------------------------------------------------------------------------- /tests/test_sklearn_constant_predictor.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | """Tests scikit-learn's SGDClassifier converter.""" 4 | 5 | import unittest 6 | import numpy as np 7 | from sklearn.multiclass import _ConstantPredictor 8 | from onnxruntime import __version__ as ort_version 9 | from skl2onnx import to_onnx 10 | 11 | from skl2onnx.common.data_types import FloatTensorType, DoubleTensorType 12 | 13 | from test_utils import dump_data_and_model, TARGET_OPSET 14 | 15 | ort_version = ".".join(ort_version.split(".")[:2]) 16 | 17 | 18 | class TestConstantPredictorConverter(unittest.TestCase): 19 | def test_constant_predictor_float(self): 20 | model = _ConstantPredictor() 21 | X = np.array([[1, 2]]) 22 | y = np.array([0]) 23 | model.fit(X, y) 24 | test_x = np.array([[1, 0], [2, 8]]) 25 | 26 | model_onnx = to_onnx( 27 | model, 28 | "scikit-learn ConstantPredictor", 29 | initial_types=[("input", FloatTensorType([None, X.shape[1]]))], 30 | target_opset=TARGET_OPSET, 31 | options={"zipmap": False}, 32 | ) 33 | 34 | self.assertIsNotNone(model_onnx is not None) 35 | dump_data_and_model( 36 | test_x.astype(np.float32), 37 | model, 38 | model_onnx, 39 | basename="SklearnConstantPredictorFloat", 40 | ) 41 | 42 | def test_constant_predictor_double(self): 43 | model = _ConstantPredictor() 44 | X = np.array([[1, 2]]) 45 | y = np.array([0]) 46 | model.fit(X, y) 47 | test_x = np.array([[1, 0], [2, 8]]) 48 | 49 | model_onnx = to_onnx( 50 | model, 51 | "scikit-learn ConstantPredictor", 52 | initial_types=[("input", DoubleTensorType([None, X.shape[1]]))], 53 | target_opset=TARGET_OPSET, 54 | options={"zipmap": False}, 55 | ) 56 | 57 | self.assertIsNotNone(model_onnx is not None) 58 | dump_data_and_model( 59 | test_x.astype(np.float64), 60 | model, 61 | model_onnx, 62 | basename="SklearnConstantPredictorDouble", 63 | ) 64 | 65 | 66 | if __name__ == "__main__": 67 | unittest.main(verbosity=3) 68 | -------------------------------------------------------------------------------- /tests/test_sklearn_quantile_transformer.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | """ 4 | Tests scikit-learn's polynomial features converter. 5 | """ 6 | import unittest 7 | import numpy as np 8 | from sklearn.preprocessing import QuantileTransformer 9 | from skl2onnx import convert_sklearn 10 | from skl2onnx.common.data_types import FloatTensorType 11 | from test_utils import dump_data_and_model, TARGET_OPSET 12 | 13 | 14 | class TestSklearnQuantileTransformer(unittest.TestCase): 15 | def test_quantile_transformer_simple(self): 16 | X = np.empty((100, 2), dtype=np.float32) 17 | X[:, 0] = np.arange(X.shape[0]) 18 | X[:, 1] = np.arange(X.shape[0]) * 2 19 | model = QuantileTransformer(n_quantiles=6).fit(X) 20 | model_onnx = convert_sklearn( 21 | model, 22 | "test", 23 | [("input", FloatTensorType([None, X.shape[1]]))], 24 | target_opset=TARGET_OPSET, 25 | ) 26 | dump_data_and_model( 27 | X.astype(np.float32), 28 | model, 29 | model_onnx, 30 | basename="SklearnQuantileTransformerSimple", 31 | ) 32 | 33 | def test_quantile_transformer_int(self): 34 | X = np.random.randint(0, 5, (100, 20)) 35 | model = QuantileTransformer(n_quantiles=6).fit(X) 36 | model_onnx = convert_sklearn( 37 | model, 38 | "test", 39 | [("input", FloatTensorType([None, X.shape[1]]))], 40 | target_opset=TARGET_OPSET, 41 | ) 42 | dump_data_and_model( 43 | X.astype(np.float32), 44 | model, 45 | model_onnx, 46 | basename="SklearnQuantileTransformerInt", 47 | ) 48 | 49 | def test_quantile_transformer_nan(self): 50 | X = np.random.randint(0, 5, (100, 20)) 51 | X = X.astype(np.float32) 52 | X[0][0] = np.nan 53 | X[1][1] = np.nan 54 | model = QuantileTransformer(n_quantiles=6).fit(X) 55 | model_onnx = convert_sklearn( 56 | model, 57 | "test", 58 | [("input", FloatTensorType([None, X.shape[1]]))], 59 | target_opset=TARGET_OPSET, 60 | ) 61 | dump_data_and_model( 62 | X.astype(np.float32), 63 | model, 64 | model_onnx, 65 | basename="SklearnQuantileTransformerNan", 66 | ) 67 | 68 | 69 | if __name__ == "__main__": 70 | unittest.main() 71 | -------------------------------------------------------------------------------- /tests/test_sklearn_random_projection.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | import unittest 5 | import packaging.version as pv 6 | import numpy as np 7 | import onnxruntime 8 | from sklearn.random_projection import GaussianRandomProjection 9 | from skl2onnx import convert_sklearn, to_onnx 10 | from skl2onnx.common.data_types import FloatTensorType 11 | from test_utils import dump_data_and_model, TARGET_OPSET 12 | 13 | nort = pv.Version(onnxruntime.__version__) < pv.Version("0.5.0") 14 | 15 | 16 | class TestSklearnRandomProjection(unittest.TestCase): 17 | @unittest.skipIf(TARGET_OPSET < 9 or nort, reason="MatMul not available") 18 | def test_gaussian_random_projection_float32(self): 19 | rng = np.random.RandomState(42) 20 | pt = GaussianRandomProjection(n_components=4) 21 | X = rng.rand(10, 5) 22 | model = pt.fit(X) 23 | assert model.transform(X).shape[1] == 4 24 | model_onnx = convert_sklearn( 25 | model, 26 | "scikit-learn GaussianRandomProjection", 27 | [("inputs", FloatTensorType([None, X.shape[1]]))], 28 | target_opset=TARGET_OPSET, 29 | ) 30 | self.assertIsNotNone(model_onnx) 31 | dump_data_and_model( 32 | X.astype(np.float32), model, model_onnx, basename="GaussianRandomProjection" 33 | ) 34 | 35 | @unittest.skipIf(TARGET_OPSET < 9 or nort, reason="MatMul not available") 36 | def test_gaussian_random_projection_float64(self): 37 | rng = np.random.RandomState(42) 38 | pt = GaussianRandomProjection(n_components=4) 39 | X = rng.rand(10, 5).astype(np.float64) 40 | model = pt.fit(X) 41 | model_onnx = to_onnx(model, X[:1], target_opset=TARGET_OPSET) 42 | self.assertIsNotNone(model_onnx) 43 | dump_data_and_model(X, model, model_onnx, basename="GaussianRandomProjection64") 44 | 45 | 46 | if __name__ == "__main__": 47 | unittest.main() 48 | -------------------------------------------------------------------------------- /tests/test_sklearn_random_trees_embedding.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | import unittest 4 | import numpy 5 | from onnxruntime import InferenceSession 6 | 7 | try: 8 | # scikit-learn >= 0.22 9 | from sklearn.utils._testing import ignore_warnings 10 | except ImportError: 11 | # scikit-learn < 0.22 12 | from sklearn.utils.testing import ignore_warnings 13 | from sklearn.exceptions import ConvergenceWarning 14 | from sklearn.datasets import make_regression 15 | from sklearn.ensemble import RandomTreesEmbedding 16 | from skl2onnx import to_onnx 17 | from test_utils import TARGET_OPSET, dump_data_and_model 18 | 19 | 20 | class TestSklearnRandomTreeEmbeddings(unittest.TestCase): 21 | def check_model(self, model, X, name="X"): 22 | try: 23 | sess = InferenceSession( 24 | model.SerializeToString(), providers=["CPUExecutionProvider"] 25 | ) 26 | except Exception as e: 27 | raise AssertionError("Unable to load model\n%s" % str(model)) from e 28 | try: 29 | return sess.run(None, {name: X[:7]}) 30 | except Exception as e: 31 | raise AssertionError( 32 | "Unable to run model X.shape=%r X.dtype=%r\n%s" 33 | % (X[:7].shape, X.dtype, str(model)) 34 | ) from e 35 | 36 | @ignore_warnings(category=(FutureWarning, ConvergenceWarning, DeprecationWarning)) 37 | def test_random_trees_embedding(self): 38 | X, _ = make_regression( 39 | n_features=5, n_samples=100, n_targets=1, random_state=42, n_informative=3 40 | ) 41 | X = X.astype(numpy.float32) 42 | 43 | model = RandomTreesEmbedding( 44 | n_estimators=3, max_depth=2, sparse_output=False 45 | ).fit(X) 46 | model.transform(X) 47 | model_onnx = to_onnx(model, X[:1], target_opset=TARGET_OPSET) 48 | with open("model.onnx", "wb") as f: 49 | f.write(model_onnx.SerializeToString()) 50 | self.check_model(model_onnx, X) 51 | dump_data_and_model( 52 | X.astype(numpy.float32), 53 | model, 54 | model_onnx, 55 | basename="SklearnRandomTreesEmbedding", 56 | ) 57 | 58 | 59 | if __name__ == "__main__": 60 | unittest.main() 61 | -------------------------------------------------------------------------------- /tests/test_sklearn_replace_transformer.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | """ 4 | Tests scikit-learn's cast transformer converter. 5 | """ 6 | 7 | import unittest 8 | import numpy 9 | from sklearn.pipeline import Pipeline 10 | 11 | try: 12 | from sklearn.compose import ColumnTransformer 13 | except ImportError: 14 | ColumnTransformer = None 15 | from skl2onnx.sklapi import ReplaceTransformer 16 | from skl2onnx import convert_sklearn 17 | from skl2onnx.common.data_types import FloatTensorType 18 | from test_utils import dump_data_and_model, TARGET_OPSET 19 | 20 | 21 | class TestSklearnCastTransformerConverter(unittest.TestCase): 22 | def common_test_replace_transformer(self, dtype, input_type): 23 | model = Pipeline( 24 | [ 25 | ("replace", ReplaceTransformer(dtype=numpy.float32)), 26 | ] 27 | ) 28 | data = numpy.array( 29 | [[0.1, 0.2, 3.1], [1, 1, 0], [0, 2, 1], [1, 0, 2]], dtype=numpy.float32 30 | ) 31 | model.fit(data) 32 | pred = model.steps[0][1].transform(data) 33 | assert pred.dtype == dtype 34 | model_onnx = convert_sklearn( 35 | model, 36 | "cast", 37 | [("input", FloatTensorType([None, 3]))], 38 | target_opset=TARGET_OPSET, 39 | ) 40 | self.assertTrue(model_onnx is not None) 41 | dump_data_and_model( 42 | data, 43 | model, 44 | model_onnx, 45 | basename="SklearnCastTransformer{}".format(input_type.__class__.__name__), 46 | ) 47 | 48 | @unittest.skipIf(TARGET_OPSET < 11, reason="not supported") 49 | def test_replace_transformer(self): 50 | self.common_test_replace_transformer(numpy.float32, FloatTensorType) 51 | 52 | 53 | if __name__ == "__main__": 54 | unittest.main() 55 | -------------------------------------------------------------------------------- /tests/test_sklearn_sgd_oneclass_svm_converter.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | """Tests scikit-learn's SGDClassifier converter.""" 4 | 5 | import unittest 6 | import numpy as np 7 | 8 | try: 9 | from sklearn.linear_model import SGDOneClassSVM 10 | except ImportError: 11 | SGDOneClassSVM = None 12 | from onnxruntime import __version__ as ort_version 13 | from skl2onnx import convert_sklearn 14 | 15 | from skl2onnx.common.data_types import ( 16 | FloatTensorType, 17 | ) 18 | 19 | from test_utils import dump_data_and_model, TARGET_OPSET 20 | 21 | ort_version = ".".join(ort_version.split(".")[:2]) 22 | 23 | 24 | class TestSGDOneClassSVMConverter(unittest.TestCase): 25 | @unittest.skipIf(SGDOneClassSVM is None, reason="scikit-learn<1.0") 26 | def test_model_sgd_oneclass_svm(self): 27 | X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]]) 28 | model = SGDOneClassSVM(random_state=42) 29 | model.fit(X) 30 | test_x = np.array([[0, 0], [-1, -1], [1, 1]]).astype(np.float32) 31 | model.predict(test_x) 32 | 33 | model_onnx = convert_sklearn( 34 | model, 35 | "scikit-learn SGD OneClass SVM", 36 | [("input", FloatTensorType([None, X.shape[1]]))], 37 | target_opset=TARGET_OPSET, 38 | ) 39 | 40 | self.assertIsNotNone(model_onnx) 41 | dump_data_and_model( 42 | test_x.astype(np.float32), 43 | model, 44 | model_onnx, 45 | basename="SklearnSGDOneClassSVMBinaryHinge", 46 | ) 47 | 48 | 49 | if __name__ == "__main__": 50 | unittest.main(verbosity=3) 51 | -------------------------------------------------------------------------------- /tests/test_sklearn_tfidf_transformer_converter.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | # coding: utf-8 4 | """ 5 | Tests scikit-learn's TfidfTransformer converter. 6 | """ 7 | 8 | import unittest 9 | import numpy 10 | from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer 11 | from skl2onnx import convert_sklearn 12 | from skl2onnx.common.data_types import FloatTensorType 13 | from test_utils import dump_data_and_model, TARGET_OPSET 14 | 15 | 16 | class TestSklearnTfidfTransformerConverter(unittest.TestCase): 17 | def test_model_tfidf_transform(self): 18 | corpus = numpy.array( 19 | [ 20 | "This is the first document.", 21 | "This document is the second document.", 22 | "And this is the third one.", 23 | "Is this the first document?", 24 | "Troisième document en français", 25 | ] 26 | ).reshape((5, 1)) 27 | data = ( 28 | CountVectorizer(ngram_range=(1, 1)).fit_transform(corpus.ravel()).todense() 29 | ) 30 | data = numpy.array(data.astype(numpy.float32)) 31 | 32 | for sublinear_tf in (False, True): 33 | if sublinear_tf: 34 | # scikit-learn applies a log on a matrix 35 | # but only on strictly positive coefficients 36 | break 37 | for norm in (None, "l1", "l2"): 38 | for smooth_idf in (False, True): 39 | for use_idf in (False, True): 40 | model = TfidfTransformer( 41 | norm=norm, 42 | use_idf=use_idf, 43 | smooth_idf=smooth_idf, 44 | sublinear_tf=sublinear_tf, 45 | ) 46 | model.fit(data) 47 | model_onnx = convert_sklearn( 48 | model, 49 | "TfidfTransformer", 50 | [("input", FloatTensorType([None, data.shape[1]]))], 51 | target_opset=TARGET_OPSET, 52 | ) 53 | self.assertTrue(model_onnx is not None) 54 | suffix = norm.upper() if norm else "" 55 | suffix += "Sub" if sublinear_tf else "" 56 | suffix += "Idf" if use_idf else "" 57 | suffix += "Smooth" if smooth_idf else "" 58 | dump_data_and_model( 59 | data, 60 | model, 61 | model_onnx, 62 | basename="SklearnTfidfTransform" + suffix, 63 | ) 64 | 65 | 66 | if __name__ == "__main__": 67 | unittest.main() 68 | -------------------------------------------------------------------------------- /tests/test_sklearn_tfidf_transformer_converter_sparse.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | """ 3 | Tests examples from scikit-learn's documentation. 4 | """ 5 | 6 | import packaging.version as pv 7 | import unittest 8 | import urllib.error 9 | import sys 10 | import onnx 11 | from sklearn.datasets import fetch_20newsgroups 12 | from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer 13 | from sklearn.pipeline import Pipeline 14 | import onnxruntime as ort 15 | from skl2onnx.common.data_types import StringTensorType 16 | from skl2onnx import convert_sklearn 17 | from test_utils import dump_data_and_model, TARGET_OPSET 18 | 19 | BACKEND = ( 20 | "onnxruntime" 21 | if pv.Version(onnx.__version__) < pv.Version("1.16.0") 22 | else "onnx;onnxruntime" 23 | ) 24 | 25 | 26 | class TestSklearnTfidfVectorizerSparse(unittest.TestCase): 27 | @unittest.skipIf( 28 | TARGET_OPSET < 9, 29 | # issue with encoding 30 | reason="https://github.com/onnx/onnx/pull/1734", 31 | ) 32 | @unittest.skipIf(TARGET_OPSET < 18, reason="too long") 33 | @unittest.skipIf( 34 | pv.Version(ort.__version__) <= pv.Version("0.2.1"), 35 | reason="sparse not supported", 36 | ) 37 | @unittest.skipIf(sys.platform != "linux", reason="too long") 38 | def test_model_tfidf_transform_bug(self): 39 | categories = [ 40 | "alt.atheism", 41 | "soc.religion.christian", 42 | "comp.graphics", 43 | "sci.med", 44 | ] 45 | try: 46 | twenty_train = fetch_20newsgroups( 47 | subset="train", categories=categories, shuffle=True, random_state=0 48 | ) 49 | except urllib.error.HTTPError as e: 50 | raise unittest.SkipTest(f"HTTP fails due to {e}") 51 | text_clf = Pipeline( 52 | [("vect", CountVectorizer()), ("tfidf", TfidfTransformer())] 53 | ) 54 | twenty_train.data[0] = "bruît " + twenty_train.data[0] 55 | text_clf.fit(twenty_train.data, twenty_train.target) 56 | model_onnx = convert_sklearn( 57 | text_clf, 58 | name="DocClassifierCV-Tfidf", 59 | initial_types=[("input", StringTensorType([5]))], 60 | target_opset=TARGET_OPSET, 61 | ) 62 | dump_data_and_model( 63 | twenty_train.data[5:10], 64 | text_clf, 65 | model_onnx, 66 | basename="SklearnPipelineTfidfTransformer", 67 | backend=BACKEND, 68 | ) 69 | 70 | 71 | if __name__ == "__main__": 72 | unittest.main() 73 | -------------------------------------------------------------------------------- /tests/test_sklearn_tfidf_vectorizer_converter_dataset.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | """ 4 | Tests scikit-learn's tfidf converter using downloaded data. 5 | """ 6 | 7 | import unittest 8 | import urllib.error 9 | import packaging.version as pv 10 | import numpy as np 11 | import onnx 12 | from sklearn.model_selection import train_test_split 13 | from sklearn.feature_extraction.text import TfidfVectorizer 14 | from sklearn.datasets import fetch_20newsgroups 15 | from skl2onnx import convert_sklearn 16 | from skl2onnx.common.data_types import StringTensorType 17 | from test_utils import dump_data_and_model, TARGET_OPSET 18 | 19 | BACKEND = ( 20 | "onnxruntime" 21 | if pv.Version(onnx.__version__) < pv.Version("1.16.0") 22 | else "onnx;onnxruntime" 23 | ) 24 | 25 | 26 | class TestSklearnTfidfVectorizerDataSet(unittest.TestCase): 27 | @unittest.skipIf(TARGET_OPSET < 9, reason="not available") 28 | @unittest.skipIf(TARGET_OPSET < 18, reason="too long") 29 | def test_tfidf_20newsgroups(self): 30 | try: 31 | data = fetch_20newsgroups() 32 | except urllib.error.HTTPError as e: 33 | raise unittest.SkipTest(f"HTTP fails due to {e}") 34 | X, y = np.array(data.data)[:100], np.array(data.target)[:100] 35 | X_train, X_test, y_train, y_test = train_test_split( 36 | X, y, test_size=0.5, random_state=42 37 | ) 38 | 39 | model = TfidfVectorizer().fit(X_train) 40 | onnx_model = convert_sklearn( 41 | model, 42 | "cv", 43 | [("input", StringTensorType(X_test.shape))], 44 | target_opset=TARGET_OPSET, 45 | ) 46 | dump_data_and_model( 47 | X_test, 48 | model, 49 | onnx_model, 50 | basename="SklearnTfidfVectorizer20newsgroups", 51 | backend=BACKEND, 52 | ) 53 | 54 | @unittest.skipIf(TARGET_OPSET < 9, reason="not available") 55 | @unittest.skipIf(TARGET_OPSET < 18, reason="too long") 56 | def test_tfidf_20newsgroups_nolowercase(self): 57 | try: 58 | data = fetch_20newsgroups() 59 | except urllib.error.HTTPError as e: 60 | raise unittest.SkipTest(f"HTTP fails due to {e}") 61 | X, y = np.array(data.data)[:100], np.array(data.target)[:100] 62 | X_train, X_test, y_train, y_test = train_test_split( 63 | X, y, test_size=0.5, random_state=42 64 | ) 65 | 66 | model = TfidfVectorizer(lowercase=False).fit(X_train) 67 | onnx_model = convert_sklearn( 68 | model, 69 | "cv", 70 | [("input", StringTensorType(X_test.shape))], 71 | target_opset=TARGET_OPSET, 72 | ) 73 | dump_data_and_model( 74 | X_test, 75 | model, 76 | onnx_model, 77 | basename="SklearnTfidfVectorizer20newsgroupsNOLower", 78 | backend=BACKEND, 79 | ) 80 | 81 | 82 | if __name__ == "__main__": 83 | unittest.main() 84 | -------------------------------------------------------------------------------- /tests/test_sklearn_truncated_svd.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | import unittest 5 | 6 | import numpy as np 7 | from sklearn.decomposition import TruncatedSVD 8 | 9 | from skl2onnx.common.data_types import FloatTensorType, Int64TensorType 10 | from skl2onnx import convert_sklearn 11 | from test_utils import create_tensor 12 | from test_utils import dump_data_and_model, TARGET_OPSET 13 | 14 | 15 | class TestTruncatedSVD(unittest.TestCase): 16 | def setUp(self): 17 | np.random.seed(0) 18 | 19 | def test_truncated_svd(self): 20 | N, C, K = 2, 3, 2 21 | x = create_tensor(N, C) 22 | 23 | svd = TruncatedSVD(n_components=K) 24 | svd.fit(x) 25 | model_onnx = convert_sklearn( 26 | svd, 27 | initial_types=[("input", FloatTensorType(shape=[None, C]))], 28 | target_opset=TARGET_OPSET, 29 | ) 30 | self.assertTrue(model_onnx is not None) 31 | dump_data_and_model(x, svd, model_onnx, basename="SklearnTruncatedSVD") 32 | 33 | def test_truncated_svd_arpack(self): 34 | X = create_tensor(10, 10) 35 | svd = TruncatedSVD( 36 | n_components=5, algorithm="arpack", n_iter=10, tol=0.1, random_state=42 37 | ).fit(X) 38 | model_onnx = convert_sklearn( 39 | svd, 40 | initial_types=[("input", FloatTensorType(shape=X.shape))], 41 | target_opset=TARGET_OPSET, 42 | ) 43 | self.assertTrue(model_onnx is not None) 44 | dump_data_and_model(X, svd, model_onnx, basename="SklearnTruncatedSVDArpack") 45 | 46 | def test_truncated_svd_int(self): 47 | X = create_tensor(5, 5).astype(np.int64) 48 | svd = TruncatedSVD(n_iter=20, random_state=42).fit(X) 49 | model_onnx = convert_sklearn( 50 | svd, 51 | initial_types=[("input", Int64TensorType([None, X.shape[1]]))], 52 | target_opset=TARGET_OPSET, 53 | ) 54 | self.assertTrue(model_onnx is not None) 55 | dump_data_and_model(X, svd, model_onnx, basename="SklearnTruncatedSVDInt") 56 | 57 | 58 | if __name__ == "__main__": 59 | unittest.main() 60 | -------------------------------------------------------------------------------- /tests/test_sklearn_tuned_threshold_classifier.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | import unittest 4 | import numpy as np 5 | from sklearn.datasets import make_classification 6 | from sklearn.ensemble import RandomForestClassifier 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.utils._testing import ignore_warnings 9 | from skl2onnx import to_onnx 10 | from skl2onnx.common.data_types import FloatTensorType 11 | from test_utils import dump_data_and_model, TARGET_OPSET 12 | 13 | 14 | def has_tuned_theshold_classifier(): 15 | try: 16 | from sklearn.model_selection import TunedThresholdClassifierCV # noqa: F401 17 | except ImportError: 18 | return False 19 | return True 20 | 21 | 22 | class TestSklearnTunedThresholdClassifierConverter(unittest.TestCase): 23 | @unittest.skipIf( 24 | not has_tuned_theshold_classifier(), 25 | reason="TunedThresholdClassifierCV not available", 26 | ) 27 | @ignore_warnings(category=FutureWarning) 28 | def test_tuned_threshold_classifier(self): 29 | from sklearn.model_selection import TunedThresholdClassifierCV 30 | 31 | X, y = make_classification( 32 | n_samples=1_000, weights=[0.9, 0.1], class_sep=0.8, random_state=42 33 | ) 34 | X_train, X_test, y_train, y_test = train_test_split( 35 | X, y, stratify=y, random_state=42 36 | ) 37 | classifier = RandomForestClassifier(random_state=0) 38 | 39 | classifier_tuned = TunedThresholdClassifierCV( 40 | classifier, scoring="balanced_accuracy" 41 | ).fit(X_train, y_train) 42 | 43 | model_onnx = to_onnx( 44 | classifier_tuned, 45 | initial_types=[("X", FloatTensorType([None, X_train.shape[1]]))], 46 | target_opset=TARGET_OPSET - 1, 47 | options={"zipmap": False}, 48 | ) 49 | self.assertTrue(model_onnx is not None) 50 | dump_data_and_model( 51 | X_test[:10].astype(np.float32), 52 | classifier_tuned, 53 | model_onnx, 54 | basename="SklearnTunedThresholdClassifier", 55 | ) 56 | 57 | 58 | if __name__ == "__main__": 59 | unittest.main(verbosity=2) 60 | -------------------------------------------------------------------------------- /tests/test_sklearn_voting_regressor_converter.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | """Tests VotingRegressor converter.""" 4 | 5 | import unittest 6 | import numpy 7 | from sklearn.linear_model import LinearRegression 8 | 9 | try: 10 | from sklearn.ensemble import VotingRegressor 11 | except ImportError: 12 | # New in 0.21 13 | VotingRegressor = None 14 | from sklearn.tree import DecisionTreeRegressor 15 | from skl2onnx import convert_sklearn 16 | from skl2onnx.common.data_types import ( 17 | BooleanTensorType, 18 | FloatTensorType, 19 | Int64TensorType, 20 | ) 21 | from test_utils import dump_data_and_model, fit_regression_model, TARGET_OPSET 22 | 23 | 24 | def model_to_test(): 25 | return VotingRegressor( 26 | [ 27 | ("lr", LinearRegression()), 28 | ("dt", DecisionTreeRegressor()), 29 | ] 30 | ) 31 | 32 | 33 | class TestVotingRegressorConverter(unittest.TestCase): 34 | @unittest.skipIf(VotingRegressor is None, reason="new in 0.21") 35 | def test_model_voting_regression(self): 36 | model, X = fit_regression_model(model_to_test()) 37 | model_onnx = convert_sklearn( 38 | model, 39 | "voting regression", 40 | [("input", FloatTensorType([None, X.shape[1]]))], 41 | target_opset=TARGET_OPSET, 42 | ) 43 | self.assertIsNotNone(model_onnx) 44 | dump_data_and_model( 45 | X.astype(numpy.float32), 46 | model, 47 | model_onnx, 48 | basename="SklearnVotingRegressor-Dec4", 49 | comparable_outputs=[0], 50 | ) 51 | 52 | @unittest.skipIf(VotingRegressor is None, reason="new in 0.21") 53 | def test_model_voting_regression_int(self): 54 | model, X = fit_regression_model(model_to_test(), is_int=True) 55 | model_onnx = convert_sklearn( 56 | model, 57 | "voting regression", 58 | [("input", Int64TensorType([None, X.shape[1]]))], 59 | target_opset=TARGET_OPSET, 60 | ) 61 | self.assertIsNotNone(model_onnx) 62 | dump_data_and_model( 63 | X, 64 | model, 65 | model_onnx, 66 | basename="SklearnVotingRegressorInt-Dec4", 67 | comparable_outputs=[0], 68 | ) 69 | 70 | @unittest.skipIf(VotingRegressor is None, reason="new in 0.21") 71 | def test_model_voting_regression_bool(self): 72 | model, X = fit_regression_model(model_to_test(), is_bool=True) 73 | model_onnx = convert_sklearn( 74 | model, 75 | "voting regression", 76 | [("input", BooleanTensorType([None, X.shape[1]]))], 77 | target_opset=TARGET_OPSET, 78 | ) 79 | self.assertIsNotNone(model_onnx) 80 | dump_data_and_model( 81 | X, 82 | model, 83 | model_onnx, 84 | basename="SklearnVotingRegressorBool", 85 | comparable_outputs=[0], 86 | ) 87 | 88 | 89 | if __name__ == "__main__": 90 | unittest.main() 91 | -------------------------------------------------------------------------------- /tests/test_utils/main.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | 4 | from skl2onnx.proto import onnx_proto 5 | from skl2onnx.common import utils as convert_utils 6 | 7 | 8 | def set_model_domain(model, domain): 9 | """ 10 | Sets the domain on the ONNX model. 11 | 12 | :param model: instance of an ONNX model 13 | :param domain: string containing the domain name of the model 14 | 15 | Example: 16 | 17 | :: 18 | from test_utils import set_model_domain 19 | onnx_model = load_model("SqueezeNet.onnx") 20 | set_model_domain(onnx_model, "com.acme") 21 | """ 22 | if model is None or not isinstance(model, onnx_proto.ModelProto): 23 | raise ValueError("Parameter model is not an onnx model.") 24 | if not convert_utils.is_string_type(domain): 25 | raise ValueError("Parameter domain must be a string type.") 26 | model.domain = domain 27 | 28 | 29 | def set_model_version(model, version): 30 | """ 31 | Sets the version of the ONNX model. 32 | 33 | :param model: instance of an ONNX model 34 | :param version: integer containing the version of the model 35 | 36 | Example: 37 | 38 | :: 39 | from test_utils import set_model_version 40 | onnx_model = load_model("SqueezeNet.onnx") 41 | set_model_version(onnx_model, 1) 42 | """ 43 | if model is None or not isinstance(model, onnx_proto.ModelProto): 44 | raise ValueError("Parameter model is not an onnx model.") 45 | if not convert_utils.is_numeric_type(version): 46 | raise ValueError("Parameter version must be a numeric type.") 47 | model.model_version = version 48 | 49 | 50 | def set_model_doc_string(model, doc, override=False): 51 | """ 52 | Sets the doc string of the ONNX model. 53 | 54 | :param model: instance of an ONNX model 55 | :param doc: string containing the doc string that describes the model. 56 | :param override: bool if true will always override the doc 57 | string with the new value 58 | 59 | Example: 60 | 61 | :: 62 | from test_utils import set_model_doc_string 63 | onnx_model = load_model("SqueezeNet.onnx") 64 | set_model_doc_string(onnx_model, "Sample doc string") 65 | """ 66 | if model is None or not isinstance(model, onnx_proto.ModelProto): 67 | raise ValueError("Parameter model is not an onnx model.") 68 | if not convert_utils.is_string_type(doc): 69 | raise ValueError("Parameter doc must be a string type.") 70 | if model.doc_string and not doc and override is False: 71 | raise ValueError( 72 | "Failed to overwrite the doc string with a blank string," 73 | " set override to True if intentional." 74 | ) 75 | model.doc_string = doc 76 | -------------------------------------------------------------------------------- /tests/test_utils/reference_implementation_afe.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | """ 3 | Helpers to test runtimes. 4 | """ 5 | 6 | from onnx.defs import onnx_opset_version 7 | 8 | 9 | def _array_feature_extrator(data, indices): 10 | """ 11 | Implementation of operator *ArrayFeatureExtractor* 12 | with :epkg:`numpy`. 13 | """ 14 | if len(indices.shape) == 2 and indices.shape[0] == 1: 15 | index = indices.ravel().tolist() 16 | add = len(index) 17 | elif len(indices.shape) == 1: 18 | index = indices.tolist() 19 | add = len(index) 20 | else: 21 | add = 1 22 | for s in indices.shape: 23 | add *= s 24 | index = indices.ravel().tolist() 25 | new_shape = (1, add) if len(data.shape) == 1 else list(data.shape[:-1]) + [add] 26 | try: 27 | tem = data[..., index] 28 | except IndexError as e: 29 | raise RuntimeError(f"data.shape={data.shape}, indices={indices}") from e 30 | res = tem.reshape(new_shape) 31 | return res 32 | 33 | 34 | if onnx_opset_version() >= 18: 35 | from onnx.reference.op_run import OpRun 36 | 37 | class ArrayFeatureExtractor(OpRun): 38 | op_domain = "ai.onnx.ml" 39 | 40 | def _run(self, data, indices): 41 | """ 42 | Runtime for operator *ArrayFeatureExtractor*. 43 | 44 | .. warning:: 45 | ONNX specifications may be imprecise in some cases. 46 | When the input data is a vector (one dimension), 47 | the output has still two like a matrix with one row. 48 | The implementation follows what :epkg:`onnxruntime` does in 49 | `array_feature_extractor.cc 50 | `_. 52 | """ 53 | res = _array_feature_extrator(data, indices) 54 | return (res,) 55 | --------------------------------------------------------------------------------