├── .flake8 ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── CI-python-AutoML.yml │ ├── CI-python-minimal.yml │ ├── CI-python.yml │ ├── code-scan.yml │ ├── linkcheck.yml │ ├── python-linting.yml │ └── release-ml-wrappers.yml ├── .gitignore ├── .readthedocs.yaml ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── SECURITY.md ├── SUPPORT.md ├── docs ├── WrapperSpecifications.md ├── object-detection-schema.md └── release-process.md ├── python ├── README.md ├── docs │ ├── api_reference.rst │ ├── code_of_conduct.rst │ ├── conf.py │ ├── contributing.rst │ ├── dataset_wrapping.rst │ ├── dependencies.rst │ ├── getting_started.rst │ ├── image_model_wrapping.rst │ ├── index.rst │ ├── license_information.rst │ ├── model_wrapper_specifications.rst │ ├── model_wrapping.rst │ ├── object_detection_model_wrapping.rst │ ├── overview.rst │ ├── privacy_policy.rst │ ├── pytorch_model_wrapping.rst │ ├── support.rst │ ├── supported_frameworks.rst │ ├── supported_models.rst │ ├── tensorflow_model_wrapping.rst │ ├── text_model_wrapping.rst │ └── versioning.rst ├── ml_wrappers │ ├── __init__.py │ ├── common │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── gpu_kmeans.py │ │ └── warnings_suppressor.py │ ├── dataset │ │ ├── __init__.py │ │ ├── dataset_utils.py │ │ ├── dataset_wrapper.py │ │ └── timestamp_featurizer.py │ ├── model │ │ ├── __init__.py │ │ ├── base_wrapped_model.py │ │ ├── endpoint_wrapper.py │ │ ├── evaluator.py │ │ ├── fastai_wrapper.py │ │ ├── function_wrapper.py │ │ ├── image_model_wrapper.py │ │ ├── model_utils.py │ │ ├── model_wrapper.py │ │ ├── openai_wrapper.py │ │ ├── predictions_wrapper.py │ │ ├── pytorch_wrapper.py │ │ ├── tensorflow_wrapper.py │ │ ├── text_model_wrapper.py │ │ ├── wrapped_classification_model.py │ │ ├── wrapped_classification_without_proba_model.py │ │ └── wrapped_regression_model.py │ └── version.py ├── setup.cfg └── setup.py ├── requirements-automl.txt ├── requirements-dev.txt ├── requirements-doc.txt 
├── requirements-linting.txt ├── requirements-test.txt └── tests ├── automl ├── test_automl_image_model_wrapper.py └── test_automl_image_object_detection_model_wrapper.py ├── common_text_utils.py ├── common_utils.py ├── common_vision_utils.py ├── conftest.py ├── constants.py ├── main ├── test_dataset_wrapper.py ├── test_endpoint_wrapper.py ├── test_image_model_wrapper.py ├── test_model_wrapper.py ├── test_openai_wrapper.py ├── test_predictions_wrapper.py ├── test_pytorch_model_wrapper.py ├── test_text_model_wrapper.py ├── test_tf_model_wrapper.py └── test_timestamp_featurizer.py ├── minimal └── test_minimal.py ├── train_wrapper_utils.py └── wrapper_validator.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 119 3 | max-complexity = 20 4 | exclude = .git/, __pycache__/, dist/ 5 | ignore = G001, B023, B902 6 | show-source = True 7 | statistics = True 8 | count = True 9 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. 
iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/workflows/CI-python-AutoML.yml: -------------------------------------------------------------------------------- 1 | name: CI Python AutoML 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | schedule: 9 | - cron: '30 5 * * *' 10 | 11 | jobs: 12 | ci-python-automl: 13 | strategy: 14 | matrix: 15 | packageDirectory: ["ml_wrappers"] 16 | operatingSystem: [ubuntu-latest] 17 | pythonVersion: ['3.9'] 18 | 19 | runs-on: ${{ matrix.operatingSystem }} 20 | 21 | steps: 22 | - uses: actions/checkout@v4 23 | - uses: conda-incubator/setup-miniconda@v3 24 | with: 25 | auto-update-conda: true 26 | python-version: ${{ matrix.pythonVersion }} 27 | - if: ${{ matrix.operatingSystem == 'macos-latest' }} 28 | name: Use Homebrew to install libomp on MacOS 29 | shell: bash -l {0} 30 | run: | 31 | brew install libomp 32 | - if: ${{ matrix.pythonVersion != '3.6' }} 33 | name: Install numpy 34 | shell: bash -l {0} 35 | run: | 36 | conda install --yes --quiet "numpy<2.0" -c conda-forge 37 | - if: ${{ matrix.operatingSystem != 'macos-latest' }} 38 | name: Install pytorch on non-MacOS 39 | shell: bash -l {0} 40 | run: | 41 | conda install --yes --quiet pytorch==2.2.2 torchvision captum cpuonly -c pytorch 42 | - if: ${{ matrix.operatingSystem == 'macos-latest' }} 43 | name: Install Anaconda packages on MacOS, which should not include cpuonly according to official docs 44 | shell: bash -l {0} 45 | run: | 46 | conda install --yes --quiet pytorch==2.2.2 torchvision captum -c pytorch 47 | - if: ${{ matrix.operatingSystem == 'macos-latest' }} 48 | name: Install lightgbm from conda on MacOS 49 | shell: bash -l {0} 50 | run: | 51 | conda install --yes -c conda-forge lightgbm 52 | - name: Install automl dependencies 53 | shell: bash -l {0} 54 | run: | 55 | pip install -r requirements-automl.txt 56 | - name: Install package 57 | shell: bash -l {0} 58 | run: | 59 | pip install 
-e ./python 60 | - name: Install test dependencies 61 | shell: bash -l {0} 62 | run: | 63 | pip install -r requirements-test.txt 64 | - name: Test with pytest 65 | shell: bash -l {0} 66 | run: | 67 | pytest ./tests/automl -s -v --durations=10 --cov='ml_wrappers' --cov-report=xml --cov-report=html 68 | - name: Upload code coverage results 69 | uses: actions/upload-artifact@v4 70 | with: 71 | name: ${{ matrix.packageDirectory }}-code-coverage-results 72 | path: htmlcov 73 | # Use always() to always run this step to publish test results when there are test failures 74 | if: ${{ always() }} 75 | - if: ${{ (matrix.operatingSystem == 'windows-latest') && (matrix.pythonVersion == '3.7') }} 76 | name: Upload to codecov 77 | id: codecovupload1 78 | uses: codecov/codecov-action@v3 79 | with: 80 | token: ${{ secrets.CODECOV_TOKEN }} 81 | directory: . 82 | env_vars: OS,PYTHON 83 | fail_ci_if_error: false 84 | files: ./coverage.xml 85 | flags: unittests 86 | name: codecov-umbrella 87 | verbose: true 88 | - if: ${{ (steps.codecovupload1.outcome == 'failure') && (matrix.pythonVersion == '3.7') && (matrix.operatingSystem == 'windows-latest') }} 89 | name: Retry upload to codecov 90 | id: codecovupload2 91 | uses: codecov/codecov-action@v3 92 | with: 93 | token: ${{ secrets.CODECOV_TOKEN }} 94 | directory: . 
95 | env_vars: OS,PYTHON 96 | fail_ci_if_error: false 97 | files: ./coverage.xml 98 | flags: unittests 99 | name: codecov-umbrella 100 | verbose: true 101 | - name: Set codecov status 102 | if: ${{ (matrix.pythonVersion == '3.7') && (matrix.operatingSystem == 'windows-latest') }} 103 | shell: bash 104 | run: | 105 | if ${{ (steps.codecovupload1.outcome == 'success') || (steps.codecovupload2.outcome == 'success') }} ; then 106 | echo fine 107 | else 108 | exit 1 109 | fi 110 | -------------------------------------------------------------------------------- /.github/workflows/CI-python-minimal.yml: -------------------------------------------------------------------------------- 1 | name: CI Python minimal environment 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | schedule: 9 | - cron: '30 5 * * *' 10 | 11 | jobs: 12 | ci-python-minimal: 13 | strategy: 14 | matrix: 15 | packageDirectory: ["ml_wrappers"] 16 | operatingSystem: [ubuntu-latest, macos-latest, windows-latest] 17 | pythonVersion: ['3.9', '3.10', '3.11'] 18 | 19 | runs-on: ${{ matrix.operatingSystem }} 20 | 21 | steps: 22 | - uses: actions/checkout@v4 23 | - uses: conda-incubator/setup-miniconda@v3 24 | with: 25 | auto-update-conda: true 26 | python-version: ${{ matrix.pythonVersion }} 27 | - if: ${{ matrix.operatingSystem == 'macos-latest' }} 28 | name: Use Homebrew to install libomp on MacOS 29 | shell: bash -l {0} 30 | run: | 31 | brew install libomp 32 | - name: Install package 33 | shell: bash -l {0} 34 | run: | 35 | pip install -e ./python 36 | - name: Install test dependencies 37 | shell: bash -l {0} 38 | run: | 39 | pip install -r requirements-test.txt 40 | - name: Test with pytest 41 | shell: bash -l {0} 42 | run: | 43 | pytest ./tests/minimal -s -v --durations=10 --cov='ml_wrappers' --cov-report=xml --cov-report=html 44 | - name: Upload code coverage results 45 | uses: actions/upload-artifact@v4 46 | with: 47 | name: ${{ matrix.packageDirectory }}-${{ 
matrix.pythonVersion }}-${{ matrix.operatingSystem }}-code-coverage-results 48 | path: htmlcov 49 | # Use always() to always run this step to publish test results when there are test failures 50 | if: ${{ always() }} 51 | - if: ${{ (matrix.operatingSystem == 'windows-latest') && (matrix.pythonVersion == '3.9') }} 52 | name: Upload to codecov 53 | id: codecovupload1 54 | uses: codecov/codecov-action@v3 55 | with: 56 | token: ${{ secrets.CODECOV_TOKEN }} 57 | directory: . 58 | env_vars: OS,PYTHON 59 | fail_ci_if_error: false 60 | files: ./coverage.xml 61 | flags: unittests 62 | name: codecov-umbrella 63 | verbose: true 64 | - if: ${{ (steps.codecovupload1.outcome == 'failure') && (matrix.pythonVersion == '3.9') && (matrix.operatingSystem == 'windows-latest') }} 65 | name: Retry upload to codecov 66 | id: codecovupload2 67 | uses: codecov/codecov-action@v3 68 | with: 69 | token: ${{ secrets.CODECOV_TOKEN }} 70 | directory: . 71 | env_vars: OS,PYTHON 72 | fail_ci_if_error: false 73 | files: ./coverage.xml 74 | flags: unittests 75 | name: codecov-umbrella 76 | verbose: true 77 | - name: Set codecov status 78 | if: ${{ (matrix.pythonVersion == '3.9') && (matrix.operatingSystem == 'windows-latest') }} 79 | shell: bash 80 | run: | 81 | if ${{ (steps.codecovupload1.outcome == 'success') || (steps.codecovupload2.outcome == 'success') }} ; then 82 | echo fine 83 | else 84 | exit 1 85 | fi 86 | -------------------------------------------------------------------------------- /.github/workflows/CI-python.yml: -------------------------------------------------------------------------------- 1 | name: CI Python 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | schedule: 9 | - cron: '30 5 * * *' 10 | 11 | jobs: 12 | ci-python: 13 | strategy: 14 | matrix: 15 | packageDirectory: ["ml_wrappers"] 16 | operatingSystem: [ubuntu-latest, macos-latest, windows-latest] 17 | pythonVersion: ['3.9', '3.10', '3.11'] 18 | openaiVersion: ['0.28.1', 
'openai-latest'] 19 | exclude: 20 | - openaiVersion: '0.28.1' 21 | pythonVersion: '3.9' 22 | - openaiVersion: '0.28.1' 23 | pythonVersion: '3.10' 24 | - openaiVersion: '0.28.1' 25 | operatingSystem: 'macos-latest' 26 | - openaiVersion: '0.28.1' 27 | operatingSystem: 'windows-latest' 28 | 29 | runs-on: ${{ matrix.operatingSystem }} 30 | 31 | steps: 32 | - uses: actions/checkout@v4 33 | - uses: conda-incubator/setup-miniconda@v3 34 | with: 35 | auto-update-conda: true 36 | python-version: ${{ matrix.pythonVersion }} 37 | channels: conda-forge 38 | 39 | - if: ${{ matrix.operatingSystem == 'macos-latest' }} 40 | name: Use Homebrew to install libomp on MacOS 41 | shell: bash -l {0} 42 | run: | 43 | brew install libomp 44 | 45 | - if: ${{ matrix.operatingSystem == 'windows-latest' }} 46 | name: Install pytorch on windows for python 3.9 to 3.11 47 | shell: bash -l {0} 48 | run: | 49 | conda install --yes --quiet pytorch torchvision captum cpuonly "libtiff<4.5.0" -c pytorch -c conda-forge --strict-channel-priority 50 | 51 | - if: ${{ matrix.operatingSystem == 'ubuntu-latest' }} 52 | name: Install pytorch on ubuntu for python 3.9 to 3.11 53 | shell: bash -l {0} 54 | run: | 55 | conda install --yes --quiet pytorch torchvision captum cpuonly -c pytorch -c conda-forge --strict-channel-priority 56 | 57 | - if: ${{ matrix.operatingSystem == 'macos-latest' }} 58 | name: Install pytorch on MacOS for python 3.9 to 3.11 59 | shell: bash -l {0} 60 | run: | 61 | conda install --yes --quiet pytorch torchvision captum "protobuf<5.26.0" -c pytorch -c conda-forge 62 | 63 | - if: ${{ matrix.operatingSystem == 'macos-latest' }} 64 | name: Install lightgbm from conda on MacOS 65 | shell: bash -l {0} 66 | run: | 67 | conda install --yes lightgbm -c conda-forge 68 | 69 | - name: Install backwards-compatible tf-keras for transformers 70 | shell: bash -l {0} 71 | run: | 72 | pip install tf-keras 73 | 74 | - name: Install package 75 | shell: bash -l {0} 76 | run: | 77 | pip install -e ./python 78 
| 79 | - name: Install dev dependencies 80 | shell: bash -l {0} 81 | run: | 82 | pip install -r requirements-dev.txt 83 | 84 | - name: Install test dependencies 85 | shell: bash -l {0} 86 | run: | 87 | pip install -r requirements-test.txt 88 | 89 | - if: ${{ matrix.openaiVersion != 'openai-latest' }} 90 | name: Install openai version ${{ matrix.openaiVersion }} 91 | shell: bash -l {0} 92 | run: | 93 | pip install openai==${{ matrix.openaiVersion }} 94 | 95 | - name: Test with pytest 96 | shell: bash -l {0} 97 | run: | 98 | pytest ./tests/main -s -v --durations=10 --cov='ml_wrappers' --cov-report=xml --cov-report=html 99 | 100 | - name: Upload code coverage results 101 | uses: actions/upload-artifact@v4 102 | with: 103 | name: ${{ matrix.packageDirectory }}-${{ matrix.openaiVersion }}-${{ matrix.pythonVersion }}-${{ matrix.operatingSystem }}-code-coverage-results 104 | path: htmlcov 105 | # Use always() to always run this step to publish test results when there are test failures 106 | if: ${{ always() }} 107 | 108 | - if: ${{ (matrix.operatingSystem == 'windows-latest') && (matrix.pythonVersion == '3.9') }} 109 | name: Upload to codecov 110 | id: codecovupload1 111 | uses: codecov/codecov-action@v3 112 | with: 113 | token: ${{ secrets.CODECOV_TOKEN }} 114 | directory: . 115 | env_vars: OS,PYTHON 116 | fail_ci_if_error: false 117 | files: ./coverage.xml 118 | flags: unittests 119 | name: codecov-umbrella 120 | verbose: true 121 | 122 | - if: ${{ (steps.codecovupload1.outcome == 'failure') && (matrix.pythonVersion == '3.9') && (matrix.operatingSystem == 'windows-latest') }} 123 | name: Retry upload to codecov 124 | id: codecovupload2 125 | uses: codecov/codecov-action@v3 126 | with: 127 | token: ${{ secrets.CODECOV_TOKEN }} 128 | directory: . 
129 | env_vars: OS,PYTHON 130 | fail_ci_if_error: false 131 | files: ./coverage.xml 132 | flags: unittests 133 | name: codecov-umbrella 134 | verbose: true 135 | 136 | - name: Set codecov status 137 | if: ${{ (matrix.pythonVersion == '3.9') && (matrix.operatingSystem == 'windows-latest') }} 138 | shell: bash 139 | run: | 140 | if ${{ (steps.codecovupload1.outcome == 'success') || (steps.codecovupload2.outcome == 'success') }} ; then 141 | echo fine 142 | else 143 | exit 1 144 | fi 145 | -------------------------------------------------------------------------------- /.github/workflows/code-scan.yml: -------------------------------------------------------------------------------- 1 | name: CI code scan 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | schedule: 9 | - cron: '30 5 * * *' 10 | 11 | jobs: 12 | analyze: 13 | name: Analyze 14 | runs-on: ubuntu-latest 15 | permissions: 16 | actions: read 17 | contents: read 18 | security-events: write 19 | 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | language: ["python"] 24 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] 25 | # Learn more: 26 | # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed 27 | 28 | steps: 29 | - name: Checkout repository 30 | uses: actions/checkout@v4 31 | 32 | # Initializes the CodeQL tools for scanning. 33 | - name: Initialize CodeQL 34 | uses: github/codeql-action/init@v1 35 | with: 36 | languages: ${{ matrix.language }} 37 | # If you wish to specify custom queries, you can do so here or in a config file. 38 | # By default, queries listed here will override any specified in a config file. 39 | # Prefix the list here with "+" to use these queries and those in the config file. 
40 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 41 | 42 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 43 | # If this step fails, then you should remove it and run the build manually (see below) 44 | - name: Autobuild 45 | uses: github/codeql-action/autobuild@v1 46 | 47 | # ℹ️ Command-line programs to run using the OS shell. 48 | # 📚 https://git.io/JvXDl 49 | 50 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 51 | # and modify them (or add more) to build your code if your project 52 | # uses a compiled language 53 | 54 | #- run: | 55 | # make bootstrap 56 | # make release 57 | 58 | - name: Perform CodeQL Analysis 59 | uses: github/codeql-action/analyze@v1 60 | -------------------------------------------------------------------------------- /.github/workflows/linkcheck.yml: -------------------------------------------------------------------------------- 1 | 2 | # This is a basic workflow to help you get started with link checks in md files 3 | 4 | name: Link check 5 | 6 | # Controls when the workflow will run 7 | on: 8 | # Triggers the workflow on push or pull request events but only for the "main" branch 9 | push: 10 | branches: [ "main" ] 11 | pull_request: 12 | branches: [ "main" ] 13 | 14 | # Allows you to run this workflow manually from the Actions tab 15 | workflow_dispatch: 16 | 17 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 18 | jobs: 19 | markdown-link-check: 20 | runs-on: ubuntu-latest 21 | steps: 22 | - uses: actions/checkout@v4 23 | - uses: gaurav-nelson/github-action-markdown-link-check@v1 24 | with: 25 | use-verbose-mode: 'yes' 26 | -------------------------------------------------------------------------------- /.github/workflows/python-linting.yml: -------------------------------------------------------------------------------- 1 | # This workflow will lint python code with flake8. 
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python linting 5 | 6 | on: 7 | push: 8 | branches: [ main ] 9 | pull_request: 10 | branches: [ main ] 11 | schedule: 12 | - cron: '30 5 * * *' 13 | 14 | jobs: 15 | build: 16 | runs-on: ubuntu-latest 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | - name: Set up Python 3.10 21 | uses: actions/setup-python@v5 22 | with: 23 | python-version: '3.10' 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | pip install -r requirements-linting.txt 28 | - name: Check sorted python imports using isort 29 | run: | 30 | isort . -c 31 | - name: Lint code with flake8 32 | run: | 33 | flake8 . 34 | 35 | -------------------------------------------------------------------------------- /.github/workflows/release-ml-wrappers.yml: -------------------------------------------------------------------------------- 1 | name: Release ml-wrappers to PyPI 2 | 3 | # trigger manually only ("collaborator" or more permissions required) 4 | on: 5 | workflow_dispatch: 6 | inputs: 7 | releaseType: 8 | description: "Test or Prod PyPI?" 9 | required: true 10 | default: "Test" 11 | 12 | jobs: 13 | release-build: 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - name: fail if Test nor Prod 18 | if: ${{ ! (github.event.inputs.releaseType == 'Test' || github.event.inputs.releaseType == 'Prod') }} 19 | run: | 20 | echo "Only Test or Prod can be used." 
21 | exit 1 22 | 23 | - uses: actions/checkout@v4 24 | 25 | - uses: conda-incubator/setup-miniconda@v3 26 | with: 27 | auto-update-conda: true 28 | python-version: 3.9 29 | 30 | - name: Install pytorch on non-MacOS 31 | shell: bash -l {0} 32 | run: | 33 | conda install --yes --quiet pytorch torchvision captum cpuonly -c pytorch -c conda-forge --strict-channel-priority 34 | 35 | - name: update and upgrade pip, setuptools, wheel, and twine 36 | shell: bash -l {0} 37 | run: | 38 | python -m pip install --upgrade pip 39 | pip install --upgrade setuptools wheel twine 40 | 41 | - name: Install backwards-compatible tf-keras for transformers 42 | shell: bash -l {0} 43 | run: | 44 | pip install tf-keras 45 | 46 | - name: Install dev dependencies 47 | shell: bash -l {0} 48 | run: | 49 | pip install -r requirements-dev.txt 50 | 51 | - name: Install test dependencies 52 | shell: bash -l {0} 53 | run: | 54 | pip install -r requirements-test.txt 55 | 56 | - name: pip freeze 57 | shell: bash -l {0} 58 | run: pip freeze 59 | 60 | - name: build wheel for ml-wrappers 61 | shell: bash -l {0} 62 | run: python setup.py sdist bdist_wheel 63 | working-directory: python 64 | 65 | # run tests before publishing to PyPI 66 | - name: install ml-wrappers wheel locally 67 | shell: bash -l {0} 68 | run: find ./dist/ -name '*.whl' -exec pip install {} \; 69 | working-directory: python 70 | 71 | - name: run ml-wrappers tests 72 | shell: bash -l {0} 73 | run: pytest ./tests/main 74 | 75 | - name: Upload a ml-wrappers build result 76 | uses: actions/upload-artifact@v4 77 | with: 78 | name: ml_wrappers-${{ github.event.inputs.releaseType }} 79 | path: python/dist/ 80 | 81 | # publish to PyPI 82 | - name: Publish ml-wrappers package to Test PyPI 83 | if: ${{ github.event.inputs.releaseType == 'Test' }} 84 | uses: pypa/gh-action-pypi-publish@release/v1 85 | with: 86 | user: __token__ 87 | password: ${{ secrets.TEST_PYPI_API_TOKEN_ML_WRAPPERS }} 88 | repository_url: https://test.pypi.org/legacy/ 89 | 
packages_dir: python/dist/ 90 | - name: Publish ml-wrappers package to PyPI 91 | if: ${{ github.event.inputs.releaseType == 'Prod' }} 92 | uses: pypa/gh-action-pypi-publish@release/v1 93 | with: 94 | user: __token__ 95 | password: ${{ secrets.PYPI_API_TOKEN_ML_WRAPPERS }} 96 | packages_dir: python/dist/ 97 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | 2 | version: 2 3 | 4 | build: 5 | os: ubuntu-20.04 6 | tools: 7 | python: "3.8" 8 | 9 | sphinx: 10 | builder: html 11 | configuration: python/docs/conf.py 12 | 13 | python: 14 | install: 15 | - requirements: requirements-doc.txt 16 | - method: pip 17 | path: python 18 | 19 | formats: 20 | - epub 21 | - pdf 22 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Machine Learning Wrappers 2 | ![PyPI](https://img.shields.io/pypi/v/ml-wrappers) 3 | ![MIT license](https://img.shields.io/badge/License-MIT-blue.svg) 4 | ![versions](https://img.shields.io/pypi/pyversions/ml-wrappers) 5 | [![Downloads](https://static.pepy.tech/badge/ml-wrappers)](https://pepy.tech/project/ml-wrappers) 6 | 7 | [![CI Python minimal environment](https://github.com/microsoft/ml-wrappers/actions/workflows/CI-python-minimal.yml/badge.svg)](https://github.com/microsoft/ml-wrappers/actions/workflows/CI-python-minimal.yml) 8 | [![CI 
Python](https://github.com/microsoft/ml-wrappers/actions/workflows/CI-python.yml/badge.svg)](https://github.com/microsoft/ml-wrappers/actions/workflows/CI-python.yml) 9 | [![CI Python AutoML](https://github.com/microsoft/ml-wrappers/actions/workflows/CI-python-AutoML.yml/badge.svg)](https://github.com/microsoft/ml-wrappers/actions/workflows/CI-python-AutoML.yml) 10 | 11 | 12 | ## Overview and Motivation 13 | Responsible AI tools should be able to work with a broad spectrum of machine learning models and datasets. Much of this functionality is based on the ability to call predict or predict_proba on a model and get back the predicted values or probabilities in a specific format. 14 | 15 | However, there are many different models outside of scikit-learn and even within scikit-learn which have unusual outputs or require the input in a specific format. Some, like pytorch, don’t even have the predict/predict_proba function specification. 16 | 17 | We initially started adding wrappers in the https://github.com/interpretml/interpret-community repository but found that they are needed by other teams as well, including https://github.com/fairlearn/fairlearn and https://github.com/microsoft/responsible-ai-toolbox, hence the code has been moved to this repository. Anyone is welcome to use or contribute to these model and dataset wrappers. 18 | 19 | These wrappers handle a variety of frameworks, including pytorch, tensorflow, keras wrappers on tensorflow, variations on scikit-learn models (such as the SVC classification model that doesn’t have a predict_proba function), lightgbm and xgboost, as well as certain strange pipelines we have encountered from customers and internal users in the past. 20 | 21 | The dataset wrapper handles a variety of different dataset types and converts them to a common numpy or scipy sparse format for internal code to handle in one simple way. 
Hence, the code doesn’t have to worry about whether the current input is pandas or some other format, so it doesn’t have to include if/else branches everywhere in the code. 22 | 23 | The dataset wrapper simply converts the input to the common format, and after the common code finishes running, we convert the representation back to the original format, which can be handled by the original model. 24 | 25 | Currently supported data types include: 26 | 27 | - numpy.ndarray 28 | - pandas.DataFrame 29 | - pandas.Series 30 | - scipy.sparse.csr_matrix 31 | - shap.DenseData 32 | - torch.Tensor 33 | - tensorflow.python.data.ops.dataset_ops.BatchDataset 34 | 35 | For more information about the common format from the wrappers, please see the [Wrapper Specifications](https://github.com/microsoft/ml-wrappers/tree/main/docs/WrapperSpecifications.md) documentation. 36 | 37 | ## Installation 38 | 39 | To install the package, simply run: 40 | 41 | ``` 42 | pip install ml-wrappers 43 | ``` 44 | 45 | ## Code example of wrap_model 46 | 47 | ```python 48 | from ml_wrappers import wrap_model 49 | wrapped_model = wrap_model(model, input, model_task='regression') 50 | # Use wrapped model in any common code 51 | ``` 52 | 53 | ## Code example of DatasetWrapper 54 | 55 | ```python 56 | from ml_wrappers import DatasetWrapper 57 | wrapped_dataset = DatasetWrapper(input) 58 | numpy_or_scipy = wrapped_dataset.dataset 59 | # Perform some operations on common converted numpy or scipy dataset 60 | ... 61 | # Get back the original dataset type after modifications 62 | modified_input = wrapped_dataset.typed_dataset(numpy_or_scipy) 63 | ``` 64 | 65 | 66 | 67 | ## Contributing 68 | 69 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 70 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 71 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 
72 | 73 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 74 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 75 | provided by the bot. You will only need to do this once across all repos using our CLA. 76 | 77 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 78 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 79 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 80 | 81 | ## Trademarks 82 | 83 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft 84 | trademarks or logos is subject to and must follow 85 | [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks). 86 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. 87 | Any use of third-party trademarks or logos is subject to those third parties' policies. 88 | 89 | 90 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ## Security 5 | 6 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 
7 | 8 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 9 | 10 | ## Reporting Security Issues 11 | 12 | **Please do not report security vulnerabilities through public GitHub issues.** 13 | 14 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 15 | 16 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 17 | 18 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/en-us/msrc?rtc=1). 19 | 20 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 21 | 22 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 23 | * Full paths of source file(s) related to the manifestation of the issue 24 | * The location of the affected source code (tag/branch/commit or direct URL) 25 | * Any special configuration required to reproduce the issue 26 | * Step-by-step instructions to reproduce the issue 27 | * Proof-of-concept or exploit code (if possible) 28 | * Impact of the issue, including how an attacker might exploit the issue 29 | 30 | This information will help us triage your report more quickly. 
31 | 32 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://www.microsoft.com/en-us/msrc/bounty?rtc=1) page for more details about our active programs. 33 | 34 | ## Preferred Languages 35 | 36 | We prefer all communications to be in English. 37 | 38 | ## Policy 39 | 40 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 41 | 42 | 43 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # Support 2 | 3 | ## How to file issues and get help 4 | 5 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 6 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 7 | feature request as a new Issue. 8 | 9 | ## Microsoft Support Policy 10 | 11 | Support for this project, ml-wrappers, is limited to the resources listed above. 12 | -------------------------------------------------------------------------------- /docs/WrapperSpecifications.md: -------------------------------------------------------------------------------- 1 | # Wrapper Specifications: How to infer classification or regression model type 2 | 3 | In the ML Wrappers SDK there needs to be a clear understanding of the model type to have a solid contract for users and visualizations. 4 | 5 | For example, in the machine learning interpretability space for blackbox models such as in the https://github.com/interpretml/interpret-community/ library, this means that the user can pass in a function from a classifier or regressor, or a model that is a classifier or regressor. For model-specific explainers, the user would pass in the model directly. We can usually infer whether the model is a classifier or regressor in most cases. 
6 | 7 | - Functions - We can evaluate the function on the data and look at the output to understand if the model is a classifier or regressor. In general, if the user passes a function that returns a 1D array, we can infer it is a regressor. If the function returns a 2D array, we can infer it is a classifier. There is a tricky case where the function may return a 2D array of 1 column. In this case, we can throw an exception and force the user to specify model_task=(infer, classifier, regressor), and not allow automatic inferencing. The user can override this behavior if they specify an optional parameter model_task=(infer, classifier, regressor), which will have the value model_task=infer by default. 8 | 9 | - If they specify model_task=infer: 10 | - We will try to infer whether the function is for classification or regression based on the specifications above. 11 | - If they specify model_task=classifier and: 12 | - They have a 2D array - run function, treat output as classifier 13 | - They have a 1D array - add wrapper function to convert output to 2D array. Run function on samples and assert all values are probabilities. If they are not all 1, convert to a 2D array with 2 columns [1-p, p]. If they are greater than 1, throw exception. 14 | - They pass in classes parameter - run function, treat output as classifier 15 | - If they have model_task=regressor and: 16 | - They have a 2D array - if it has 1 column, treat it as regressor, if more than one column throw exception 17 | - They have a 1D array - run function, treat output as regressor 18 | - They pass in a classes parameter - throw exception, since user specified they are not using a classifier 19 | 20 | Note for some types of frameworks, like catboost, we have found that the prediction results (in this case the predicted probabilities) for a single instance may be of a different shape than prediction results for multiple instances.
In this scenario, we can call the model for both single and multiple instances and compare the output dimensionality, and if they differ by one, wrap the prediction function to add an additional dimension if a single instance is predicted on. 21 | 22 | - Models - We can convert the model to a function and then use the specifications listed above. We convert the model to a function by either using the predict_proba function, or, if it is not available, the predict function. In some specific cases, we may be able to get additional information from the model to help us decide which function to use. Specifically, if we know that the model is a Keras model, the model will always have a predict_proba method available. In this case, we can look at the shape of predict_proba, and if it has multiple columns or is a single column with values outside the range of [0, 1], we can by default use predict instead. Otherwise, we can use predict_proba. If the user specified model_task=classifier, this will always override the behavior for Keras models and specify whether to use predict or predict_proba. Also, if the user specifies that model_task=classifier, but the model does not have a predict_proba function, we can wrap the function in a one-hot vector of probabilities. After the model is converted to a function that conforms to our specifications, we can wrap that in our model wrapper, which can contain a reference to the original model in cases where it may be needed or for debugging. 23 | 24 | - Supported Frameworks - Our library can directly support the most popular machine learning frameworks. In general, based on the description above, the library can support models and functions in scikit-learn. However, we can extend support to other frameworks with the model wrapper concept. Currently, the list of supported frameworks, or frameworks we plan to support, are: 25 | - Scikit-Learn - This framework is directly supported by our APIs. 
26 | - LightGBM - We can wrap the function into a scikit-learn compatible wrapper. 27 | - XGBoost - We can wrap the function into a scikit-learn compatible wrapper. 28 | - Catboost - We can wrap the function into a scikit-learn compatible wrapper. 29 | - Keras with Tensorflow backend - Keras has both a predict_proba and predict function on all models, so it is difficult to know for sure if the model is a classifier or regressor. We can force the user to specify whether the model is a classifier or regressor in case only a single column is output, and then wrap the model in a model wrapper. If the user specifies the model is a regressor we can fix the structure to be 2D. 30 | - Pytorch - Pytorch does not have a predict or predict_proba function, but the model can be called on the dataset directly to get probabilities. The probabilities can then be transformed into predicted labels for classifiers. Similarly to Keras, we can force the user to specify whether the model is a classifier or regressor in case only a single column is output, and then wrap the model in a model wrapper. If the user specifies the model is a regressor we can fix the structure to be 2D. 31 | - ONNX - ONNX is not yet supported, but we plan to support it in the future. We can use a model wrapper to conform to the predict and predict_proba specifications the SDK requires. 32 | 33 | We would like to support caffe/caffe2 and other ML frameworks in the future as well. Please feel free to contribute to this repository. -------------------------------------------------------------------------------- /docs/object-detection-schema.md: -------------------------------------------------------------------------------- 1 | # Object Detection Scenario Documentation 2 | 3 | ML-Wrappers supports model wrapping of Pytorch object detection methods. We convert the model to a function by either using the predict_proba function, or, if it is not available, the predict function. 
4 | 5 | ## Schema 6 | For each image in the dataset, the model is used to generate predictions. Then, the predictions are filtered 7 | using non-maximal suppression (based on the iou threshold parameter). 8 | 9 | The predictions are a list of Pytorch tensors. Each tensor is composed of the labels, boxes (bounding boxes), scores. Example: 10 | 11 | ``` 12 | detections = [{'boxes': tensor([[ 97.0986, 170.7908, 241.4255, 516.5880]], grad_fn=), 'labels': tensor([2]), 'scores': tensor([0.9905], grad_fn=)}] 13 | 14 | predict_output = [[[2.0, 97.09860229492188, 170.7908172607422, 241.425537109375, 516.5879516601562, 0.9904877543449402]]] 15 | ``` 16 | 17 | ## Limitations 18 | This wrapper functionality only supports Pytorch machine learning models. 19 | -------------------------------------------------------------------------------- /docs/release-process.md: -------------------------------------------------------------------------------- 1 | # Release process for ml-wrappers 2 | 3 | When ready to release, create a separate PR in ml-wrappers to bump up the version in the version.py file under the python/ml_wrappers directory: 4 | 5 | ``` 6 | _major = '0' 7 | _minor = 8 | _patch = 9 | ``` 10 | 11 | In the notes make sure to mention all of the changes that have been introduced since the last release. Usually you can take the main description in the PR. 12 | 13 | After the PR has been merged, checkout the master branch and get the latest code. 14 | 15 | ## Release notes 16 | 17 | On the main page, click on releases, and select "Draft a new release". 18 | 19 | In "tag version", enter the version in the format v0.*.*, for example v0.10.0. Keep the target as master branch. 20 | 21 | In release title, enter either "Patch release v0.*.*" or "Release v0.*.*". 22 | 23 | In the release notes, enter the same release notes as in the PR above for all changes that have been made to the package.
24 | 25 | ## PyPI release 26 | 27 | For a guide on the PyPI release process, please see: 28 | 29 | https://packaging.python.org/tutorials/packaging-projects/ 30 | 31 | ### PyPI file 32 | 33 | Create a .pypirc file in the users home directory, it should look similar to: 34 | 35 | ``` 36 | [distutils] 37 | index-servers = 38 | pypi 39 | pypitest 40 | 41 | [pypi] 42 | repository: https://upload.pypi.org/legacy/ 43 | username: interpret-community 44 | password: PASSWORD_REMOVED 45 | 46 | [pypitest] 47 | repository: https://test.pypi.org/legacy/ 48 | username: interpret-community 49 | password: PASSWORD_REMOVED 50 | ``` 51 | 52 | Note interpret-community PyPI user is currently used to publish ml-wrappers to PyPI but this may change in the future. 53 | 54 | ### Clean repo 55 | 56 | Make sure the repo is clean prior to release on the master branch, run: 57 | 58 | ``` 59 | git clean -fdx 60 | ``` 61 | 62 | ### Creating wheel 63 | 64 | Generate the wheel file. First activate your release environment, this can be any conda environment on the release machine: 65 | ``` 66 | conda activate my_env 67 | ``` 68 | Then update setuptools and wheel, always make sure you have the latest version installed before releasing to PyPI: 69 | ``` 70 | pip install --upgrade setuptools wheel 71 | ``` 72 | Generate the wheel where setup.py is located: 73 | ``` 74 | cd (ml-wrappers location)\python 75 | python setup.py sdist bdist_wheel 76 | ``` 77 | If using WSL, it may be necessary to use 78 | ``` 79 | python setup.py sdist bdist_wheel --bdist-dir ~/temp/bdistwheel 80 | ``` 81 | You should see the following files in the dist directory: 82 | ``` 83 | dist/ 84 | ml-wrappers-0.0.1-py3-none-any.whl 85 | ml-wrappers-0.0.1.tar.gz 86 | ``` 87 | 88 | Upgrade twine before uploading to PyPI: 89 | ``` 90 | pip install --upgrade twine 91 | ``` 92 | 93 | Note: you may need to specify --user on some environments: 94 | ``` 95 | pip install --user --upgrade twine 96 | ``` 97 | 98 | Run twine upload to the PyPI 
test repository: 99 | ``` 100 | twine upload --repository pypitest dist/* 101 | ``` 102 | The twine install location may not be on PATH by default; either add it or call twine using its full path. 103 | 104 | Validate that the page looks correct on the PyPI release page. 105 | 106 | OPTIONAL: 107 | You can install and validate the package locally: 108 | 109 | pip install --index-url https://test.pypi.org/simple/ --no-deps ml-wrappers 110 | 111 | Run twine upload to the PyPI repository: 112 | ``` 113 | twine upload --repository pypi dist/* 114 | ``` -------------------------------------------------------------------------------- /python/README.md: -------------------------------------------------------------------------------- 1 | # Machine Learning Wrappers SDK for Python 2 | 3 | ### This package has been tested with Python 3.9, 3.10 and 3.11 4 | 5 | The Machine Learning Wrappers SDK provides a unified wrapper for various ML frameworks - to have one uniform scikit-learn format predict and predict_proba functions. 6 | 7 | Highlights of the package include: 8 | 9 | - A dataset wrapper to handle scipy sparse, pandas and numpy datasets in a uniform manner. 10 | - A model wrapper to handle models from various frameworks uniformly, including scikit-learn, tensorflow, pytorch, lightgbm and xgboost 11 | 12 | Please see the github website for the documentation and sample notebooks: 13 | https://github.com/microsoft/ml-wrappers 14 | -------------------------------------------------------------------------------- /python/docs/api_reference.rst: -------------------------------------------------------------------------------- 1 | .. _API Reference: 2 | 3 | API Reference 4 | ============= 5 | 6 | .. contents:: Table of Contents 7 | :local: 8 | 9 | ml_wrappers 10 | ----------- 11 | 12 | .. automodule:: ml_wrappers 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | ml_wrappers.common 18 | ------------------ 19 | 20 | .. 
automodule:: ml_wrappers.common 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | 25 | ml_wrappers.common.constants 26 | --------------------------- 27 | 28 | .. automodule:: ml_wrappers.common.constants 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | 33 | ml_wrappers.dataset 34 | ------------------- 35 | 36 | .. automodule:: ml_wrappers.dataset 37 | :members: 38 | :undoc-members: 39 | :show-inheritance: 40 | 41 | ml_wrappers.dataset.dataset_utils 42 | -------------------------------- 43 | 44 | .. automodule:: ml_wrappers.dataset.dataset_utils 45 | :members: 46 | :undoc-members: 47 | :show-inheritance: 48 | 49 | ml_wrappers.dataset.dataset_wrapper 50 | ---------------------------------- 51 | 52 | .. automodule:: ml_wrappers.dataset.dataset_wrapper 53 | :members: 54 | :undoc-members: 55 | :show-inheritance: 56 | 57 | ml_wrappers.dataset.timestamp_featurizer 58 | --------------------------------------- 59 | 60 | .. automodule:: ml_wrappers.dataset.timestamp_featurizer 61 | :members: 62 | :undoc-members: 63 | :show-inheritance: 64 | 65 | ml_wrappers.model 66 | ----------------- 67 | 68 | .. automodule:: ml_wrappers.model 69 | :members: 70 | :undoc-members: 71 | :show-inheritance: 72 | 73 | ml_wrappers.model.base_wrapped_model 74 | ----------------------------------- 75 | 76 | .. automodule:: ml_wrappers.model.base_wrapped_model 77 | :members: 78 | :undoc-members: 79 | :show-inheritance: 80 | 81 | ml_wrappers.model.evaluator 82 | --------------------------- 83 | 84 | .. automodule:: ml_wrappers.model.evaluator 85 | :members: 86 | :undoc-members: 87 | :show-inheritance: 88 | 89 | ml_wrappers.model.fastai_wrapper 90 | ------------------------------- 91 | 92 | .. automodule:: ml_wrappers.model.fastai_wrapper 93 | :members: 94 | :undoc-members: 95 | :show-inheritance: 96 | 97 | ml_wrappers.model.function_wrapper 98 | --------------------------------- 99 | 100 | .. 
automodule:: ml_wrappers.model.function_wrapper 101 | :members: 102 | :undoc-members: 103 | :show-inheritance: 104 | 105 | ml_wrappers.model.image_model_wrapper 106 | ------------------------------------ 107 | 108 | .. automodule:: ml_wrappers.model.image_model_wrapper 109 | :members: 110 | :undoc-members: 111 | :show-inheritance: 112 | 113 | ml_wrappers.model.model_utils 114 | ---------------------------- 115 | 116 | .. automodule:: ml_wrappers.model.model_utils 117 | :members: 118 | :undoc-members: 119 | :show-inheritance: 120 | 121 | ml_wrappers.model.model_wrapper 122 | ------------------------------ 123 | 124 | .. automodule:: ml_wrappers.model.model_wrapper 125 | :members: 126 | :undoc-members: 127 | :show-inheritance: 128 | 129 | ml_wrappers.model.predictions_wrapper 130 | ------------------------------------ 131 | 132 | .. automodule:: ml_wrappers.model.predictions_wrapper 133 | :members: 134 | :undoc-members: 135 | :show-inheritance: 136 | 137 | ml_wrappers.model.pytorch_wrapper 138 | -------------------------------- 139 | 140 | .. automodule:: ml_wrappers.model.pytorch_wrapper 141 | :members: 142 | :undoc-members: 143 | :show-inheritance: 144 | 145 | ml_wrappers.model.tensorflow_wrapper 146 | ----------------------------------- 147 | 148 | .. automodule:: ml_wrappers.model.tensorflow_wrapper 149 | :members: 150 | :undoc-members: 151 | :show-inheritance: 152 | 153 | ml_wrappers.model.text_model_wrapper 154 | ----------------------------------- 155 | 156 | .. automodule:: ml_wrappers.model.text_model_wrapper 157 | :members: 158 | :undoc-members: 159 | :show-inheritance: 160 | 161 | ml_wrappers.model.wrapped_classification_model 162 | --------------------------------------------- 163 | 164 | .. 
automodule:: ml_wrappers.model.wrapped_classification_model 165 | :members: 166 | :undoc-members: 167 | :show-inheritance: 168 | 169 | ml_wrappers.model.wrapped_classification_without_proba_model 170 | ----------------------------------------------------------- 171 | 172 | .. automodule:: ml_wrappers.model.wrapped_classification_without_proba_model 173 | :members: 174 | :undoc-members: 175 | :show-inheritance: 176 | 177 | ml_wrappers.model.wrapped_regression_model 178 | ----------------------------------------- 179 | 180 | .. automodule:: ml_wrappers.model.wrapped_regression_model 181 | :members: 182 | :undoc-members: 183 | :show-inheritance: 184 | 185 | ml_wrappers.version 186 | ------------------- 187 | 188 | .. automodule:: ml_wrappers.version 189 | :members: 190 | :undoc-members: 191 | :show-inheritance: -------------------------------------------------------------------------------- /python/docs/code_of_conduct.rst: -------------------------------------------------------------------------------- 1 | .. _code_of_conduct: 2 | 3 | Code of Conduct 4 | =============== 5 | 6 | This project has adopted the `Microsoft Open Source Code of Conduct `_. 7 | 8 | Resources: 9 | 10 | - `Microsoft Open Source Code of Conduct `_ 11 | - `Microsoft Code of Conduct FAQ `_ 12 | - Contact `opencode@microsoft.com `_ with questions or concerns -------------------------------------------------------------------------------- /python/docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 
2 | 3 | # -- Path setup -------------------------------------------------------------- 4 | 5 | import os 6 | import sys 7 | 8 | sys.path.insert(0, os.path.abspath('../../python')) 9 | 10 | 11 | # -- Project information ----------------------------------------------------- 12 | 13 | project = 'ml_wrappers' 14 | author = 'Microsoft Corporation' 15 | 16 | # The full version, including alpha/beta/rc tags 17 | release = '0.4.12' 18 | 19 | 20 | # -- General configuration --------------------------------------------------- 21 | 22 | # Add any Sphinx extension module names here, as strings. They can be 23 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 24 | # ones. 25 | extensions = [ 26 | 'sphinx.ext.autodoc', 27 | 'sphinx.ext.viewcode', 28 | 'sphinx.ext.githubpages', 29 | 'sphinx.ext.napoleon', 30 | 'sphinx.ext.mathjax', 31 | 'sphinx.ext.todo', 32 | 'sphinx.ext.coverage', 33 | 'sphinx.ext.ifconfig', 34 | 'sphinx.ext.intersphinx', 35 | 'sphinx.ext.doctest', 36 | 'sphinx.ext.inheritance_diagram', 37 | 'sphinx.ext.autosummary' 38 | ] 39 | 40 | # Add any paths that contain templates here, relative to this directory. 41 | templates_path = ['_templates'] 42 | 43 | # List of patterns, relative to source directory, that match files and 44 | # directories to ignore when looking for source files. 45 | # This pattern also affects html_static_path and html_extra_path. 46 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 47 | 48 | 49 | # -- Options for HTML output ------------------------------------------------- 50 | 51 | # The theme to use for HTML and HTML Help pages. See the documentation for 52 | # a list of builtin themes. 53 | html_theme = 'sphinx_rtd_theme' 54 | 55 | # Add any paths that contain custom static files (such as style sheets) here, 56 | # relative to this directory. They are copied after the builtin static files, 57 | # so a file named "default.css" will overwrite the builtin "default.css". 
58 | html_static_path = ['_static'] 59 | 60 | # -- Extension configuration ------------------------------------------------- 61 | 62 | # -- Options for intersphinx extension --------------------------------------- 63 | 64 | # Example configuration for intersphinx: refer to the Python standard library. 65 | intersphinx_mapping = {'https://docs.python.org/': None} 66 | 67 | # -- Options for todo extension ---------------------------------------------- 68 | 69 | # If true, `todo` and `todoList` produce output, else they produce nothing. 70 | todo_include_todos = True 71 | -------------------------------------------------------------------------------- /python/docs/contributing.rst: -------------------------------------------------------------------------------- 1 | .. _contributing: 2 | 3 | Contributing 4 | ============ 5 | 6 | We welcome contributions and suggestions! Please see the `CONTRIBUTING.md `_ file for more details. 7 | 8 | Feature Request 9 | --------------- 10 | 11 | If you have a feature request related to this project, please follow the template provided in the `feature_request.md `_ file. This template will guide you to describe the problem, the solution you'd like, any alternative solutions you've considered, and any additional context or screenshots about the feature request. 12 | 13 | Bug Report 14 | ---------- 15 | 16 | If you encounter a bug and want to report it, please use the template provided in the `bug_report.md `_ file. This template will guide you to describe the bug, the steps to reproduce it, the expected behavior, any screenshots, and any additional context about the problem. 17 | 18 | Support 19 | ------- 20 | 21 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing issues before filing new issues to avoid duplicates. For new issues, file your bug or feature request as a new Issue. Support for this project, ml-wrappers, is limited to the resources listed above. 
For more details, please see the `SUPPORT.md `_ file. 22 | 23 | Microsoft Open Source Code of Conduct 24 | ------------------------------------- 25 | 26 | This project has adopted the `Microsoft Open Source Code of Conduct `_. For more information, see the `CODE_OF_CONDUCT.md `_ file. 27 | 28 | Security 29 | -------- 30 | 31 | Microsoft takes the security of our software products and services seriously. If you believe you have found a security vulnerability in any Microsoft-owned repository, please report it to us as described in the `SECURITY.md `_ file. 32 | 33 | Release Process 34 | --------------- 35 | 36 | When ready to release, create a separate PR in ml-wrappers to bump up the version in the version.py file under the python/ml_wrappers directory. For more details, please see the `release-process.md `_ file. 37 | 38 | License 39 | ------- 40 | 41 | This project is licensed under the MIT License. For more details, please see the `LICENSE.txt `_ file. -------------------------------------------------------------------------------- /python/docs/dataset_wrapping.rst: -------------------------------------------------------------------------------- 1 | .. _dataset_wrapping: 2 | 3 | Dataset Wrapping 4 | ================ 5 | 6 | The ``DatasetWrapper`` class in the ``ml_wrappers`` package provides a uniform interface for handling datasets across different explainers. It supports various data types including numpy arrays, pandas DataFrame, pandas Series, scipy sparse matrices, shap.DenseData, torch.Tensor, and tensorflow.python.data.ops.dataset_ops.BatchDataset. 7 | 8 | .. code-block:: python 9 | 10 | from ml_wrappers.dataset import DatasetWrapper 11 | 12 | # Initialize the dataset wrapper 13 | wrapper = DatasetWrapper(dataset) 14 | 15 | Here, ``dataset`` is a matrix of feature vector examples (# examples x # features) for initializing the explainer. 
16 | 17 | The ``DatasetWrapper`` class also provides methods for operations such as summarizing data, taking the subset or sampling. It also provides an option to clear all references after use in explainers for memory optimization. 18 | 19 | .. code-block:: python 20 | 21 | # Initialize the dataset wrapper with clear_references option 22 | wrapper = DatasetWrapper(dataset, clear_references=True) 23 | 24 | The ``DatasetWrapper`` class also provides a method for sampling examples from the dataset. If the number of rows in the dataset is less than a lower bound, it returns the full dataset. If the number of rows is more than an upper bound, it samples randomly. It also provides an option to resample based on the optimal number of clusters. 25 | 26 | .. code-block:: python 27 | 28 | # Sample examples from the dataset 29 | sampled_dataset = wrapper.sample_examples() 30 | 31 | The ``DatasetWrapper`` class also provides a method to clear all references for memory optimization. 32 | 33 | .. code-block:: python 34 | 35 | # Clear all references 36 | wrapper._clear() 37 | 38 | The ``DatasetWrapper`` class is part of the ``ml_wrappers.dataset`` module, which also includes the ``CustomTimestampFeaturizer`` class for timestamp featurization. 39 | 40 | .. code-block:: python 41 | 42 | from ml_wrappers.dataset import CustomTimestampFeaturizer 43 | 44 | # Initialize the timestamp featurizer 45 | featurizer = CustomTimestampFeaturizer() -------------------------------------------------------------------------------- /python/docs/dependencies.rst: -------------------------------------------------------------------------------- 1 | .. _dependencies: 2 | 3 | Dependencies 4 | ============ 5 | 6 | The ml-wrappers library has several dependencies that are required for it to function correctly. These dependencies are listed in various files throughout the repository. 
Here are the main dependencies: 7 | 8 | python/ml_wrappers.egg-info/dependency_links.txt 9 | ------------------------------------------------ 10 | 11 | This file does not list any specific dependencies. 12 | 13 | requirements-test.txt 14 | --------------------- 15 | 16 | - pytest 17 | - pytest-cov 18 | - rai-test-utils==0.3.0 19 | 20 | requirements-linting.txt 21 | ------------------------ 22 | 23 | - flake8==4.0.1 24 | - flake8-bugbear==21.11.29 25 | - flake8-blind-except==0.1.1 26 | - flake8-breakpoint 27 | - flake8-builtins==1.5.3 28 | - flake8-logging-format==0.6.0 29 | - flake8-pytest-style 30 | - isort 31 | 32 | python/ml_wrappers.egg-info/requires.txt 33 | ---------------------------------------- 34 | 35 | - numpy 36 | - pandas 37 | - scipy 38 | - scikit-learn 39 | 40 | requirements-dev.txt 41 | -------------------- 42 | 43 | - lightgbm 44 | - xgboost 45 | - catboost 46 | - tensorflow 47 | - shap 48 | - transformers<4.40.0 49 | - datasets 50 | - raiutils 51 | - fastai 52 | - vision_explanation_methods 53 | - mlflow 54 | - joblib<1.3.0; python_version <= '3.7' 55 | - scikeras 56 | - openai; python_version >= '3.7' 57 | 58 | requirements-automl.txt 59 | ----------------------- 60 | 61 | - mlflow 62 | - azureml-automl-dnn-vision 63 | - vision_explanation_methods 64 | 65 | Please note that the versions of these dependencies are subject to change and it is always a good idea to check the latest version of the library for the most up-to-date information. -------------------------------------------------------------------------------- /python/docs/getting_started.rst: -------------------------------------------------------------------------------- 1 | .. _getting_started: 2 | 3 | Getting Started 4 | =============== 5 | 6 | This documentation provides an overview of the ML Wrappers SDK, which is designed to provide a uniform format for wrapping datasets and models. 
7 | 8 | Installation 9 | ------------ 10 | 11 | The ML Wrappers SDK can be installed via pip: 12 | 13 | .. code-block:: bash 14 | 15 | pip install ml-wrappers 16 | 17 | Supported Models 18 | ---------------- 19 | 20 | The ML Wrappers SDK supports the following models: 21 | 22 | - Scikit-Learn 23 | - LightGBM 24 | - XGBoost 25 | - Catboost 26 | - Keras with Tensorflow backend 27 | - Pytorch 28 | - ONNX (planned for future support) 29 | 30 | For more details, please refer to the :ref:`supported_models` section. 31 | 32 | Supported Frameworks 33 | -------------------- 34 | 35 | The ML Wrappers SDK supports the following frameworks: 36 | 37 | - Scikit-Learn 38 | - LightGBM 39 | - XGBoost 40 | - Catboost 41 | - Keras with Tensorflow backend 42 | - Pytorch 43 | - ONNX (planned for future support) 44 | 45 | For more details, please refer to the :ref:`supported_frameworks` section. 46 | 47 | Model Wrapping 48 | -------------- 49 | 50 | The ML Wrappers SDK provides a way to wrap models into a uniform format. This is done by either using the predict_proba function, or, if it is not available, the predict function. For more details, please refer to the :ref:`model_wrapping` section. 51 | 52 | Dataset Wrapping 53 | ---------------- 54 | 55 | The ML Wrappers SDK provides a way to wrap datasets into a uniform format. This is done using the DatasetWrapper class. For more details, please refer to the :ref:`dataset_wrapping` section. 56 | 57 | License Information 58 | ------------------- 59 | 60 | The ML Wrappers SDK is licensed under the MIT License. For more details, please refer to the :ref:`license_information` section. 61 | 62 | Support 63 | ------- 64 | 65 | Support for this project is limited to the resources listed in the :ref:`support` section. -------------------------------------------------------------------------------- /python/docs/image_model_wrapping.rst: -------------------------------------------------------------------------------- 1 | .. 
_image_model_wrapping: 2 | 3 | Image Model Wrapping 4 | ==================== 5 | 6 | The ML-Wrappers SDK supports model wrapping for vision-based models. The wrapping process is handled by the ``wrap_model`` function, which takes in a model, data, and a model task as parameters. The model task can be one of the following: ``ModelTask.IMAGE_CLASSIFICATION``, ``ModelTask.MULTILABEL_IMAGE_CLASSIFICATION``, or ``ModelTask.OBJECT_DETECTION``. 7 | 8 | The ``wrap_model`` function determines the type of the model and wraps it accordingly. For instance, if the model is a FastAI model, it is wrapped as a ``WrappedFastAIImageClassificationModel``. If the model is an AutoML model, it is wrapped as a ``WrappedMlflowAutomlImagesClassificationModel`` or a ``WrappedMlflowAutomlObjectDetectionModel`` depending on the model task. If the model is a callable pipeline, it is wrapped as a ``WrappedTransformerImageClassificationModel``. 9 | 10 | For object detection models, the ``wrap_model`` function can also take in an additional parameter, ``classes``, which is a list of class labels. The function returns the wrapped model and the model task. 11 | 12 | The wrapped model can then be used for various tasks such as validation and prediction. For instance, the ``validate_wrapped_classification_model`` function can be used to validate a wrapped classification model. 13 | 14 | The ML-Wrappers SDK also provides support for PyTorch models. The ``PytorchDRiseWrapper`` and ``WrappedObjectDetectionModel`` classes are used to wrap PyTorch models for object detection tasks. 15 | 16 | .. note:: 17 | The ML-Wrappers SDK currently only supports PyTorch machine learning models for object detection tasks. 18 | 19 | For more information on how to use the ML-Wrappers SDK for image model wrapping, refer to the `tests/main/test_image_model_wrapper.py` file in the repository. 
-------------------------------------------------------------------------------- /python/docs/index.rst: -------------------------------------------------------------------------------- 1 | .. ml-wrappers documentation master file, created by 2 | sphinx-quickstart on Tue Mar 22 13:30:40 2022. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to ml-wrappers's documentation! 7 | ======================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | overview 14 | getting_started 15 | supported_models 16 | supported_frameworks 17 | model_wrapping 18 | model_wrapper_specifications 19 | dataset_wrapping 20 | pytorch_model_wrapping 21 | tensorflow_model_wrapping 22 | image_model_wrapping 23 | text_model_wrapping 24 | object_detection_model_wrapping 25 | api_reference 26 | contributing 27 | support 28 | versioning 29 | dependencies 30 | license_information 31 | code_of_conduct 32 | privacy_policy 33 | 34 | Indices and tables 35 | ================== 36 | 37 | * :ref:`genindex` 38 | * :ref:`modindex` 39 | * :ref:`search` -------------------------------------------------------------------------------- /python/docs/license_information.rst: -------------------------------------------------------------------------------- 1 | .. _license_information: 2 | 3 | License Information 4 | =================== 5 | 6 | The ml-wrappers project is licensed under the MIT License. The full license text is as follows: 7 | 8 | :: 9 | 10 | MIT License 11 | 12 | Copyright (c) Microsoft Corporation. 
13 | 14 | Permission is hereby granted, free of charge, to any person obtaining a copy 15 | of this software and associated documentation files (the "Software"), to deal 16 | in the Software without restriction, including without limitation the rights 17 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 18 | copies of the Software, and to permit persons to whom the Software is 19 | furnished to do so, subject to the following conditions: 20 | 21 | The above copyright notice and this permission notice shall be included in all 22 | copies or substantial portions of the Software. 23 | 24 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 25 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 26 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 27 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 28 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 29 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 | SOFTWARE. 31 | 32 | For more information about the license, please refer to the `LICENSE` file in the repository. -------------------------------------------------------------------------------- /python/docs/model_wrapper_specifications.rst: -------------------------------------------------------------------------------- 1 | .. _model_wrapper_specifications: 2 | 3 | Model Wrapper Specifications 4 | ============================ 5 | 6 | In the ML Wrappers SDK, there needs to be a clear understanding of the model type to have a solid contract for users and visualizations. This is particularly important for blackbox models such as those used in the `interpret-community` library. The user can pass in a function from a classifier or regressor, or a model that is a classifier or regressor. For model-specific explainers, the user would pass in the model directly. 
We can usually infer whether the model is a classifier or regressor in most cases. 7 | 8 | Functions 9 | --------- 10 | 11 | We can evaluate the function on the data and look at the output to understand if the model is a classifier or regressor. In general, if the user passes a function that returns a 1D array, we can infer it is a regressor. If the function returns a 2D array, we can infer it is a classifier. There is a tricky case where the function may return a 2D array of 1 column. In this case, we can throw an exception and force the user to specify model_task=(infer, classifier, regressor), and not allow automatic inferencing. The user can override this behavior if they specify an optional parameter model_task=(infer, classifier, regressor), which will have the value model_task=infer by default. 12 | 13 | Models 14 | ------ 15 | 16 | We can convert the model to a function and then use the specifications listed above. We convert the model to a function by either using the predict_proba function, or, if it is not available, the predict function. In some specific cases, we may be able to get additional information from the model to help us decide which function to use. Specifically, if we know that the model is a Keras model, the model will always have a predict_proba method available. In this case, we can look at the shape of predict_proba, and if it has multiple columns or is a single column with values outside the range of [0, 1], we can by default use predict instead. Otherwise, we can use predict_proba. If the user specified model_task=classifier, this will always override the behavior for Keras models and specify whether to use predict or predict_proba. Also, if the user specifies that model_task=classifier, but the model does not have a predict_proba function, we can wrap the function in a one-hot vector of probabilities. 
After the model is converted to a function that conforms to our specifications, we can wrap that in our model wrapper, which can contain a reference to the original model in cases where it may be needed or for debugging. 17 | 18 | Supported Frameworks 19 | -------------------- 20 | 21 | Our library can directly support the most popular machine learning frameworks. In general, based on the description above, the library can support models and functions in scikit-learn. However, we can extend support to other frameworks with the model wrapper concept. Currently, the list of supported frameworks, or frameworks we plan to support, are: 22 | 23 | - Scikit-Learn 24 | - LightGBM 25 | - XGBoost 26 | - Catboost 27 | - Keras with Tensorflow backend 28 | - Pytorch 29 | - ONNX 30 | 31 | We would like to support caffe/caffe2 and other ML frameworks in the future as well. -------------------------------------------------------------------------------- /python/docs/model_wrapping.rst: -------------------------------------------------------------------------------- 1 | .. _model_wrapping: 2 | 3 | Model Wrapping 4 | ============== 5 | 6 | The ML Wrappers SDK provides a clear understanding of the model type to have a solid contract for users and visualizations. This is particularly important in the machine learning interpretability space for blackbox models. 7 | 8 | The user can pass in a function from a classifier or regressor, or a model that is a classifier or regressor. For model-specific explainers, the user would pass in the model directly. We can usually infer whether the model is a classifier or regressor in most cases. 9 | 10 | Functions 11 | --------- 12 | 13 | We can evaluate the function on the data and look at the output to understand if the model is a classifier or regressor. In general, if the user passes a function that returns a 1D array, we can infer it is a regressor. If the function returns a 2D array, we can infer it is a classifier. 
There is a tricky case where the function may return a 2D array of 1 column. In this case, we can throw an exception and force the user to specify model_task=(infer, classifier, regressor), and not allow automatic inferencing. The user can override this behavior if they specify an optional parameter model_task=(infer, classifier, regressor), which will have the value model_task=infer by default. 14 | 15 | Models 16 | ------ 17 | 18 | We can convert the model to a function and then use the specifications listed above. We convert the model to a function by either using the predict_proba function, or, if it is not available, the predict function. In some specific cases, we may be able to get additional information from the model to help us decide which function to use. Specifically, if we know that the model is a Keras model, the model will always have a predict_proba method available. In this case, we can look at the shape of predict_proba, and if it has multiple columns or is a single column with values outside the range of [0, 1], we can by default use predict instead. Otherwise, we can use predict_proba. If the user specified model_task=classifier, this will always override the behavior for Keras models and specify whether to use predict or predict_proba. Also, if the user specifies that model_task=classifier, but the model does not have a predict_proba function, we can wrap the function in a one-hot vector of probabilities. After the model is converted to a function that conforms to our specifications, we can wrap that in our model wrapper, which can contain a reference to the original model in cases where it may be needed or for debugging. 19 | 20 | Supported Frameworks 21 | -------------------- 22 | 23 | Our library can directly support the most popular machine learning frameworks. In general, based on the description above, the library can support models and functions in scikit-learn. 
However, we can extend support to other frameworks with the model wrapper concept. Currently, the list of supported frameworks, or frameworks we plan to support, are: 24 | 25 | - Scikit-Learn 26 | - LightGBM 27 | - XGBoost 28 | - Catboost 29 | - Keras with Tensorflow backend 30 | - Pytorch 31 | - ONNX (planned) 32 | 33 | We would like to support caffe/caffe2 and other ML frameworks in the future as well. -------------------------------------------------------------------------------- /python/docs/object_detection_model_wrapping.rst: -------------------------------------------------------------------------------- 1 | .. _object_detection_model_wrapping: 2 | 3 | Object Detection Model Wrapping 4 | =============================== 5 | 6 | ML-Wrappers supports model wrapping of Pytorch object detection methods. The model is converted to a function by either using the predict_proba function, or, if it is not available, the predict function. 7 | 8 | Schema 9 | ------ 10 | For each image in the dataset, the model is used to generate predictions. Then, the predictions are filtered using non-maximal suppression (based on the iou threshold parameter). 11 | 12 | The predictions are a list of Pytorch tensors. Each tensor is composed of the labels, boxes (bounding boxes), and scores. 13 | 14 | Example: 15 | 16 | .. code-block:: python 17 | 18 | detections = [{'boxes': tensor([[ 97.0986, 170.7908, 241.4255, 516.5880]], grad_fn=), 'labels': tensor([2]), 'scores': tensor([0.9905], grad_fn=)}] 19 | 20 | predict_output = [[[2.0, 97.09860229492188, 170.7908172607422, 241.425537109375, 516.5879516601562, 0.9904877543449402]]] 21 | 22 | Limitations 23 | ----------- 24 | This wrapper functionality only supports Pytorch machine learning models. 25 | 26 | Model Wrapping 27 | -------------- 28 | The model wrapping process involves the following steps: 29 | 30 | 1. Processing the raw detections to generate bounding boxes, class scores, and objectness scores. 31 | 2. 
Applying non-maximal suppression and score filtering based on the iou threshold and score threshold parameters. 32 | 3. Creating a list of detection records from the image predictions. 33 | 34 | Example: 35 | 36 | .. code-block:: python 37 | 38 | class WrappedObjectDetectionModel: 39 | """A class for wrapping a object detection model in the scikit-learn style.""" 40 | 41 | def __init__(self, model: Any, number_of_classes: int, device=Device.AUTO.value) -> None: 42 | """Initialize the WrappedObjectDetectionModel with the model and evaluation function.""" 43 | self._device = torch.device(_get_device(device)) 44 | model.eval() 45 | model.to(self._device) 46 | 47 | self._model = model 48 | self._number_of_classes = number_of_classes 49 | 50 | def predict(self, x, iou_threshold: float = 0.5, score_threshold: float = 0.5): 51 | """Create a list of detection records from the image predictions.""" 52 | detections = [] 53 | for image in x: 54 | if type(image) == Tensor: 55 | raw_detections = self._model(image.to(self._device).unsqueeze(0)) 56 | else: 57 | raw_detections = self._model(T.ToTensor()(image).to(self._device).unsqueeze(0)) 58 | 59 | Supported Frameworks 60 | -------------------- 61 | The following machine learning frameworks are supported: 62 | 63 | - Scikit-Learn 64 | - LightGBM 65 | - XGBoost 66 | - Catboost 67 | - Keras with Tensorflow backend 68 | - Pytorch 69 | 70 | ONNX is not yet supported, but there are plans to support it in the future. Other ML frameworks like caffe/caffe2 are also planned to be supported in the future. -------------------------------------------------------------------------------- /python/docs/overview.rst: -------------------------------------------------------------------------------- 1 | .. _overview: 2 | 3 | Overview 4 | ======== 5 | 6 | The ml-wrappers project is a Python library that provides a unified interface for wrapping machine learning models and datasets. 
It is designed to make it easier to work with different types of models and datasets, and to facilitate the process of explaining and interpreting machine learning models. 7 | 8 | The library includes support for a variety of machine learning frameworks, including Scikit-Learn, LightGBM, XGBoost, Catboost, Keras with Tensorflow backend, Pytorch, and ONNX. It also provides a mechanism for inferring whether a model is a classifier or regressor, and for wrapping models in a way that conforms to the specifications required by the library. 9 | 10 | The ml-wrappers library also provides a DatasetWrapper class that makes it easier to perform operations such as summarizing data, taking subsets of data, and sampling data. This class can handle a variety of data types, including numpy arrays, pandas DataFrames, pandas Series, scipy sparse matrices, and more. 11 | 12 | In addition to wrapping models and datasets, the ml-wrappers library also provides a number of utilities for working with machine learning models. These include functions for evaluating models, generating augmented data, and more. 13 | 14 | The library is released under the MIT License and adheres to the Microsoft Open Source Code of Conduct. It is maintained by Microsoft and contributions are welcome. -------------------------------------------------------------------------------- /python/docs/privacy_policy.rst: -------------------------------------------------------------------------------- 1 | .. _privacy_policy: 2 | 3 | Privacy Policy 4 | ============== 5 | 6 | The ml-wrappers project does not collect any personal data. As an open-source project, it is designed to be used locally on your machine. Any data used for model training or prediction remains on your local machine and is not sent or shared with any external entities. 
7 | 8 | However, please note that if you choose to contribute to the project by submitting pull requests or issues, your username and any information you include in these submissions will be publicly visible. We recommend that you do not include any sensitive personal information in these submissions. 9 | 10 | For more information about Microsoft's privacy policies, please visit the `Microsoft Privacy Statement `_. 11 | 12 | If you have any questions or concerns about privacy in relation to the use of ml-wrappers, please contact us at opencode@microsoft.com. -------------------------------------------------------------------------------- /python/docs/pytorch_model_wrapping.rst: -------------------------------------------------------------------------------- 1 | .. _pytorch_model_wrapping: 2 | 3 | Pytorch Model Wrapping 4 | ======================= 5 | 6 | The ML Wrappers library provides support for wrapping Pytorch models. This is achieved through the use of model wrappers and utilities specifically designed for Pytorch models. 7 | 8 | .. code-block:: python 9 | 10 | import logging 11 | import numpy as np 12 | import pandas as pd 13 | 14 | module_logger = logging.getLogger(__name__) 15 | module_logger.setLevel(logging.INFO) 16 | 17 | try: 18 | import torch 19 | except ImportError: 20 | module_logger.debug('Could not import torch, required if using a PyTorch model') 21 | 22 | try: 23 | from torchvision.transforms import ToTensor 24 | except ImportError: 25 | module_logger.debug('Could not import torchvision, required if using' + 26 | ' a vision PyTorch model') 27 | 28 | The library attempts to import the necessary Pytorch and torchvision modules. If these imports fail, a debug message is logged indicating that these modules are required when using a Pytorch model. 29 | 30 | The library provides a WrappedPytorchModel class for wrapping Pytorch models. This class is used in the wrap_model function to wrap the model if it is a Pytorch model. 31 | 32 | .. 
code-block:: python 33 | 34 | class WrappedPytorchModel(object): 35 | def __init__(self, model): 36 | self._model = model 37 | 38 | def predict(self, dataset): 39 | return self._model(dataset) 40 | 41 | def predict_proba(self, dataset): 42 | return self._model(dataset) 43 | 44 | The WrappedPytorchModel class provides a predict and predict_proba method, which call the model's predict method on the given dataset. 45 | 46 | The library also provides a PytorchModelInitializer class for initializing Pytorch models. This class is used in the wrapped_pytorch_model_initializer function to initialize the model. 47 | 48 | .. code-block:: python 49 | 50 | class PytorchModelInitializer(): 51 | def __init__(self, model_initializer, model_task): 52 | self._model_initializer = model_initializer 53 | self._model_task = model_task 54 | 55 | def __call__(self, X_train, y_train): 56 | fitted_model = self._model_initializer(X_train, y_train) 57 | wrapped_pytorch_model = WrappedPytorchModel(fitted_model) 58 | validate_wrapped_pytorch_model(wrapped_pytorch_model, X_train, 59 | self._model_task) 60 | return wrapped_pytorch_model 61 | 62 | The PytorchModelInitializer class provides a __call__ method, which initializes the model and wraps it using the WrappedPytorchModel class. 63 | 64 | .. note:: 65 | 66 | The ML Wrappers library only supports Pytorch machine learning models. -------------------------------------------------------------------------------- /python/docs/support.rst: -------------------------------------------------------------------------------- 1 | .. _support: 2 | 3 | Support 4 | ======= 5 | 6 | How to file issues and get help 7 | ------------------------------- 8 | 9 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing issues before filing new issues to avoid duplicates. For new issues, file your bug or feature request as a new issue. 
10 | 11 | Microsoft Support Policy 12 | ------------------------ 13 | 14 | Support for this project, ml-wrappers, is limited to the resources listed above. 15 | 16 | Feature request 17 | --------------- 18 | 19 | If you have a feature request related to this project, you can suggest an idea through the GitHub Issues. Please provide a clear and concise description of what the problem is and the solution you'd like. Also, describe any alternative solutions or features you've considered. You can add any other context or screenshots about the feature request. 20 | 21 | Bug report 22 | ---------- 23 | 24 | If you encounter a bug, you can create a report to help us improve. Please provide a clear and concise description of what the bug is. Include steps to reproduce the behavior and what you expected to happen. If applicable, add screenshots to help explain your problem. Also, provide information about your desktop or smartphone, including the OS, browser, and version. Add any other context about the problem. 25 | 26 | Microsoft Open Source Code of Conduct 27 | ------------------------------------- 28 | 29 | This project has adopted the `Microsoft Open Source Code of Conduct `_. For more information, see the `Microsoft Code of Conduct FAQ `_ or contact `opencode@microsoft.com `_ with any additional questions or comments. 30 | 31 | Security 32 | -------- 33 | 34 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations. If you believe you have found a security vulnerability in any Microsoft-owned repository, please report it to us as described in the `Microsoft's definition of a security vulnerability `_. Please do not report security vulnerabilities through public GitHub issues. Instead, report them to the Microsoft Security Response Center (MSRC) at `https://msrc.microsoft.com/create-report `_. 
-------------------------------------------------------------------------------- /python/docs/supported_frameworks.rst: -------------------------------------------------------------------------------- 1 | .. _supported_frameworks: 2 | 3 | Supported Frameworks 4 | ==================== 5 | 6 | The ml-wrappers library supports a variety of machine learning frameworks. The following frameworks are directly supported or can be supported with the model wrapper concept: 7 | 8 | - **Scikit-Learn**: This framework is directly supported by our APIs. 9 | 10 | - **LightGBM**: The functions of this framework can be wrapped into a scikit-learn compatible wrapper. 11 | 12 | - **XGBoost**: The functions of this framework can be wrapped into a scikit-learn compatible wrapper. 13 | 14 | - **Catboost**: The functions of this framework can be wrapped into a scikit-learn compatible wrapper. 15 | 16 | - **Keras with Tensorflow backend**: Keras has both a predict_proba and predict function on all models, so it is difficult to know for sure if the model is a classifier or regressor. We can force the user to specify whether the model is a classifier or regressor in case only a single column is output, and then wrap the model in a model wrapper. If the user specifies the model is a regressor we can fix the structure to be 2D. 17 | 18 | - **Pytorch**: Pytorch does not have a predict or predict_proba function, but the model can be called on the dataset directly to get probabilities. The probabilities can then be transformed into predicted labels for classifiers. Similarly to Keras, we can force the user to specify whether the model is a classifier or regressor in case only a single column is output, and then wrap the model in a model wrapper. If the user specifies the model is a regressor we can fix the structure to be 2D. 19 | 20 | - **ONNX**: ONNX is not yet supported, but we plan to support it in the future. 
We can use a model wrapper to conform to the predict and predict_proba specifications the SDK requires. 21 | 22 | We would like to support caffe/caffe2 and other ML frameworks in the future as well. Contributions to this repository are welcome. -------------------------------------------------------------------------------- /python/docs/supported_models.rst: -------------------------------------------------------------------------------- 1 | .. _supported_models: 2 | 3 | Supported Models 4 | ================ 5 | 6 | The ML-Wrappers library supports a variety of machine learning models. The following sections provide an overview of the supported models. 7 | 8 | Scikit-Learn 9 | ------------ 10 | 11 | Scikit-Learn models are directly supported by our APIs. 12 | 13 | LightGBM 14 | -------- 15 | 16 | LightGBM models can be wrapped into a scikit-learn compatible wrapper. 17 | 18 | XGBoost 19 | ------- 20 | 21 | XGBoost models can be wrapped into a scikit-learn compatible wrapper. 22 | 23 | Catboost 24 | -------- 25 | 26 | Catboost models can be wrapped into a scikit-learn compatible wrapper. 27 | 28 | Keras with Tensorflow backend 29 | ----------------------------- 30 | 31 | Keras models have both a predict_proba and predict function on all models, so it is difficult to know for sure if the model is a classifier or regressor. We can force the user to specify whether the model is a classifier or regressor in case only a single column is output, and then wrap the model in a model wrapper. If the user specifies the model is a regressor we can fix the structure to be 2D. 32 | 33 | Pytorch 34 | ------- 35 | 36 | Pytorch does not have a predict or predict_proba function, but the model can be called on the dataset directly to get probabilities. The probabilities can then be transformed into predicted labels for classifiers. 
Similarly to Keras, we can force the user to specify whether the model is a classifier or regressor in case only a single column is output, and then wrap the model in a model wrapper. If the user specifies the model is a regressor we can fix the structure to be 2D. 37 | 38 | ONNX 39 | ---- 40 | 41 | ONNX is not yet supported, but we plan to support it in the future. We can use a model wrapper to conform to the predict and predict_proba specifications the SDK requires. 42 | 43 | Future Support 44 | -------------- 45 | 46 | We would like to support caffe/caffe2 and other ML frameworks in the future as well. Contributions to this repository are welcome. -------------------------------------------------------------------------------- /python/docs/tensorflow_model_wrapping.rst: -------------------------------------------------------------------------------- 1 | .. _tensorflow_model_wrapping: 2 | 3 | Tensorflow Model Wrapping 4 | ========================= 5 | 6 | The ML Wrappers library provides support for wrapping Tensorflow models to conform to the required specifications for model explanations. This is achieved through the ``WrappedTensorflowModel`` class and the ``is_sequential`` function. 7 | 8 | WrappedTensorflowModel 9 | ---------------------- 10 | 11 | The ``WrappedTensorflowModel`` class is used to wrap a Tensorflow model. This class is initialized with the model to be wrapped. It provides the ``predict`` method for making predictions using the wrapped Tensorflow model. 12 | 13 | .. code-block:: python 14 | 15 | class WrappedTensorflowModel(object): 16 | def __init__(self, model): 17 | self._model = model 18 | 19 | def predict(self, dataset): 20 | if isinstance(dataset, pd.DataFrame): 21 | dataset = dataset.values 22 | return self._model.predict(dataset) 23 | 24 | is_sequential 25 | ------------- 26 | 27 | The ``is_sequential`` function checks if a given model is a sequential model. It returns True if the model is a sequential model and False otherwise. 
28 | 29 | .. code-block:: python 30 | 31 | def is_sequential(model): 32 | return str(type(model)).endswith("keras.engine.sequential.Sequential'>") 33 | 34 | Tensorflow Model Initializer 35 | ---------------------------- 36 | 37 | The Tensorflow Model Initializer is a class that initializes a Tensorflow model and wraps it using the ``WrappedTensorflowModel`` class. It also validates the wrapped Tensorflow model. 38 | 39 | .. code-block:: python 40 | 41 | class TensorflowModelInitializer(): 42 | def __init__(self, model_initializer, model_task): 43 | self._model_initializer = model_initializer 44 | self._model_task = model_task 45 | 46 | def __call__(self, X_train, y_train): 47 | fitted_model = self._model_initializer(X_train, y_train) 48 | wrapped_tf_model = WrappedTensorflowModel(fitted_model) 49 | validate_wrapped_tf_model(wrapped_tf_model, X_train, self._model_task) 50 | return wrapped_tf_model 51 | 52 | The ``wrapped_tensorflow_model_initializer`` function returns an instance of the TensorflowModelInitializer class. 53 | 54 | .. code-block:: python 55 | 56 | def wrapped_tensorflow_model_initializer(model_initializer, model_task): 57 | return TensorflowModelInitializer(model_initializer, model_task) 58 | 59 | Supported Frameworks 60 | -------------------- 61 | 62 | The ML Wrappers library supports a variety of machine learning frameworks. For Tensorflow models, the library can wrap the model in a model wrapper if the user specifies whether the model is a classifier or regressor in case only a single column is output. If the user specifies the model is a regressor, the structure can be fixed to be 2D. 63 | 64 | .. note:: 65 | 66 | The library can directly support the most popular machine learning frameworks. However, support can be extended to other frameworks with the model wrapper concept. 
-------------------------------------------------------------------------------- /python/docs/text_model_wrapping.rst: -------------------------------------------------------------------------------- 1 | .. _text_model_wrapping: 2 | 3 | Text Model Wrapping 4 | =================== 5 | 6 | The ml-wrappers library provides support for wrapping text-based models. This includes both classification and question-answering models. 7 | 8 | WrappedQuestionAnsweringModel 9 | ----------------------------- 10 | 11 | The WrappedQuestionAnsweringModel class is used for wrapping a Transformers model in the scikit-learn style. 12 | 13 | .. code-block:: python 14 | 15 | class WrappedQuestionAnsweringModel(object): 16 | """A class for wrapping a Transformers model in the scikit-learn style.""" 17 | 18 | def __init__(self, model): 19 | """Initialize the WrappedQuestionAnsweringModel.""" 20 | self._model = model 21 | 22 | def predict(self, dataset): 23 | """Predict the output using the wrapped Transformers model. 24 | 25 | :param dataset: The dataset to predict on. 26 | :type dataset: ml_wrappers.DatasetWrapper 27 | """ 28 | output = [] 29 | for context, question in zip(dataset['context'], dataset['questions']): 30 | answer = self._model({'context': context, 'question': question}) 31 | output.append(answer['answer']) 32 | return output 33 | 34 | WrappedTextClassificationModel 35 | ------------------------------ 36 | 37 | The WrappedTextClassificationModel class is used for wrapping a Transformers model in the scikit-learn style. 38 | 39 | .. code-block:: python 40 | 41 | class WrappedTextClassificationModel(object): 42 | """A class for wrapping a Transformers model in the scikit-learn style.""" 43 | 44 | def __init__(self, model, multilabel=False): 45 | """Initialize the WrappedTextClassificationModel.""" 46 | self._model = model 47 | if not shap_installed: 48 | raise ImportError("SHAP is not installed. 
Please install it " + 49 | "to use WrappedTextClassificationModel.") 50 | self._wrapped_model = models.TransformersPipeline(model) 51 | self._multilabel = multilabel 52 | 53 | def predict(self, dataset): 54 | """Predict the output using the wrapped Transformers model. 55 | 56 | :param dataset: The dataset to predict on. 57 | :type dataset: ml_wrappers.DatasetWrapper 58 | """ 59 | pipeline_dicts = self._wrapped_model.inner_model(dataset) 60 | output = [] 61 | for val in pipeline_dicts: 62 | if not isinstance(val, list): 63 | val = [val] 64 | scores = [obj["score"] for obj in val] 65 | if self._multilabel: 66 | threshold = MULTILABEL_THRESHOLD 67 | labels = np.where(np.array(scores) > threshold) 68 | predictions = np.zeros(len(scores)) 69 | predictions[labels] = 1 70 | output.append(predictions) 71 | else: 72 | max_score_index = np.argmax(scores) 73 | output.append(max_score_index) 74 | return np.array(output) 75 | 76 | def predict_proba(self, dataset): 77 | """Predict the output probability using the Transformers model. 78 | 79 | :param dataset: The dataset to predict_proba on. 80 | :type dataset: ml_wrappers.DatasetWrapper 81 | """ 82 | return self._wrapped_model(dataset) 83 | 84 | The wrap_model function is used to wrap the model. It takes as input the model, the data, and the model task (in this case, text classification or question answering). The function returns a wrapped model that can be used for further processing or evaluation. 85 | 86 | .. code-block:: python 87 | 88 | from ml_wrappers import wrap_model 89 | from ml_wrappers.common.constants import ModelTask 90 | 91 | wrapped_model = wrap_model(model, data, ModelTask.TEXT_CLASSIFICATION) 92 | 93 | For more information on how to use these classes and functions, please refer to the source code and the provided examples. 
-------------------------------------------------------------------------------- /python/docs/versioning.rst: -------------------------------------------------------------------------------- 1 | .. _versioning: 2 | 3 | Versioning 4 | ========== 5 | 6 | The version of the ml-wrappers package is defined in the ``version.py`` file located in the ``python/ml_wrappers`` directory. The version is specified using three variables: ``_major``, ``_minor``, and ``_patch``. 7 | 8 | .. code-block:: python 9 | 10 | name = 'ml_wrappers' 11 | _major = '0' 12 | _minor = '4' 13 | _patch = '12' 14 | version = '{}.{}.{}'.format(_major, _minor, _patch) 15 | 16 | The version follows the format of ``major.minor.patch``. 17 | 18 | - ``major``: This is incremented for major changes or redesigns in the package. 19 | - ``minor``: This is incremented for minor changes or additions of new features. 20 | - ``patch``: This is incremented for bug fixes or minor improvements. 21 | 22 | When ready to release a new version, create a separate PR in ml-wrappers to bump up the version in the ``version.py`` file. In the notes, make sure to mention all of the changes that have been introduced since the last release. 23 | 24 | .. code-block:: python 25 | 26 | _major = '0' 27 | _minor = '<new minor version>' 28 | _patch = '<new patch version>' 29 | 30 | After the PR has been merged, check out the master branch and get the latest code. For more details on the release process, refer to the `Release Process `_ section. -------------------------------------------------------------------------------- /python/ml_wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # --------------------------------------------------------- 4 | 5 | """Module for wrapping datasets and models in one uniform format.
6 | """ 7 | from .dataset import DatasetWrapper 8 | from .model import wrap_model 9 | from .version import name, version 10 | 11 | __all__ = ['DatasetWrapper', 'wrap_model'] 12 | 13 | import atexit 14 | # Setup logging infrustructure 15 | import logging 16 | import os 17 | 18 | # Only log to disk if environment variable specified 19 | ml_wrappers_c_logs = os.environ.get('ML_WRAPPERS_C_LOGS') 20 | if ml_wrappers_c_logs is not None: 21 | logger = logging.getLogger(__name__) 22 | logger.setLevel(logging.INFO) 23 | os.makedirs(os.path.dirname(ml_wrappers_c_logs), exist_ok=True) 24 | handler = logging.FileHandler(ml_wrappers_c_logs, mode='w') 25 | handler.setLevel(logging.INFO) 26 | logger.addHandler(handler) 27 | logger.info('Initializing logging file for ml-wrappers') 28 | 29 | def close_handler(): 30 | handler.close() 31 | logger.removeHandler(handler) 32 | atexit.register(close_handler) 33 | 34 | __name__ = name 35 | __version__ = version 36 | -------------------------------------------------------------------------------- /python/ml_wrappers/common/__init__.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # --------------------------------------------------------- 4 | 5 | """Defines a common directory shared across ML model and dataset wrappers.""" 6 | -------------------------------------------------------------------------------- /python/ml_wrappers/common/gpu_kmeans.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
# Copyright (c) 2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
The code is based on the similar utility function from SHAP:
https://github.com/slundberg/shap/blob/9411b68e8057a6c6f3621765b89b24d82bee13d4/shap/utils/_legacy.py
This version makes use of cuml kmeans instead of sklearn for speed.
"""

import numpy as np

try:
    import cuml
    from cuml import KMeans
    from cuml.preprocessing import SimpleImputer
    rapids_installed = True
except BaseException:  # noqa: B036
    rapids_installed = False
from scipy.sparse import issparse


def kmeans(X, k, round_values=True):
    """Summarize a dataset with k mean samples weighted by cluster size.

    :param X: Matrix of data samples to summarize (# samples x # features).
    :type X: numpy.ndarray or pandas.DataFrame or any scipy.sparse matrix
    :param k: Number of means to use for approximation.
    :type k: int
    :param round_values: If True, round each dimension of every mean sample
        to the nearest value present in X[:, i] so that discrete features
        always receive a valid value.
    :type round_values: bool
    :return: The summarized background data.
    :rtype: DenseData
    """
    if not rapids_installed:
        raise RuntimeError(
            "cuML is required to use GPU explainers. Check https://rapids.ai/start.html \
                for more information on how to install it.")
    if cuml.__version__ >= '21.08':
        # Newer cuml exposes a dedicated sampling helper that also returns
        # the cluster labels needed for the per-cluster weights.
        from cuml.explainer.sampling import kmeans_sampling
        summary, group_names, labels = kmeans_sampling(X, k, round_values, detailed=True)
        return DenseData(summary, group_names, None, 1.0 * np.bincount(labels))

    # Fallback path kept for backward compatibility with older cuml versions.
    group_names = [str(i) for i in range(X.shape[1])]
    if str(type(X)).endswith("'pandas.core.frame.DataFrame'>"):
        group_names = X.columns
        X = X.values

    # Impute any missing values before clustering.
    imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
    X = imputer.fit_transform(X)

    fitted = KMeans(n_clusters=k, random_state=0).fit(X)

    if round_values:
        for center_idx in range(k):
            for feat_idx in range(X.shape[1]):
                # sparse support courtesy of @PrimozGodec
                column = X[:, feat_idx].toarray().flatten() if issparse(X) else X[:, feat_idx]
                nearest = np.argmin(np.abs(column - fitted.cluster_centers_[center_idx, feat_idx]))
                fitted.cluster_centers_[center_idx, feat_idx] = X[nearest, feat_idx]
    return DenseData(
        fitted.cluster_centers_,
        group_names,
        None,
        1.0 * np.bincount(fitted.labels_)
    )


class Data:
    """Marker base class for summarized data containers."""

    def __init__(self):
        pass


class DenseData(Data):
    """Holds a dense data summary plus feature grouping and sample weights."""

    def __init__(self, data, group_names, *args):
        """Initialize the DenseData summary.

        :param data: The summarized data matrix.
        :type data: numpy.ndarray
        :param group_names: Names for each feature group.
        :type group_names: list
        :param args: Optional positional extras: groups (list of index
            arrays, or None for one group per feature) and sample weights.
        """
        default_groups = [np.array([i]) for i in range(len(group_names))]
        has_groups = len(args) > 0 and args[0] is not None
        self.groups = args[0] if has_groups else default_groups

        total_group_len = sum(len(g) for g in self.groups)
        num_samples = data.shape[0]
        transposed = False
        if total_group_len != data.shape[1]:
            # Group count matches the row axis instead: treat as transposed.
            transposed = True
            num_samples = data.shape[1]

        valid = ((not transposed and total_group_len == data.shape[1]) or
                 (transposed and total_group_len == data.shape[0]))
        assert valid, "# of names must match data matrix!"

        self.weights = args[1] if len(args) > 1 else np.ones(num_samples)
        self.weights /= np.sum(self.weights)
        weight_len = len(self.weights)
        valid = ((not transposed and weight_len == data.shape[0]) or
                 (transposed and weight_len == data.shape[1]))
        assert valid, "# weights must match data matrix!"

        self.transposed = transposed
        self.group_names = group_names
        self.data = data
        self.groups_size = len(self.groups)
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

"""Suppresses warnings on imports."""

import os
import warnings

TF_CPP_MIN_LOG_LEVEL = 'TF_CPP_MIN_LOG_LEVEL'


class tf_warnings_suppressor(object):
    """Context manager to suppress warnings from tensorflow."""

    def __init__(self):
        """Initialize the tf_warnings_suppressor."""
        self._entered = False
        # Remember the log level configured before suppression so it can be
        # restored on exit ('0' is the tensorflow default when unset).
        if TF_CPP_MIN_LOG_LEVEL in os.environ:
            self._default_tf_log_level = os.environ[TF_CPP_MIN_LOG_LEVEL]
        else:
            self._default_tf_log_level = '0'

    def __enter__(self):
        """Begins suppressing tensorflow warnings."""
        if self._entered:
            raise RuntimeError("Cannot enter %r twice" % self)
        self._entered = True
        os.environ[TF_CPP_MIN_LOG_LEVEL] = '2'

    def __exit__(self, *exc_info):
        """Finishes suppressing tensorflow warnings.

        :param exc_info: Exception information from the with-block, if any.
        """
        if not self._entered:
            raise RuntimeError("Cannot exit %r without entering first" % self)
        os.environ[TF_CPP_MIN_LOG_LEVEL] = self._default_tf_log_level
        # Reset the guard so the suppressor can be reused; previously a
        # second `with` block on the same instance raised spuriously even
        # after a clean exit.
        self._entered = False


class shap_warnings_suppressor(object):
    """Context manager to suppress warnings from shap."""

    def __init__(self):
        """Initialize the shap_warnings_suppressor."""
        self._catch_warnings = warnings.catch_warnings()
        self._tf_warnings_suppressor = tf_warnings_suppressor()
        self._entered = False

    def __enter__(self):
        """Begins suppressing shap warnings.

        :return: The list of recorded warnings from warnings.catch_warnings.
        """
        if self._entered:
            raise RuntimeError("Cannot enter %r twice" % self)
        self._entered = True
        self._tf_warnings_suppressor.__enter__()
        # A warnings.catch_warnings instance is single-use in CPython, so a
        # fresh one is created on each entry to keep this suppressor reusable.
        self._catch_warnings = warnings.catch_warnings()
        log = self._catch_warnings.__enter__()
        warnings.filterwarnings('ignore', 'Starting from version 2.2.1', UserWarning)
        return log

    def __exit__(self, *exc_info):
        """Finishes suppressing shap warnings.

        :param exc_info: Exception information from the with-block, if any.
        """
        if not self._entered:
            raise RuntimeError("Cannot exit %r without entering first" % self)
        # Forward exception info to the nested managers and reset the guard
        # so this suppressor can also be reused.
        self._tf_warnings_suppressor.__exit__(*exc_info)
        self._catch_warnings.__exit__(*exc_info)
        self._entered = False
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

"""Defines helpful utilities for the DatasetWrapper."""

import logging

import numpy as np
from scipy.sparse import csr_matrix, issparse
from scipy.sparse import vstack as sparse_vstack
from sklearn.utils import shuffle
from sklearn.utils.sparsefuncs import csc_median_axis_0

from ..common.gpu_kmeans import kmeans
from ..common.warnings_suppressor import shap_warnings_suppressor

with shap_warnings_suppressor():
    try:
        import shap
        shap_installed = True
    except BaseException:  # noqa: B036
        shap_installed = False

module_logger = logging.getLogger(__name__)
module_logger.setLevel(logging.INFO)


def _generate_augmented_data(x, max_num_of_augmentations=np.inf):
    """Augment x by appending x with itself shuffled columnwise many times.

    :param x: data that has to be augmented, array or sparse matrix of 2 dimensions
    :type x: numpy.ndarray or scipy.sparse.csr_matrix
    :param max_num_of_augmentations: number of times we stack permuted x to augment
        (the docstring previously documented a nonexistent ``max_augment_data_size``
        parameter).
    :type max_num_of_augmentations: int
    :return: augmented data with roughly number of rows that are equal to number of columns
    :rtype: numpy.ndarray or scipy.sparse.csr_matrix
    """
    x_augmented = x
    vstack = sparse_vstack if issparse(x) else np.vstack
    for i in range(min(x.shape[1] // x.shape[0] - 1, max_num_of_augmentations)):
        x_permuted = shuffle(x.T, random_state=i).T
        x_augmented = vstack([x_augmented, x_permuted])

    return x_augmented


def _summarize_data(X, k=10, use_gpu=False, to_round_values=True):
    """Summarize a dataset.

    For dense dataset, use k mean samples weighted by the number of data
    points they each represent.
    For sparse dataset, use a sparse row for the background with calculated
    median for dense columns.

    :param X: Matrix of data samples to summarize (# samples x # features).
    :type X: numpy.ndarray or pandas.DataFrame or scipy.sparse.csr_matrix
    :param k: Number of cluster centroids to use for approximation.
    :type k: int
    :param use_gpu: Whether to use the cuml (GPU) kmeans implementation
        instead of the shap implementation.
    :type use_gpu: bool
    :param to_round_values: When using kmeans, for each element of every cluster
        centroid to match the nearest value from X in the corresponding dimension.
        This ensures discrete features always get a valid value.
        Ignored for sparse data sample.
    :type to_round_values: bool
    :return: summarized numpy array or csr_matrix object.
    :rtype: numpy.ndarray or scipy.sparse.csr_matrix or DenseData
    """
    is_sparse = issparse(X)
    if not str(type(X)).endswith(".DenseData'>"):
        if is_sparse:
            module_logger.debug('Creating sparse data summary as csr matrix')
            # calculate median of sparse background data
            median_dense = csc_median_axis_0(X.tocsc())
            return csr_matrix(median_dense)
        elif len(X) > 10 * k:
            module_logger.debug('Create dense data summary with k-means')
            # use kmeans to summarize the examples for initialization
            # if there are more than 10 x k of them
            if use_gpu:
                return kmeans(X, k, to_round_values)
            else:
                if not shap_installed:
                    raise RuntimeError('shap is required to compute dataset summary in DatasetWrapper')
                return shap.kmeans(X, k, to_round_values)
    return X


def _convert_batch_dataset_to_numpy(batch_dataset):
    """Convert a TensorFlow batch dataset to a numpy array.

    :param batch_dataset: batch dataset to convert
    :type batch_dataset: BatchDataset
    :return: data, feature names and batch size
    :rtype: numpy.ndarray, list, int
    """
    batches = []
    set_keys = False
    features = []
    batch_size = 0
    for data, _ in batch_dataset:
        columns = []
        for column in data.values():
            columns.append(np.array(column))
        if not set_keys:
            # Feature names and batch size come from the first batch only.
            for key in data.keys():
                features.append(key)
            batch_size = columns[0].shape[0]
            set_keys = True
        batches.append(np.stack(columns, axis=1))
    converted_data = np.vstack(batches)
    return converted_data, features, batch_size
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

"""Defines a custom timestamp featurizer for converting timestamp columns to numeric."""

import numpy as np
import pandas as pd
from pandas.api.types import is_datetime64_any_dtype as is_datetime
from scipy.sparse import issparse
from sklearn.base import BaseEstimator, TransformerMixin


class CustomTimestampFeaturizer(BaseEstimator, TransformerMixin):
    """An estimator for featurizing timestamp columns to numeric data.

    :param features: Optional feature column names.
    :type features: list[str]
    :param return_pandas: Whether to return the transformed dataset as a pandas DataFrame.
    :type return_pandas: bool
    :param modify_in_place: Whether to modify the original dataset in place.
    :type modify_in_place: bool
    """

    def __init__(self, features=None, return_pandas=False, modify_in_place=False):
        """Initialize the CustomTimestampFeaturizer.

        :param features: Optional feature column names.
        :type features: list[str]
        :param return_pandas: Whether to return the transformed dataset as a pandas DataFrame.
        :type return_pandas: bool
        :param modify_in_place: Whether to modify the original dataset in place.
        :type modify_in_place: bool
        """
        self.features = features
        self.return_pandas = return_pandas
        self.modify_in_place = modify_in_place
        self._time_col_names = []

    def fit(self, X, y=None):
        """Fits the CustomTimestampFeaturizer.

        :param X: The dataset containing timestamp columns to featurize.
        :type X: numpy.ndarray or pandas.DataFrame or scipy.sparse.csr_matrix
        :param y: Optional target values (None for unsupervised transformations).
        :return: The fitted featurizer.
        :rtype: CustomTimestampFeaturizer
        """
        # Previously summarized (DenseData) or sparse input must already be
        # numeric, so there is nothing to featurize in those cases.
        if str(type(X)).endswith(".DenseData'>") or issparse(X):
            return self
        # Temporarily view numpy input as pandas for uniform timestamp handling.
        frame = pd.DataFrame(X, columns=self.features) if isinstance(X, np.ndarray) else X
        self._time_col_names = [col for col in frame.columns if is_datetime(frame[col])]
        # Record the minimum timestamp per column; transform uses it as the
        # origin when computing elapsed-time features.
        self._min = [frame[col].map(lambda ts: ts.timestamp()).min()
                     for col in self._time_col_names]
        return self

    def transform(self, X):
        """Transforms the timestamp columns to numeric type in the given dataset.

        Specifically, extracts the year, month, day, hour, minute, second and
        time since the minimum timestamp in the training dataset.

        :param X: The dataset containing timestamp columns to featurize.
        :type X: numpy.ndarray or pandas.DataFrame or scipy.sparse.csr_matrix
        :return: The transformed dataset.
        :rtype: numpy.ndarray or scipy.sparse.csr_matrix
        """
        frame = X
        if self._time_col_names:
            if isinstance(X, np.ndarray):
                # Temporarily convert to pandas for easier and uniform
                # timestamp handling.
                frame = pd.DataFrame(X, columns=self.features)
            elif not self.modify_in_place:
                # If originally pandas, copy to avoid changing the caller's
                # dataset.
                frame = X.copy()
            for idx, col in enumerate(self._time_col_names):
                # Extract the calendar components into new suffixed columns.
                for part in ('year', 'month', 'day', 'hour', 'minute', 'second'):
                    frame[col + '_' + part] = frame[col].map(
                        lambda ts, attr=part: getattr(ts, attr))
                # Replace the column itself with the difference from the
                # training minimum, keeping its name so the position of other
                # columns stays stable for downstream transformations.
                frame[col] = frame[col].map(lambda ts: ts.timestamp() - self._min[idx])
            if not self.return_pandas:
                frame = frame.values
        return frame
-------------------------------------------------------------------------------- 1 | # --------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # --------------------------------------------------------- 4 | 5 | """Common infrastructure, class hierarchy and utilities for model explanations.""" 6 | 7 | from .endpoint_wrapper import EndpointWrapperModel 8 | from .model_wrapper import _wrap_model, wrap_model 9 | from .openai_wrapper import OpenaiWrapperModel 10 | from .pytorch_wrapper import WrappedPytorchModel 11 | from .tensorflow_wrapper import WrappedTensorflowModel, is_sequential 12 | from .wrapped_classification_model import WrappedClassificationModel 13 | from .wrapped_regression_model import WrappedRegressionModel 14 | 15 | __all__ = ['EndpointWrapperModel', 'OpenaiWrapperModel', 16 | 'WrappedClassificationModel', 'WrappedPytorchModel', 17 | 'WrappedRegressionModel', 'WrappedTensorflowModel', 18 | '_wrap_model', 'is_sequential', 'wrap_model'] 19 | -------------------------------------------------------------------------------- /python/ml_wrappers/model/base_wrapped_model.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 
# ---------------------------------------------------------

"""Defines a base class for wrapping models."""


class BaseWrappedModel(object):
    """A base class for WrappedClassificationModel and WrappedRegressionModel."""

    def __init__(self, model, eval_function, examples, model_task):
        """Initialize the BaseWrappedModel.

        :param model: The wrapped model.
        :type model: object
        :param eval_function: The prediction function chosen for the model.
        :type eval_function: function
        :param examples: The model evaluation examples, used to rebuild the
            eval function after unpickling.
        :type examples: ml_wrappers.DatasetWrapper
        :param model_task: Whether the model is a classification or
            regression model.
        :type model_task: str
        """
        self._eval_function = eval_function
        self._model = model
        self._examples = examples
        self._model_task = model_task

    def __getstate__(self):
        """Influence how BaseWrappedModel is pickled.

        Removes _eval_function which may not be serializable, but only when
        _examples is available to re-create it on unpickling.

        :return: The state to be pickled, with _eval_function removed.
        :rtype: dict
        """
        odict = self.__dict__.copy()
        if self._examples is not None:
            del odict['_eval_function']
        return odict

    def __setstate__(self, state):
        """Influence how BaseWrappedModel is unpickled.

        Re-creates the _eval_function which may not be serializable.

        :param state: A dictionary of deserialized state.
        :type state: dict
        """
        self.__dict__.update(state)
        if self._examples is not None:
            # Imported lazily to avoid a circular import between the model
            # wrappers and the evaluator module.
            from ml_wrappers.model.evaluator import _eval_model
            eval_function, _ = _eval_model(self._model, self._examples, self._model_task)
            self._eval_function = eval_function
# ---------------------------------------------------------

import pandas as pd

from ..common.constants import ModelTask, SKLearn
from .function_wrapper import (_convert_to_two_cols, _FunctionWrapper,
                               _MultiVsSingleInstanceFunctionResolver)
from .pytorch_wrapper import WrappedPytorchModel
from .tensorflow_wrapper import WrappedTensorflowModel, is_sequential


def _is_classification_task(task):
    """Return True if the given task is one of the classification tasks.

    :param task: The task to check.
    :type task: str
    :return: True if the task is a classification task.
    :rtype: bool
    """
    return task in (ModelTask.CLASSIFICATION, ModelTask.IMAGE_CLASSIFICATION)


def _eval_model(model, examples, model_task):
    """Return function from model and specify the ML Domain using model evaluation on examples.

    :param model: The model to evaluate on the examples.
    :type model: model with a predict or predict_proba function
    :param examples: The model evaluation examples.
    :type examples: ml_wrappers.DatasetWrapper
    :param model_task: Optional parameter to specify whether the model is a
        classification or regression model. In most cases the type of the
        model can be inferred based on the shape of the output, where a
        classifier has a predict_proba method and outputs a 2 dimensional
        array, while a regressor has a predict method and outputs a
        1 dimensional array.
    :type model_task: str
    :return: The function chosen from given model and chosen domain.
    :rtype: (function, str)
    """
    wraps_dnn = (is_sequential(model) or
                 isinstance(model, (WrappedPytorchModel, WrappedTensorflowModel)))
    if not wraps_dnn:
        # Plain (e.g. sklearn-style) model: prefer predict_proba unless the
        # user explicitly asked for regression semantics.
        if hasattr(model, SKLearn.PREDICT_PROBA) and model_task != ModelTask.REGRESSION:
            return _eval_function(model.predict_proba, examples, model_task)
        return _eval_function(model.predict, examples, model_task)
    if model_task == ModelTask.REGRESSION:
        return _eval_function(model.predict, examples, ModelTask.REGRESSION)
    if model_task == ModelTask.IMAGE_CLASSIFICATION:
        examples_dataset = examples.dataset
        if isinstance(examples_dataset, pd.DataFrame):
            return _eval_function(model.predict_proba, examples,
                                  model_task, wrapped=True)
        is_pytorch_image_model = True
        expander = _FunctionWrapper(model.predict_proba,
                                    len(examples_dataset[0].shape),
                                    is_pytorch_image_model)
        return _eval_function(expander._function_input_expand_wrapper,
                              examples, model_task, wrapped=True)
    sample_output = model.predict_proba(examples.typed_wrapper_func(examples.dataset[0:1]))
    if sample_output.shape[1] == 1 and model_task == ModelTask.UNKNOWN:
        raise Exception("Please specify model_task to disambiguate model type since "
                        "result of calling function is 2D array of one column.")
    return _eval_function(model.predict_proba, examples, ModelTask.CLASSIFICATION)


def _eval_function(function, examples, model_task, wrapped=False):
    """Return function and specify the ML Domain using function evaluation on examples.

    :param function: The prediction function to evaluate on the examples.
    :type function: function
    :param examples: The model evaluation examples.
    :type examples: ml_wrappers.DatasetWrapper
    :param model_task: Optional parameter to specify whether the model is a
        classification or regression model. In most cases the type of the
        model can be inferred based on the shape of the output, where a
        classifier has a predict_proba method and outputs a 2 dimensional
        array, while a regressor has a predict method and outputs a
        1 dimensional array.
    :type model_task: str
    :param wrapped: Indicates if function has already been wrapped.
    :type wrapped: bool
    :return: The function chosen from given model and chosen domain.
    :rtype: (function, str)
    """
    # Try the function on a single example first; if it fails, retry with a
    # wrapper that converts a 1D array to 2D for functions that only accept
    # 2D arrays as input.
    examples_dataset = examples.dataset
    if str(type(examples_dataset)).endswith(".DenseData'>"):
        examples_dataset = examples_dataset.data
    try:
        single_output = function(examples.typed_wrapper_func(examples_dataset[0]))
        if single_output is None:
            raise Exception("Wrapped function returned None in model wrapper when called on dataset")
        batch_output = function(examples.typed_wrapper_func(examples_dataset[0:1]))
        if batch_output.shape != single_output.shape:
            if len(batch_output.shape) == len(single_output.shape) + 1:
                resolver = _MultiVsSingleInstanceFunctionResolver(function)
                return _eval_function(resolver._add_single_predict_dimension, examples, model_task)
            raise Exception("Wrapped function dimensions for single and multi predict unresolvable")
    except Exception as ex:
        # If function has already been wrapped, re-throw the error to
        # prevent an unbounded wrap-and-retry recursion.
        if wrapped:
            raise ex
        expander = _FunctionWrapper(function, len(examples_dataset[0].shape))
        return _eval_function(expander._function_input_expand_wrapper, examples,
                              model_task, wrapped=True)
    output_rank = len(single_output.shape)
    if output_rank == 2:
        if single_output.shape[1] == 1:
            # A 2D, single-column result is ambiguous: force the caller to
            # say whether this is a classifier or a regressor.
            if model_task == ModelTask.UNKNOWN:
                if isinstance(single_output, pd.DataFrame):
                    return (function, ModelTask.REGRESSION)
                raise Exception("Please specify model_task to disambiguate model type since "
                                "result of calling function is 2D array of one column.")
            if _is_classification_task(model_task):
                return _convert_to_two_cols(function, examples_dataset)
            # model_task == ModelTask.REGRESSION: the user asked for a
            # regressor, so flatten the single-column 2D output down to 1D.
            flattener = _FunctionWrapper(function)
            return (flattener._function_flatten, model_task)
        if model_task == ModelTask.UNKNOWN or _is_classification_task(model_task):
            return (function, ModelTask.CLASSIFICATION)
        raise Exception("Invalid shape for prediction: "
                        "Regression function cannot output 2D array with multiple columns")
    if output_rank == 1:
        if model_task == ModelTask.UNKNOWN:
            return (function, ModelTask.REGRESSION)
        if _is_classification_task(model_task):
            return _convert_to_two_cols(function, examples_dataset)
        return (function, model_task)
    if output_rank == 0:
        # A scalar came back: normalize it to a flattened array output.
        flattener = _FunctionWrapper(function)
        return (flattener._function_flatten, model_task)
    raise Exception("Failed to wrap function, may require custom wrapper for input function or model")
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

"""Defines model wrappers and utilities for fastai tabular models."""

import numpy as np

FASTAI_TABULAR_MODEL_SUFFIX = "fastai.tabular.learner.TabularLearner'>"


def _is_fastai_tabular_model(model):
    """Determine whether the given model is a fastai tabular learner.

    :param model: The model to check.
    :type model: object
    :return: True if the model is a fastai model, False otherwise.
    :rtype: bool
    """
    # Compare against the stringified type so that fastai does not have
    # to be installed for this check to run.
    return str(type(model)).endswith(FASTAI_TABULAR_MODEL_SUFFIX)


class WrappedFastAITabularModel(object):
    """A class for wrapping a FastAI tabular model in the scikit-learn style."""

    def __init__(self, model):
        """Initialize the WrappedFastAITabularModel.

        :param model: The model to wrap.
        :type model: fastai.learner.Learner
        """
        self._model = model
        first_loader = self._model.dls[0]
        # Remember the categorical and continuous feature columns so that
        # extra (non-feature) columns can be dropped before predicting.
        self.cat_cols = first_loader.dataset.cat_names
        self.cont_cols = first_loader.dataset.cont_names

    def _fastai_predict(self, dataset, index, model=None):
        """Predict the output using the wrapped FastAI model.

        :param dataset: The dataset to predict on.
        :type dataset: ml_wrappers.DatasetWrapper
        :param index: The index into the predicted data.
            Index 1 is for the predicted class and index
            2 is for the predicted probability.
        :type index: int
        :param model: The model to use for prediction.
            If None, the wrapped model is used.
        :type model: fastai.learner.Learner
        :return: The predicted data.
        :rtype: numpy.ndarray
        """
        model = self._model if model is None else model
        feature_cols = self.cat_cols + self.cont_cols
        results = []
        # fastai learners predict one row at a time
        for row_idx in range(len(dataset)):
            features = dataset.iloc[row_idx][feature_cols]
            results.append(np.array(model.predict(features)[index]))
        results = np.array(results)
        # Convert boolean class labels to integer class indices.
        if index == 1 and results.dtype == bool:
            results = results.astype(int)
        return results

    def _fastai_predict_without_callbacks(self, dataset, index):
        """Predict the output using the wrapped FastAI model without callbacks.

        :param dataset: The dataset to predict on.
        :type dataset: ml_wrappers.DatasetWrapper
        :param index: The index into the predicted data.
            Index 1 is for the predicted class and index
            2 is for the predicted probability.
        :type index: int
        :return: The predicted data.
        :rtype: numpy.ndarray
        """
        # Temporarily strip every non-default callback, since user
        # callbacks can fail during inference-only predict calls.
        default_cbs = ('TrainEvalCallback', 'Recorder', 'CastToTensor')
        extra_cbs = [cb for cb in self._model.cbs
                     if cb.__class__.__name__ not in default_cbs]
        with self._model.removed_cbs(extra_cbs) as model:
            return self._fastai_predict(dataset, index, model)

    def predict(self, dataset):
        """Predict the output value using the wrapped FastAI model.

        :param dataset: The dataset to predict on.
        :type dataset: ml_wrappers.DatasetWrapper
        :return: The predicted values.
        :rtype: numpy.ndarray
        """
        try:
            return self._fastai_predict(dataset, 1)
        except Exception:
            # Retry without user callbacks, which may break inference.
            return self._fastai_predict_without_callbacks(dataset, 1)

    def predict_proba(self, dataset):
        """Predict the output probability using the FastAI model.

        :param dataset: The dataset to predict_proba on.
        :type dataset: ml_wrappers.DatasetWrapper
        :return: The predicted probabilities.
        :rtype: numpy.ndarray
        """
        try:
            return self._fastai_predict(dataset, 2)
        except Exception:
            return self._fastai_predict_without_callbacks(dataset, 2)
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

"""Defines common model utilities."""

from ml_wrappers.common.warnings_suppressor import shap_warnings_suppressor

with shap_warnings_suppressor():
    try:
        from shap.utils import safe_isinstance
        shap_installed = True
    except BaseException:  # noqa: B036
        shap_installed = False


# Probability threshold used to turn multilabel scores into 0/1 indicators.
MULTILABEL_THRESHOLD = 0.5


def _is_transformers_pipeline(model):
    """Checks if the model is a transformers pipeline.

    :param model: The model to check.
    :type model: object
    :return: True if the model is a transformers pipeline, False otherwise.
    :rtype: bool
    """
    # safe_isinstance matches by qualified name, so transformers does not
    # need to be importable; without shap the check cannot run at all.
    if not shap_installed:
        return False
    return safe_isinstance(model, "transformers.pipelines.Pipeline")


def _is_callable_pipeline(model):
    """Checks if the model is a callable pipeline.

    Returns False if the model exposes a predict or predict_proba method.

    :param model: The model to check.
    :type model: object
    :return: True if the model is a callable pipeline, False otherwise.
    :rtype: bool
    """
    looks_like_sklearn = (hasattr(model, 'predict')
                          or hasattr(model, 'predict_proba'))
    return callable(model) and not looks_like_sklearn
# --------------------------------------------------------------------------
# python/ml_wrappers/model/model_wrapper.py (module preamble)
# --------------------------------------------------------------------------
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

"""Defines helpful model wrapper and utils for implicitly rewrapping the model
to conform to explainer contracts."""

import logging
import warnings
from typing import Union

import numpy as np
from ml_wrappers.model.wrapped_classification_model import \
    WrappedClassificationModel
from ml_wrappers.model.wrapped_classification_without_proba_model import \
    WrappedClassificationWithoutProbaModel
from ml_wrappers.model.wrapped_regression_model import WrappedRegressionModel
from sklearn.linear_model import SGDClassifier

from ..common.constants import (Device, ModelTask, SKLearn, image_model_tasks,
                                text_model_tasks)
from ..dataset.dataset_wrapper import DatasetWrapper
from .evaluator import _eval_function, _eval_model
from .fastai_wrapper import WrappedFastAITabularModel, _is_fastai_tabular_model
from .image_model_wrapper import _wrap_image_model
from .pytorch_wrapper import WrappedPytorchModel
from .tensorflow_wrapper import WrappedTensorflowModel, is_sequential
from .text_model_wrapper import _is_transformers_pipeline, _wrap_text_model

# NOTE(review): this filter is installed inside catch_warnings() with an
# empty body, so it is reverted as soon as the block exits and has no
# lasting effect -- confirm whether it was meant to wrap the imports above.
with warnings.catch_warnings():
    warnings.filterwarnings(
        'ignore', 'Starting from version 2.2.1', UserWarning)


module_logger = logging.getLogger(__name__)
module_logger.setLevel(logging.INFO)


try:
    import torch.nn as nn
except ImportError:
    module_logger.debug(
        'Could not import torch, required if using a PyTorch model')


def wrap_model(model, examples, model_task: str = ModelTask.UNKNOWN,
               num_classes: int = None,
               classes: Union[list, np.ndarray] = None,
               device=Device.AUTO.value):
    """If needed, wraps the model in a common API based on model task and
    prediction function contract.

    :param model: The model to evaluate on the examples.
    :type model: model with a predict or predict_proba function.
    :param examples: The model evaluation examples.
        Note the examples will be wrapped in a DatasetWrapper, if not
        wrapped when input.
    :type examples: ml_wrappers.DatasetWrapper or numpy.ndarray
        or pandas.DataFrame or pandas.Series or scipy.sparse.csr_matrix
        or shap.DenseData or torch.Tensor
    :param model_task: Optional parameter to specify whether the model
        is a classification or regression model.
        In most cases, the type of the model can be inferred
        based on the shape of the output, where a classifier
        has a predict_proba method and outputs a 2 dimensional
        array, while a regressor has a predict method and
        outputs a 1 dimensional array.
    :type model_task: str
    :param num_classes: Optional parameter specifying the number of classes
        in the dataset.
    :type num_classes: int
    :param classes: Optional parameter specifying a list of class names in
        the dataset.
    :type classes: list or numpy.ndarray
    :param device: Optional parameter specifying the device to move the
        model to. If not specified, then cpu is the default.
    :type device: str, for instance: 'cpu', 'cuda'
    :return: The wrapper model.
    :rtype: model
    """
    if model_task == ModelTask.UNKNOWN and _is_transformers_pipeline(model):
        # TODO: can we also dynamically figure out the task if it was
        # originally unknown for text scenarios?
        raise ValueError("ModelTask must be specified for text-based models")
    if model_task in text_model_tasks:
        return _wrap_text_model(model, examples, model_task, False)[0]
    if model_task in image_model_tasks:
        return _wrap_image_model(model, examples, model_task,
                                 False, num_classes, classes,
                                 device)[0]
    return _wrap_model(model, examples, model_task, False)[0]


def _wrap_model(model, examples, model_task, is_function):
    """If needed, wraps the model or function in a common API based on model
    task and prediction function contract.

    :param model: The model or function to evaluate on the examples.
    :type model: function or model with a predict or predict_proba function
    :param examples: The model evaluation examples.
        Note the examples will be wrapped in a DatasetWrapper, if not
        wrapped when input.
    :type examples: ml_wrappers.DatasetWrapper or numpy.ndarray
        or pandas.DataFrame or pandas.Series or scipy.sparse.csr_matrix
        or shap.DenseData or torch.Tensor
    :param model_task: Optional parameter to specify whether the model
        is a classification or regression model.
        In most cases, the type of the model can be inferred
        based on the shape of the output, where a classifier
        has a predict_proba method and outputs a 2 dimensional
        array, while a regressor has a predict method and
        outputs a 1 dimensional array.
    :type model_task: str
    :param is_function: Whether the model parameter is a plain function
        rather than a model object.
    :type is_function: bool
    :return: The function chosen from given model and chosen domain, or
        model wrapping the function and chosen domain.
    :rtype: (function, str) or (model, str)
    """
    if not isinstance(examples, DatasetWrapper):
        examples = DatasetWrapper(examples)
    if is_function:
        return _eval_function(model, examples, model_task)
    try:
        if isinstance(model, nn.Module):
            # Wrap the model in an extra layer that converts the numpy
            # array to a pytorch Variable and adds predict and
            # predict_proba functions
            model = WrappedPytorchModel(model)
    except (NameError, AttributeError):
        # nn is undefined when the torch import at module scope failed
        module_logger.debug(
            'Could not import torch, required if using a pytorch model')
    if _is_fastai_tabular_model(model):
        model = WrappedFastAITabularModel(model)
    if is_sequential(model):
        model = WrappedTensorflowModel(model)
    if _classifier_without_proba(model):
        model = WrappedClassificationWithoutProbaModel(model)
    eval_function, eval_ml_domain = _eval_model(
        model, examples, model_task)
    if eval_ml_domain == ModelTask.CLASSIFICATION:
        return WrappedClassificationModel(model, eval_function, examples), \
            eval_ml_domain
    return WrappedRegressionModel(model, eval_function, examples), \
        eval_ml_domain


def _classifier_without_proba(model):
    """Returns True if the given model is a classifier without predict_proba,
    e.g. SGDClassifier.

    :param model: The model to evaluate on the examples.
    :type model: model with a predict or predict_proba function
    :return: True if the given model is a classifier without predict_proba.
    :rtype: bool
    """
    return isinstance(model, SGDClassifier) and not \
        hasattr(model, SKLearn.PREDICT_PROBA)
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

"""Defines model wrappers and utilities for pytorch models."""

import logging

import numpy as np
import pandas as pd

module_logger = logging.getLogger(__name__)
module_logger.setLevel(logging.INFO)


try:
    import torch
except ImportError:
    module_logger.debug('Could not import torch, required if using a PyTorch model')

try:
    from torchvision.transforms import ToTensor
except ImportError:
    module_logger.debug('Could not import torchvision, required if using'
                        ' a vision PyTorch model')


class WrappedPytorchModel(object):
    """A class for wrapping a PyTorch model.

    Since the dataset is unavailable at initialization time, it cannot be
    inferred here whether the model is a classifier or a regressor. A
    predict_classes method is therefore provided for classification, while
    predict returns either regression values or classification
    probabilities.
    """

    def __init__(self, model, image_to_tensor=False):
        """Initialize the WrappedPytorchModel with the model and evaluation function.

        :param model: The PyTorch model to wrap.
        :type model: torch.nn.Module
        :param image_to_tensor: Whether to convert the image to tensor.
        :type image_to_tensor: bool
        """
        self._model = model
        # Set eval automatically for user for batchnorm and dropout layers
        self._model.eval()
        self._image_to_tensor = image_to_tensor

    def _convert_to_tensor(self, dataset):
        """Convert the dataset to a pytorch tensor.

        For image datasets, ToTensor from torchvision is used, which moves
        the channel to the first dimension and, for 2D images, adds a
        third dimension.

        :param dataset: The dataset to convert.
        :type dataset: ml_wrappers.DatasetWrapper
        :return: The converted dataset.
        :rtype: torch.Tensor
        """
        data = dataset
        if isinstance(data, pd.DataFrame):
            if self._image_to_tensor:
                data = data.iloc[0]
            data = data.values
        # A 1D array of arrays is stacked into a single 3D array instead
        # of an array of 2D arrays.
        if len(data.shape) == 1:
            if self._image_to_tensor and len(data[0].shape) == 2:
                # Append a channel dimension to 2D grayscale images.
                for idx in range(data.shape[0]):
                    data[idx] = np.expand_dims(data[idx], axis=2)
            data = np.stack(data)
        if not isinstance(data, torch.Tensor):
            if self._image_to_tensor:
                # torchvision can only convert one image at a time.
                # Note the pytorch wrapper expects the extra row dimension
                # to be expanded in the evaluator for the image case;
                # otherwise this would not work for a single image passed
                # to a predict call.
                converted_rows = []
                for row_idx in range(data.shape[0]):
                    row = data[row_idx]
                    if not isinstance(row, torch.Tensor):
                        row = ToTensor()(row)
                    converted_rows.append(row)
                data = torch.stack(converted_rows)
            else:
                data = torch.Tensor(data)
        return data

    def predict(self, dataset):
        """Predict the output using the wrapped PyTorch model.

        :param dataset: The dataset to predict on.
        :type dataset: ml_wrappers.DatasetWrapper
        :return: The prediction results.
        :rtype: numpy.ndarray
        """
        tensor_data = self._convert_to_tensor(dataset)
        with torch.no_grad():
            output = self._model(tensor_data).numpy()
        # Reshape a 1D output to 2D when the input was a single row.
        if len(dataset.shape) == 1:
            output = output.reshape(1, -1)
        return output

    def predict_classes(self, dataset):
        """Predict the class using the wrapped PyTorch model.

        :param dataset: The dataset to predict on.
        :type dataset: ml_wrappers.DatasetWrapper
        :return: The predicted classes.
        :rtype: numpy.ndarray
        """
        tensor_data = self._convert_to_tensor(dataset)
        with torch.no_grad():
            raw_output = self._model(tensor_data)
        rank = len(raw_output.shape)
        if rank == 1 or (rank > 1 and raw_output.shape[1] == 1):
            # Single-output models: threshold the score at 0.5.
            labels = np.where(raw_output.numpy() > 0.5, 1, 0)
        else:
            # Multi-output models: take the argmax across columns.
            labels = torch.max(raw_output, 1)[1].numpy()
        # Reshape a 1D output to 2D when the input was a single row.
        if len(dataset.shape) == 1:
            labels = labels.reshape(1, -1)
        return labels

    def predict_proba(self, dataset):
        """Predict the output probability using the wrapped PyTorch model.

        :param dataset: The dataset to predict_proba on.
        :type dataset: ml_wrappers.DatasetWrapper
        :return: The predicted probabilities.
        :rtype: numpy.ndarray
        """
        return self.predict(dataset)
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

"""Defines model wrappers and utilities for tensorflow models."""

import numpy as np
import pandas as pd

# Name of the legacy predict_classes method, removed after tensorflow 2.6.
PREDICT_CLASSES = 'predict_classes'


def is_sequential(model):
    """Returns True if the model is a sequential model.

    Note the model class name can be
    keras.src.engine.sequential.Sequential,
    keras.engine.sequential.Sequential or
    tensorflow.python.keras.engine.sequential.Sequential
    depending on the tensorflow version.
    In the 2.13 version, the namespace changed from
    keras.engine to keras.src.engine, and in 2.17 it changed
    again to keras.src.models.
    The check includes all of these cases.

    :param model: The model to check.
    :type model: tf.keras.Model
    :return: True if the model is a sequential model.
    :rtype: bool
    """
    # str.endswith accepts a tuple of suffixes, so a single call covers
    # every known namespace without building a list for any().
    sequential_suffixes = (
        # pre-2.13 namespace
        "keras.engine.sequential.Sequential'>",
        # namespace introduced in tensorflow 2.13
        "keras.src.engine.sequential.Sequential'>",
        # namespace introduced in tensorflow 2.17
        "keras.src.models.sequential.Sequential'>",
    )
    return str(type(model)).endswith(sequential_suffixes)


class WrappedTensorflowModel(object):
    """A class for wrapping a TensorFlow model.

    Note at time of initialization, since we don't have
    access to the dataset, we can't infer if this is for
    classification or regression case. Hence, we add
    the predict_classes method for classification, and keep
    predict for either outputting values in regression or
    probabilities in classification.
    """

    def __init__(self, model):
        """Initialize the WrappedTensorflowModel with the model.

        :param model: The model to wrap.
        :type model: tf.keras.Model
        """
        self._model = model

    def predict(self, dataset):
        """Predict the output using the wrapped TensorFlow model.

        :param dataset: The dataset to predict on.
        :type dataset: ml_wrappers.DatasetWrapper
        :return: The prediction results.
        :rtype: numpy.ndarray
        """
        # Convert the data to numpy
        if isinstance(dataset, pd.DataFrame):
            dataset = dataset.values
        return self._model.predict(dataset)

    def predict_classes(self, dataset):
        """Predict the class using the wrapped TensorFlow model.

        :param dataset: The dataset to predict on.
        :type dataset: ml_wrappers.DatasetWrapper
        :return: The predicted classes.
        :rtype: numpy.ndarray
        """
        # Note predict_classes was removed for models after
        # tensorflow version 2.6; prefer it when still available.
        if hasattr(self._model, PREDICT_CLASSES):
            return self._model.predict_classes(dataset)
        probabilities = self.predict_proba(dataset)
        return np.argmax(probabilities, axis=1)

    def predict_proba(self, dataset):
        """Predict the output probability using the wrapped TensorFlow model.

        :param dataset: The dataset to predict_proba on.
        :type dataset: ml_wrappers.DatasetWrapper
        :return: The predicted probabilities.
        :rtype: numpy.ndarray
        """
        return self.predict(dataset)
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

"""Defines wrappers for text-based models."""

import numpy as np
from ml_wrappers.common.constants import ModelTask
from ml_wrappers.common.warnings_suppressor import shap_warnings_suppressor
from ml_wrappers.model.model_utils import (MULTILABEL_THRESHOLD,
                                           _is_transformers_pipeline)

with shap_warnings_suppressor():
    try:
        from shap import models
        shap_installed = True
    except BaseException:  # noqa: B036
        shap_installed = False


def _wrap_text_model(model, examples, model_task, is_function):
    """If needed, wraps the model or function in a common API.

    Wraps the model based on model task and prediction function contract.

    :param model: The model or function to evaluate on the examples.
    :type model: function or model with a predict or predict_proba function
    :param examples: The model evaluation examples.
        Note the examples will be wrapped in a DatasetWrapper, if not
        wrapped when input.
    :type examples: ml_wrappers.DatasetWrapper or numpy.ndarray
        or pandas.DataFrame or pandas.Series or scipy.sparse.csr_matrix
        or shap.DenseData or torch.Tensor
    :param model_task: Parameter to specify whether the model is a
        'text_classification', 'sentiment_analysis', 'question_answering',
        'entailment' or 'summarizations' text model.
    :type model_task: str
    :return: The function chosen from given model and chosen domain, or
        model wrapping the function and chosen domain.
    :rtype: (function, str) or (model, str)
    """
    wrapped_model = model
    # Non-pipeline models are passed through unchanged.
    if _is_transformers_pipeline(model):
        if model_task == ModelTask.TEXT_CLASSIFICATION:
            wrapped_model = WrappedTextClassificationModel(model)
        elif model_task == ModelTask.QUESTION_ANSWERING:
            wrapped_model = WrappedQuestionAnsweringModel(model)
        elif model_task == ModelTask.MULTILABEL_TEXT_CLASSIFICATION:
            wrapped_model = WrappedTextClassificationModel(model,
                                                           multilabel=True)
    return wrapped_model, model_task


class WrappedTextClassificationModel(object):
    """A class for wrapping a Transformers model in the scikit-learn style."""

    def __init__(self, model, multilabel=False):
        """Initialize the WrappedTextClassificationModel.

        :param model: The transformers pipeline to wrap.
        :param multilabel: Whether predictions are multilabel indicators
            rather than a single class index.
        :type multilabel: bool
        """
        self._model = model
        if not shap_installed:
            raise ImportError("SHAP is not installed. Please install it "
                              "to use WrappedTextClassificationModel.")
        self._wrapped_model = models.TransformersPipeline(model)
        self._multilabel = multilabel

    def predict(self, dataset):
        """Predict the output using the wrapped Transformers model.

        :param dataset: The dataset to predict on.
        :type dataset: ml_wrappers.DatasetWrapper
        :return: Class indices, or a 0/1 indicator matrix when multilabel.
        :rtype: numpy.ndarray
        """
        raw_outputs = self._wrapped_model.inner_model(dataset)
        results = []
        for entry in raw_outputs:
            entries = entry if isinstance(entry, list) else [entry]
            scores = [item["score"] for item in entries]
            if self._multilabel:
                # Jagged, thresholded array of labels the model predicted.
                positive = np.where(np.array(scores) > MULTILABEL_THRESHOLD)
                # Indicator matrix of labels since numpy does not support
                # jagged arrays; this matches the format used by
                # sklearn.multioutput.MultiOutputClassifier.predict.
                indicator = np.zeros(len(scores))
                indicator[positive] = 1
                results.append(indicator)
            else:
                results.append(np.argmax(scores))
        return np.array(results)

    def predict_proba(self, dataset):
        """Predict the output probability using the Transformers model.

        :param dataset: The dataset to predict_proba on.
        :type dataset: ml_wrappers.DatasetWrapper
        :return: The predicted probabilities.
        :rtype: numpy.ndarray
        """
        return self._wrapped_model(dataset)


class WrappedQuestionAnsweringModel(object):
    """A class for wrapping a Transformers model in the scikit-learn style."""

    def __init__(self, model):
        """Initialize the WrappedQuestionAnsweringModel.

        :param model: The question-answering pipeline to wrap.
        """
        self._model = model

    def predict(self, dataset):
        """Predict the output using the wrapped Transformers model.

        :param dataset: The dataset to predict on; must provide 'context'
            and 'questions' columns.
        :type dataset: ml_wrappers.DatasetWrapper
        :return: The predicted answer strings.
        :rtype: numpy.ndarray
        """
        answers = [
            self._model({'context': context, 'question': question})['answer']
            for context, question in zip(dataset['context'],
                                         dataset['questions'])
        ]
        return np.array(answers)
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

"""Defines a class for wrapping classification models."""

import numpy as np
import pandas as pd
from ml_wrappers.common.constants import ModelTask, SKLearn
from ml_wrappers.model.base_wrapped_model import BaseWrappedModel
from ml_wrappers.model.function_wrapper import _FunctionWrapper
from ml_wrappers.model.pytorch_wrapper import WrappedPytorchModel
from ml_wrappers.model.tensorflow_wrapper import (WrappedTensorflowModel,
                                                  is_sequential)


class WrappedClassificationModel(BaseWrappedModel):
    """A class for wrapping a classification model."""

    def __init__(self, model, eval_function, examples=None):
        """Initialize the WrappedClassificationModel with the model and evaluation function."""
        super(WrappedClassificationModel, self).__init__(
            model, eval_function, examples, ModelTask.CLASSIFICATION)

    def predict(self, dataset):
        """Predict the output using the wrapped classification model.

        :param dataset: The dataset to predict on.
        :type dataset: ml_wrappers.DatasetWrapper
        :return: The predicted classes.
        :rtype: numpy.ndarray
        """
        model = self._model
        uses_predict_classes = (is_sequential(model)
                                or isinstance(model, (WrappedPytorchModel,
                                                      WrappedTensorflowModel)))
        if uses_predict_classes:
            predict_classes_fn = self._wrap_function(model.predict_classes)
            return predict_classes_fn(dataset).flatten()
        predict_fn = self._wrap_function(model.predict)
        predictions = predict_fn(dataset)
        if isinstance(predictions, pd.DataFrame):
            predictions = predictions.values.ravel()
        # Handle the possible case where the model has only a predict
        # function that outputs probabilities. Note this differs from
        # WrappedClassificationWithoutProbaModel, where predict_proba is
        # missing but predict already outputs classes.
        if not hasattr(model, SKLearn.PREDICT_PROBA):
            if len(predictions.shape) == 1:
                return np.argmax(predictions)
            return np.argmax(predictions, axis=1)
        # Collapse a two-dimensional, single-column prediction array.
        if len(predictions.shape) == 2 and predictions.shape[1] == 1:
            predictions = predictions.ravel()
        return np.array(predictions)

    def predict_proba(self, dataset):
        """Predict the output probability using the wrapped model.

        :param dataset: The dataset to predict_proba on.
        :type dataset: ml_wrappers.DatasetWrapper
        :return: The predicted probabilities.
        :rtype: numpy.ndarray
        """
        probabilities = self._eval_function(dataset)
        if isinstance(probabilities, pd.DataFrame):
            probabilities = probabilities.values
        return probabilities

    def _wrap_function(self, function):
        """Wrap a function to conform to the prediction input contracts.

        If the stored eval function was wrapped with
        _function_input_expand_wrapper, re-wrap the given function the same
        way so both accept identically shaped input.

        :param function: The function to wrap.
        :type function: function
        :return: The wrapped function.
        :rtype: function
        """
        expand_wrapper = _FunctionWrapper._function_input_expand_wrapper
        if self._eval_function.__name__ == expand_wrapper.__name__:
            base_dims = self._eval_function.__self__._base_dims
            rewrapped = _FunctionWrapper(function, base_dims)
            function = rewrapped._function_input_expand_wrapper
        return function
73 | :type function: function 74 | :return: The wrapped function. 75 | :rtype: function 76 | """ 77 | eval_function = self._eval_function 78 | exp_wrapper = _FunctionWrapper._function_input_expand_wrapper 79 | exp_wrapper_name = exp_wrapper.__name__ 80 | if eval_function.__name__ == exp_wrapper_name: 81 | base_dims = eval_function.__self__._base_dims 82 | function_wrapper = _FunctionWrapper(function, base_dims) 83 | function = function_wrapper._function_input_expand_wrapper 84 | return function 85 | -------------------------------------------------------------------------------- /python/ml_wrappers/model/wrapped_classification_without_proba_model.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # --------------------------------------------------------- 4 | 5 | """Defines a class for wrapping classifiers without predict_proba.""" 6 | 7 | import numpy as np 8 | 9 | 10 | class WrappedClassificationWithoutProbaModel(object): 11 | """A class for wrapping a classifier without a predict_proba method. 12 | 13 | Note: the classifier may not output numeric values for its predictions. 14 | We generate a trival boolean version of predict_proba 15 | """ 16 | 17 | def __init__(self, model): 18 | """Initialize the WrappedClassificationWithoutProbaModel with the model.""" 19 | self._model = model 20 | # Create a map from classes to index 21 | self._classes_to_index = {} 22 | for index, i in enumerate(self._model.classes_): 23 | self._classes_to_index[i] = index 24 | self._num_classes = len(self._model.classes_) 25 | 26 | def predict(self, dataset): 27 | """Predict the output using the wrapped regression model. 28 | 29 | :param dataset: The dataset to predict on. 
class WrappedRegressionModel(BaseWrappedModel):
    """A class for wrapping a regression model."""

    def __init__(self, model, eval_function, examples=None):
        """Initialize the WrappedRegressionModel with the model and evaluation function."""
        super(WrappedRegressionModel, self).__init__(
            model, eval_function, examples, ModelTask.REGRESSION)

    def predict(self, dataset):
        """Predict the output using the wrapped regression model.

        :param dataset: The dataset to predict on.
        :type dataset: ml_wrappers.DatasetWrapper
        :return: The predicted values.
        """
        raw_predictions = self._eval_function(dataset)
        # Flatten DataFrame outputs into a one-dimensional array
        if isinstance(raw_predictions, pd.DataFrame):
            return raw_predictions.values.ravel()
        return raw_predictions
# Package identity and semantic version parts
name = 'ml_wrappers'
_major = '0'
_minor = '6'
_patch = '0'
# Assemble the dotted semantic version string
version = '.'.join((_major, _minor, _patch))
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

"""Setup file for ml-wrappers package."""
import os
import shutil

from setuptools import find_packages, setup

# Execute version.py to pull `name` and `version` into this namespace
with open('ml_wrappers/version.py') as f:
    exec(compile(f.read(), f.name, 'exec'))

README_FILE = 'README.md'
LICENSE_FILE = 'LICENSE.txt'

# Note: used when generating the wheel but not on pip install of the package
if os.path.exists('../LICENSE'):
    shutil.copyfile('../LICENSE', LICENSE_FILE)


CLASSIFIERS = [
    'Development Status :: 5 - Production/Stable',
    'Intended Audience :: Developers',
    'Intended Audience :: Science/Research',
    'License :: OSI Approved :: MIT License',
    'Programming Language :: Python :: 3',
    'Programming Language :: Python :: 3.9',
    'Programming Language :: Python :: 3.10',
    'Programming Language :: Python :: 3.11',
    'Topic :: Scientific/Engineering :: Artificial Intelligence',
    'Operating System :: Microsoft :: Windows',
    'Operating System :: MacOS',
    'Operating System :: POSIX :: Linux'
]

DEPENDENCIES = [
    'numpy',
    'packaging',
    'pandas',
    'scipy',
    'scikit-learn'
]

# Long description comes straight from the README
with open(README_FILE, 'r', encoding='utf-8') as f:
    README = f.read()

setup(
    name=name,  # noqa: F821
    version=version,  # noqa: F821
    description='Machine Learning Wrappers SDK for Python',
    long_description=README,
    long_description_content_type='text/markdown',
    author='Microsoft Corp',
    author_email='ilmat@microsoft.com',
    license='MIT License',
    url='https://github.com/microsoft/ml-wrappers',
    classifiers=CLASSIFIERS,
    packages=find_packages(exclude=["*.tests"]),
    install_requires=DEPENDENCIES,
    zip_safe=False
)
-------------------------------------------------------------------------------- 1 | tensorflow 2 | azureml-automl-dnn-vision 3 | vision_explanation_methods -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | lightgbm 2 | xgboost 3 | catboost 4 | tensorflow 5 | # due to macos errors with old absl-py, remove with next absl-py release 6 | protobuf<5.26.0; platform_system == 'Darwin' 7 | shap 8 | transformers<4.40.0 9 | datasets 10 | raiutils 11 | fastai 12 | vision_explanation_methods 13 | mlflow 14 | joblib<1.3.0; python_version <= '3.7' 15 | scikeras 16 | openai; python_version >= '3.7' 17 | -------------------------------------------------------------------------------- /requirements-doc.txt: -------------------------------------------------------------------------------- 1 | sphinx==4.3.0 2 | pyyaml -------------------------------------------------------------------------------- /requirements-linting.txt: -------------------------------------------------------------------------------- 1 | flake8 2 | flake8-bugbear 3 | flake8-blind-except 4 | flake8-breakpoint 5 | flake8-builtins 6 | flake8-logging-format 7 | flake8-pytest-style 8 | flake8-all-not-strings 9 | isort 10 | -------------------------------------------------------------------------------- /requirements-test.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pytest-cov 3 | rai-test-utils==0.3.0 4 | -------------------------------------------------------------------------------- /tests/automl/test_automl_image_model_wrapper.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 
@pytest.mark.usefixtures('_clean_dir')
class TestImageModelWrapper(object):
    """Tests wrap_model on AutoML image classification models."""

    # Skip for older versions of python as azureml-automl-dnn-vision
    # works with 3.9 only
    @pytest.mark.skipif(
        sys.version_info < (3, 9),
        reason=('azureml-automl-dnn-vision not supported '
                'for older versions of python'))
    @pytest.mark.skipif(
        sys.version_info >= (3, 10),
        reason=('azureml-automl-dnn-vision not supported '
                'for newer versions of python'))
    def test_wrap_automl_image_classification_model(self):
        data = load_fridge_dataset()
        model_name = ModelNames.SERESNEXT
        multilabel = False
        with tempfile.TemporaryDirectory() as tmp_output_dir:

            task_type = shared_constants.Tasks.IMAGE_CLASSIFICATION
            number_of_classes = 10
            model_wrapper = ModelFactory().get_model_wrapper(
                model_name,
                number_of_classes,
                multilabel=multilabel,
                device="cpu",
                distributed=False,
                local_rank=0)

            # mock for Mlflow model generation
            model_file = os.path.join(tmp_output_dir, "model.pt")
            torch.save({
                'model_name': model_name,
                'number_of_classes': number_of_classes,
                'model_state': copy.deepcopy(model_wrapper.state_dict()),
                'specs': {
                    'multilabel': model_wrapper.multilabel,
                    'model_settings': model_wrapper.model_settings,
                    'labels': model_wrapper.labels
                },

            }, model_file)
            settings_file = os.path.join(
                tmp_output_dir,
                shared_constants.MLFlowLiterals.MODEL_SETTINGS_FILENAME)
            remote_path = os.path.join(tmp_output_dir, "outputs")

            with open(settings_file, 'w') as f:
                json.dump({}, f)

            conda_env = {
                'channels': ['conda-forge', 'pytorch'],
                'dependencies': [
                    'python=3.9',
                    'numpy==1.26.4',
                    'pytorch==2.2.0',
                    'torchvision==0.17.2',
                    {'pip': ['azureml-automl-dnn-vision']}
                ],
                'name': 'azureml-automl-dnn-vision-env'
            }

            mlflow_model_wrapper = MLFlowImagesModelWrapper(
                model_settings={},
                task_type=task_type,
                scoring_method=_get_scoring_method(task_type)
            )
            print("Saving mlflow model at {}".format(remote_path))
            mlflow.pyfunc.save_model(
                path=remote_path,
                python_model=mlflow_model_wrapper,
                artifacts={"model": model_file,
                           "settings": settings_file},
                conda_env=conda_env,
                signature=_get_mlflow_signature(task_type))
            mlflow_model = mlflow.pyfunc.load_model(remote_path)

            # load the paths as base64 images
            data = load_base64_images(data)
            wrapped_model = wrap_model(
                mlflow_model, data, ModelTask.IMAGE_CLASSIFICATION)
            validate_wrapped_classification_model(wrapped_model, data)
def load_squad_dataset():
    """Load the SQuAD train split as a pandas DataFrame.

    :return: DataFrame with 'context', 'questions' and 'answers' columns,
        where 'answers' is the first answer text for each question.
    :rtype: pandas.DataFrame
    """
    dataset = datasets.load_dataset("squad", split="train")
    # Take the first answer text for each question
    answers = [row['text'][0] for row in dataset['answers']]
    # Column access pulls whole columns in one pass instead of the much
    # slower row-by-row iteration over the Arrow-backed dataset
    data = pd.DataFrame({'context': dataset['context'],
                         'questions': dataset['question'],
                         'answers': answers})
    return data
class FetchCovid19Model(object):
    """Downloads and extracts the covid19 events model archive."""

    def __init__(self):
        """Initialize the fetcher; no state is required."""
        pass

    def fetch(self):
        """Download the model zip archive and extract it locally."""
        archive_name = COVID19_EVENTS_MODEL_NAME + '.zip'
        url = ('https://publictestdatasets.blob.core.windows.net/models/' +
               COVID19_EVENTS_MODEL_NAME + '.zip')
        urlretrieve(url, archive_name)
        # Extract into a directory named after the model
        with zipfile.ZipFile(archive_name, 'r') as unzip:
            unzip.extractall(COVID19_EVENTS_MODEL_NAME)
@pytest.fixture
def _clean_dir():
    """Run the test inside a fresh temporary working directory.

    Restores the previous working directory and removes the temporary
    directory on teardown; the previous implementation leaked the
    mkdtemp directory and left the process chdir'd into it for the
    rest of the session.
    """
    old_cwd = os.getcwd()
    with tempfile.TemporaryDirectory() as new_path:
        print("tmp test directory: " + new_path)
        os.chdir(new_path)
        try:
            yield new_path
        finally:
            # chdir out before cleanup: a process cannot delete its
            # own cwd on Windows
            os.chdir(old_cwd)
@pytest.fixture(scope='session')
def wine():
    """Session-scoped fixture providing the wine classification dataset."""
    x_train, x_test, y_train, y_test, features, classes = create_wine_data()
    keys = (DatasetConstants.X_TRAIN, DatasetConstants.X_TEST,
            DatasetConstants.Y_TRAIN, DatasetConstants.Y_TEST,
            DatasetConstants.FEATURES, DatasetConstants.CLASSES)
    values = (x_train, x_test, y_train, y_test, features, classes)
    return dict(zip(keys, values))
@pytest.fixture(scope='session')
def housing():
    """Session-scoped fixture providing the housing regression dataset."""
    x_train, x_test, y_train, y_test, features = create_housing_data()
    keys = (DatasetConstants.X_TRAIN, DatasetConstants.X_TEST,
            DatasetConstants.Y_TRAIN, DatasetConstants.Y_TEST,
            DatasetConstants.FEATURES)
    return dict(zip(keys, (x_train, x_test, y_train, y_test, features)))
# Default text encoding used across the tests
UTF8 = 'utf-8'


class DatasetConstants(object):
    """Dataset related constants.

    Keys used by the dataset fixtures to label train/test splits,
    feature names and class labels.
    """
    CATEGORICAL = 'categorical'
    CLASSES = 'classes'
    FEATURES = 'features'
    NUMERIC = 'numeric'
    X_TEST = 'x_test'
    X_TRAIN = 'x_train'
    Y_TEST = 'y_test'
    Y_TRAIN = 'y_train'


class ModelType(object):
    """Model type constants.

    Identifiers for the kind of model under test.
    """
    XGBOOST = 'xgboost'
    TREE = 'tree'
    DEFAULT = 'default'
38 | assert np.array_equal(numpy_converted, test_array) 39 | 40 | test_series = test_dataframe.squeeze().reset_index(drop=True) 41 | wrapper = DatasetWrapper(dataset=test_series) 42 | series_converted = wrapper.typed_dataset 43 | assert_series_equal(series_converted, test_series, 44 | check_names=False) 45 | 46 | sparse_matrix = csr_matrix((3, 4), 47 | dtype=np.int8) 48 | wrapper = DatasetWrapper(dataset=sparse_matrix) 49 | sparse_matrix_converted = wrapper.typed_dataset 50 | assert_sparse_equal(sparse_matrix_converted, sparse_matrix) 51 | 52 | background = _summarize_data(test_dataframe.values) 53 | DatasetWrapper(dataset=background) 54 | 55 | torch_input = torch.rand(100, 3) 56 | wrapper = DatasetWrapper(dataset=torch_input) 57 | torch_converted = wrapper.typed_dataset 58 | assert torch.all(torch.eq(torch_converted, torch_input)) 59 | 60 | tensor_slices = (dict(test_dataframe), None) 61 | tf_batch_dataset = tf.data.Dataset.from_tensor_slices(tensor_slices).batch(32) 62 | wrapper = DatasetWrapper(dataset=tf_batch_dataset) 63 | tf_batch_dataset_converted = wrapper.typed_dataset 64 | assert_batch_equal(tf_batch_dataset_converted, tf_batch_dataset) 65 | 66 | def test_unsupported_types(self): 67 | test_dataframe = pd.DataFrame(data=[[1, 2, 3]], columns=['c1,', 'c2', 'c3']) 68 | test_array = test_dataframe.values 69 | test_list = test_array.tolist() 70 | 71 | with pytest.raises( 72 | TypeError, 73 | match='Got type which is not supported in DatasetWrapper'): 74 | DatasetWrapper(test_list) 75 | -------------------------------------------------------------------------------- /tests/main/test_endpoint_wrapper.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 
class MockRead():
    """Mock class for urllib.request.urlopen().read()"""

    def __init__(self, json_data, fail_read=False):
        """Initialize the MockRead class.

        :param json_data: The json data to return from the read method.
        :type json_data: str
        :param fail_read: If True, the first read call raises an HTTPError;
            subsequent calls succeed.
        :type fail_read: bool
        """
        self.json_data = json_data
        self.fail_read = fail_read

    def read(self):
        """Return the json data, simulating one transient server error.

        :return: The json data.
        :rtype: str
        """
        if not self.fail_read:
            return self.json_data
        # Fail only on the first call so that a retry succeeds
        self.fail_read = False
        raise urllib.error.HTTPError('url', 500, 'Internal Server Error', {}, None)
def mock_api_key_auto_refresh_method():
    """Mock method for auto refreshing the API key.

    :return: The mock API key.
    :rtype: str
    """
    refreshed_key = 'mock_key'
    return refreshed_key
Corporation. All rights reserved. 3 | # --------------------------------------------------------- 4 | 5 | """Tests for wrap_model function""" 6 | 7 | import sys 8 | 9 | import pandas as pd 10 | import pytest 11 | from common_utils import (create_catboost_classifier, 12 | create_catboost_regressor, 13 | create_fastai_tabular_classifier, 14 | create_fastai_tabular_classifier_multimetric, 15 | create_fastai_tabular_regressor, 16 | create_keras_classifier, create_keras_regressor, 17 | create_lightgbm_classifier, 18 | create_lightgbm_regressor, 19 | create_pytorch_multiclass_classifier, 20 | create_pytorch_regressor, 21 | create_scikit_keras_multiclass_classifier, 22 | create_scikit_keras_regressor, 23 | create_sklearn_linear_regressor, 24 | create_sklearn_logistic_regressor, create_tf_model, 25 | create_xgboost_classifier, create_xgboost_regressor) 26 | from constants import DatasetConstants 27 | from ml_wrappers import wrap_model 28 | from ml_wrappers.dataset.dataset_wrapper import DatasetWrapper 29 | from train_wrapper_utils import (train_classification_model_numpy, 30 | train_classification_model_pandas, 31 | train_regression_model_numpy, 32 | train_regression_model_pandas) 33 | from wrapper_validator import validate_wrapped_regression_model 34 | 35 | try: 36 | import tensorflow as tf 37 | except ImportError: 38 | pass 39 | 40 | 41 | @pytest.mark.usefixtures('_clean_dir') 42 | class TestModelWrapper(object): 43 | def test_wrap_sklearn_logistic_regression_model(self, iris): 44 | train_classification_model_numpy( 45 | create_sklearn_logistic_regressor, iris) 46 | train_classification_model_pandas( 47 | create_sklearn_logistic_regressor, iris) 48 | train_classification_model_numpy( 49 | create_sklearn_logistic_regressor, iris, 50 | use_dataset_wrapper=False) 51 | train_classification_model_pandas( 52 | create_sklearn_logistic_regressor, iris, 53 | use_dataset_wrapper=False) 54 | 55 | def test_wrap_pytorch_classification_model(self, iris): 56 | 
    # Skip for older versions due to latest fastai not supporting 3.6
    @pytest.mark.skipif(sys.version_info.minor <= 6,
                        reason='Fastai not supported for older versions')
    # Skip if using macos due to fastai failing on latest macos
    @pytest.mark.skipif(sys.platform == 'darwin',
                        reason='Fastai not supported for latest macos')
    def test_wrap_fastai_classification_model(self, iris):
        """Wrap a fastai tabular classifier trained on the iris dataset."""
        train_classification_model_pandas(create_fastai_tabular_classifier, iris)
fastai failing on latest macos 95 | @pytest.mark.skipif(sys.platform == 'darwin', 96 | reason='Fastai not supported for latest macos') 97 | def test_wrap_fastai_classification_model_multimetric(self, iris): 98 | iris = iris.copy() 99 | data_to_transform = [DatasetConstants.Y_TRAIN, DatasetConstants.Y_TEST] 100 | for data in data_to_transform: 101 | iris[data][iris[data] == 2] = 1 102 | train_classification_model_pandas( 103 | create_fastai_tabular_classifier_multimetric, iris, 104 | validate_single_row=True) 105 | 106 | def test_wrap_sklearn_linear_regression_model(self, housing): 107 | train_regression_model_numpy( 108 | create_sklearn_linear_regressor, housing) 109 | train_regression_model_pandas( 110 | create_sklearn_linear_regressor, housing) 111 | train_regression_model_numpy( 112 | create_sklearn_linear_regressor, housing, 113 | use_dataset_wrapper=False) 114 | train_regression_model_pandas( 115 | create_sklearn_linear_regressor, housing, 116 | use_dataset_wrapper=False) 117 | 118 | def test_wrap_pytorch_regression_model(self, housing): 119 | train_regression_model_numpy( 120 | create_pytorch_regressor, housing) 121 | 122 | def test_wrap_xgboost_regression_model(self, housing): 123 | train_regression_model_numpy(create_xgboost_regressor, housing) 124 | train_regression_model_pandas(create_xgboost_regressor, housing) 125 | 126 | def test_wrap_catboost_regression_model(self, housing): 127 | train_regression_model_numpy(create_catboost_regressor, housing) 128 | train_regression_model_pandas(create_catboost_regressor, housing) 129 | 130 | def test_wrap_lightgbm_regression_model(self, housing): 131 | train_regression_model_numpy(create_lightgbm_regressor, housing) 132 | train_regression_model_pandas(create_lightgbm_regressor, housing) 133 | 134 | def test_wrap_keras_regression_model(self, housing): 135 | train_regression_model_numpy(create_keras_regressor, housing) 136 | train_regression_model_pandas(create_keras_regressor, housing) 137 | 138 | def 
test_wrap_scikit_keras_regression_model(self, housing):
        """Wrap a scikit-learn-style Keras regressor; validate on numpy and pandas inputs."""
        train_regression_model_numpy(create_scikit_keras_regressor, housing)
        train_regression_model_pandas(create_scikit_keras_regressor, housing)

    # Skip for older versions due to latest fastai not supporting 3.6
    @pytest.mark.skipif(sys.version_info.minor <= 6,
                        reason='Fastai not supported for older versions')
    # Skip if using macos due to fastai failing on latest macos
    @pytest.mark.skipif(sys.platform == 'darwin',
                        reason='Fastai not supported for latest macos')
    def test_wrap_fastai_regression_model(self, iris):
        """Wrap a fastai tabular regressor; validate on pandas input.

        NOTE(review): this regression test uses the iris fixture, unlike the
        other regression tests which use housing — confirm this is intended.
        """
        train_regression_model_pandas(create_fastai_tabular_regressor, iris)

    def test_batch_dataset(self, housing):
        """Wrap a TF model trained on a batched tf.data.Dataset and validate it."""
        X_train = housing[DatasetConstants.X_TRAIN]
        X_test = housing[DatasetConstants.X_TEST]
        y_train = housing[DatasetConstants.Y_TRAIN]
        y_test = housing[DatasetConstants.Y_TEST]
        features = housing[DatasetConstants.FEATURES]
        X_train_df = pd.DataFrame(X_train, columns=list(features))
        X_test_df = pd.DataFrame(X_test, columns=list(features))
        # Build (features-dict, labels) tuples and batch them for TF training.
        inp = (dict(X_train_df), y_train)
        inp_ds = tf.data.Dataset.from_tensor_slices(inp).batch(32)
        val = (dict(X_test_df), y_test)
        val_ds = tf.data.Dataset.from_tensor_slices(val).batch(32)
        model = create_tf_model(inp_ds, val_ds, features)
        wrapped_dataset = DatasetWrapper(val_ds)
        wrapped_model = wrap_model(model, wrapped_dataset, model_task='regression')
        validate_wrapped_regression_model(wrapped_model, val_ds)
--------------------------------------------------------------------------------
/tests/main/test_pytorch_model_wrapper.py:
--------------------------------------------------------------------------------
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

"""Tests for WrappedPytorchModel"""

import pytest
from common_utils import (create_pytorch_multiclass_classifier,
                          create_pytorch_regressor)
from ml_wrappers.common.constants import ModelTask
from ml_wrappers.model import WrappedPytorchModel
from train_wrapper_utils import (train_classification_model_numpy,
                                 train_regression_model_numpy)
from wrapper_validator import validate_wrapped_pytorch_model


@pytest.mark.usefixtures('_clean_dir')
class TestPytorchModelWrapper(object):
    def test_wrap_pytorch_classification_model(self, iris):
        """Train, wrap and validate a PyTorch multiclass classifier."""
        wrapped_init = wrapped_pytorch_model_initializer(
            create_pytorch_multiclass_classifier,
            model_task=ModelTask.CLASSIFICATION)
        train_classification_model_numpy(wrapped_init, iris)
        train_classification_model_numpy(wrapped_init, iris,
                                         use_dataset_wrapper=False)

    def test_wrap_pytorch_regression_model(self, housing):
        """Train, wrap and validate a PyTorch regressor."""
        wrapped_init = wrapped_pytorch_model_initializer(
            create_pytorch_regressor, model_task=ModelTask.REGRESSION)
        train_regression_model_numpy(
            wrapped_init, housing)


class PytorchModelInitializer():
    """Callable initializer that trains a model, wraps it in
    WrappedPytorchModel and validates the wrapper before returning it."""

    def __init__(self, model_initializer, model_task):
        # model_initializer: callable (X_train, y_train) -> fitted model
        self._model_initializer = model_initializer
        self._model_task = model_task

    def __call__(self, X_train, y_train):
        """Train via the stored initializer, wrap and validate the result."""
        fitted_model = self._model_initializer(X_train, y_train)
        wrapped_pytorch_model = WrappedPytorchModel(fitted_model)
        validate_wrapped_pytorch_model(wrapped_pytorch_model, X_train,
                                       self._model_task)
        return wrapped_pytorch_model


def wrapped_pytorch_model_initializer(model_initializer, model_task):
    """Return a PytorchModelInitializer for the given initializer and task."""
    return PytorchModelInitializer(model_initializer, model_task)
--------------------------------------------------------------------------------
/tests/main/test_text_model_wrapper.py:
--------------------------------------------------------------------------------
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

"""Tests for wrap_model function on text-based models"""

import pytest
from common_text_utils import (EMOTION, create_multilabel_text_pipeline,
                               create_question_answering_pipeline,
                               create_text_classification_pipeline,
                               load_covid19_emergency_event_dataset,
                               load_emotion_dataset, load_squad_dataset)
from ml_wrappers import wrap_model
from ml_wrappers.common.constants import ModelTask
from wrapper_validator import (validate_wrapped_classification_model,
                               validate_wrapped_multilabel_model,
                               validate_wrapped_question_answering_model)


@pytest.mark.usefixtures('_clean_dir')
class TestTextModelWrapper(object):
    @pytest.mark.skip("Need to update wrapper as only text pairs now supported")
    def test_wrap_transformers_model(self):
        """Wrap a transformers text-classification pipeline and validate it."""
        emotion_data = load_emotion_dataset()
        # Drop the label column; the wrapper takes raw documents only.
        docs = emotion_data[:10].drop(columns=EMOTION).values.tolist()
        pred = create_text_classification_pipeline()
        wrapped_model = wrap_model(pred, docs, ModelTask.TEXT_CLASSIFICATION)
        validate_wrapped_classification_model(wrapped_model, docs)

    def test_wrap_question_answering_model(self):
        """Wrap a question-answering pipeline on SQuAD rows and validate it."""
        squad_data = load_squad_dataset()
        # Keep question/context columns; answers are the labels.
        docs = squad_data[:10].drop(columns=['answers'])
        pred = create_question_answering_pipeline()
        wrapped_model = wrap_model(pred, docs, ModelTask.QUESTION_ANSWERING)
        validate_wrapped_question_answering_model(wrapped_model, docs)

    def test_wrap_multilabel_model(self):
        """Wrap a multilabel text pipeline and validate label-count output."""
        covid19_data = load_covid19_emergency_event_dataset()
        docs = covid19_data[:10]['text'].values.tolist()
        pred = create_multilabel_text_pipeline()
        wrapped_model = wrap_model(
            pred, docs, ModelTask.MULTILABEL_TEXT_CLASSIFICATION)
        num_labels = pred.model.num_labels
        validate_wrapped_multilabel_model(wrapped_model, docs, num_labels)
--------------------------------------------------------------------------------
/tests/main/test_tf_model_wrapper.py:
--------------------------------------------------------------------------------
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

"""Tests for WrappedTensorflowModel"""

import pytest
import tensorflow as tf
from common_utils import (create_keras_classifier, create_keras_regressor,
                          create_scikit_keras_regressor)
from ml_wrappers.common.constants import ModelTask
from ml_wrappers.model import WrappedTensorflowModel
from ml_wrappers.model.tensorflow_wrapper import is_sequential
from train_wrapper_utils import (train_classification_model_numpy,
                                 train_classification_model_pandas,
                                 train_regression_model_numpy,
                                 train_regression_model_pandas)
from wrapper_validator import validate_wrapped_tf_model


@pytest.mark.usefixtures('_clean_dir')
class TestTensorflowModelWrapper(object):
    def test_wrap_keras_classification_model(self, iris):
        """Train, wrap and validate a Keras classifier."""
        wrapped_init = wrapped_tensorflow_model_initializer(
            create_keras_classifier, model_task=ModelTask.CLASSIFICATION)
        train_classification_model_numpy(wrapped_init, iris)
        train_classification_model_pandas(wrapped_init, iris)

    def test_wrap_keras_regression_model(self, housing):
        """Train, wrap and validate a Keras regressor."""
        wrapped_init = wrapped_tensorflow_model_initializer(
            create_keras_regressor, model_task=ModelTask.REGRESSION)
        train_regression_model_numpy(wrapped_init, housing)
        train_regression_model_pandas(wrapped_init, housing)

    def test_wrap_scikit_keras_regression_model(self, housing):
        """Train, wrap and validate a scikit-learn-style Keras regressor."""
        wrapped_init =
wrapped_tensorflow_model_initializer(
            create_scikit_keras_regressor, model_task=ModelTask.REGRESSION)
        train_regression_model_numpy(wrapped_init, housing)
        train_regression_model_pandas(wrapped_init, housing)

    def test_validate_is_sequential(self):
        """is_sequential recognizes a tf.keras.Sequential instance."""
        sequential_layer = tf.keras.Sequential(layers=None, name=None)
        assert is_sequential(sequential_layer)


class TensorflowModelInitializer():
    """Callable initializer that trains a model, wraps it in
    WrappedTensorflowModel and validates the wrapper before returning it."""

    def __init__(self, model_initializer, model_task):
        # model_initializer: callable (X_train, y_train) -> fitted model
        self._model_initializer = model_initializer
        self._model_task = model_task

    def __call__(self, X_train, y_train):
        """Train via the stored initializer, wrap and validate the result."""
        fitted_model = self._model_initializer(X_train, y_train)
        wrapped_tf_model = WrappedTensorflowModel(fitted_model)
        validate_wrapped_tf_model(wrapped_tf_model, X_train, self._model_task)
        return wrapped_tf_model


def wrapped_tensorflow_model_initializer(model_initializer, model_task):
    """Return a TensorflowModelInitializer for the given initializer and task."""
    return TensorflowModelInitializer(model_initializer, model_task)
--------------------------------------------------------------------------------
/tests/main/test_timestamp_featurizer.py:
--------------------------------------------------------------------------------
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

"""Tests for CustomTimestampFeaturizer."""

import pandas as pd
import pytest
from constants import DatasetConstants
from ml_wrappers.dataset import CustomTimestampFeaturizer
from pandas.api.types import is_datetime64_any_dtype as is_datetime
from rai_test_utils.datasets.tabular import create_timeseries_data


@pytest.mark.usefixtures('_clean_dir')
class TestTimestampFeaturizer(object):

    def test_working(self):
        """Sanity check that the test module loads and runs."""
        assert True

    def test_no_timestamps(self, iris):
        """Transforming data with no timestamp columns is a no-op."""
        # create pandas dataframes without any timestamps
        x_train = pd.DataFrame(data=iris[DatasetConstants.X_TRAIN], columns=iris[DatasetConstants.FEATURES])
        x_test = pd.DataFrame(data=iris[DatasetConstants.X_TEST], columns=iris[DatasetConstants.FEATURES])
        featurizer = CustomTimestampFeaturizer(iris[DatasetConstants.FEATURES]).fit(x_train)
        result = featurizer.transform(x_test)
        # Assert result is same as before, pandas dataframe
        assert isinstance(result, pd.DataFrame)
        # Assert the result is the same as the original passed in data (no featurization was done)
        assert result.equals(x_test)

    @pytest.mark.parametrize(("sample_cnt_per_grain", "grains_dict"), [
        (240, {}),
        (20, {'fruit': ['apple', 'grape'], 'store': [100, 200, 50]})])
    def test_timestamp_featurization(self, sample_cnt_per_grain, grains_dict):
        """Featurizing timeseries data removes the timestamp column and adds features."""
        # create timeseries data
        X, _ = create_timeseries_data(sample_cnt_per_grain, 'time', 'y', grains_dict)
        original_cols = list(X.columns.values)
        # featurize and validate the timestamp column
        featurizer = CustomTimestampFeaturizer(original_cols).fit(X)
        result = featurizer.transform(X)
        # Form a temporary dataframe for validation
        tmp_result = pd.DataFrame(result)
        # Assert there are no timestamp columns
        assert ([column for column in tmp_result.columns if is_datetime(tmp_result[column])] == [])
        # Assert the expected number of columns: the single time column is
        # expanded into 6 featurized columns added to the original columns
        assert (result.shape[1] == len(original_cols) + 6)

    @pytest.mark.parametrize(("return_pandas"), [True, False])
    def test_separate_fit_with_no_features(self, return_pandas):
        """Separate fit/transform and fit_transform give the same featurized shape."""
        sample_cnt_per_grain = 20
        grains_dict = {'fruit': ['apple', 'grape'], 'store': [100, 200, 50]}
        # create timeseries data
        X, _ = create_timeseries_data(sample_cnt_per_grain, 'time', 'y', grains_dict)
        original_cols = list(X.columns.values)
        # featurize and validate the timestamp column as a separate fit call and fit_transform
        # Note: in this case we don't pass the feature names to the constructor
        ctf1 = CustomTimestampFeaturizer(return_pandas=return_pandas)
        ctf2 = CustomTimestampFeaturizer(return_pandas=return_pandas)
        ctf1.fit(X)
        result1 = ctf1.transform(X)
        result2 = ctf2.fit_transform(X)
        for result in [result1, result2]:
            if not return_pandas:
                assert not isinstance(result, pd.DataFrame)
                # Form a temporary dataframe for validation
                result = pd.DataFrame(result)
            # Assert there are no timestamp columns
            assert ([column for column in result.columns if is_datetime(result[column])] == [])
            # Assert the expected number of columns: the single time column is
            # expanded into 6 featurized columns added to the original columns
            assert (result.shape[1] == len(original_cols) + 6)
--------------------------------------------------------------------------------
/tests/minimal/test_minimal.py:
--------------------------------------------------------------------------------
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

"""Tests minimal imports and functions from ml-wrappers"""

import pytest


@pytest.mark.usefixtures('_clean_dir')
# NOTE(review): class name has a typo ("Minial" -> "Minimal"); left unchanged
# here because renaming would alter the public identifier pytest discovers.
class TestMinialImports(object):
    def test_main_import(self):
        """Smoke test: the top-level ml_wrappers package imports cleanly."""
        import ml_wrappers  # noqa

    def test_import_wrap_model(self):
        """Smoke test: wrap_model is importable from the package root."""
        from ml_wrappers import wrap_model  # noqa

    def test_import_constants(self):
        """Smoke test: ModelTask is importable from common constants."""
        from ml_wrappers.common.constants import ModelTask  # noqa
--------------------------------------------------------------------------------
/tests/train_wrapper_utils.py:
--------------------------------------------------------------------------------
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

"""Utilities for calling the wrap_model function and validating the results."""

import pandas as pd
from constants import DatasetConstants
from ml_wrappers import wrap_model
from ml_wrappers.common.constants import ModelTask
from ml_wrappers.dataset.dataset_wrapper import DatasetWrapper
from wrapper_validator import (validate_wrapped_classification_model,
                               validate_wrapped_regression_model)


def train_classification_model_numpy(model_initializer, dataset,
                                     use_dataset_wrapper=True):
    """Train a classifier on numpy arrays, wrap it, and validate the wrapper.

    :param model_initializer: Callable (X_train, y_train) -> fitted model.
    :param dataset: Dict keyed by DatasetConstants with train/test splits.
    :param use_dataset_wrapper: If True, also exercise the DatasetWrapper path.
    """
    X_train = dataset[DatasetConstants.X_TRAIN]
    X_test = dataset[DatasetConstants.X_TEST]
    y_train = dataset[DatasetConstants.Y_TRAIN]
    model = model_initializer(X_train, y_train)
    if use_dataset_wrapper:
        X_test_wrapped = DatasetWrapper(X_test)
    else:
        X_test_wrapped = X_test
    wrapped_model = wrap_model(model, X_test_wrapped,
                               model_task=ModelTask.CLASSIFICATION)
    validate_wrapped_classification_model(wrapped_model, X_test)


def train_classification_model_pandas(model_initializer, dataset,
                                      use_dataset_wrapper=True,
                                      validate_single_row=False):
    """Train a classifier on pandas DataFrames, wrap it, and validate the wrapper.

    :param model_initializer: Callable (X_train, y_train) -> fitted model.
    :param dataset: Dict keyed by DatasetConstants with train/test splits.
    :param use_dataset_wrapper: If True, also exercise the DatasetWrapper path.
    :param validate_single_row: If True, additionally validate on one test row.
    """
    X_train = pd.DataFrame(data=dataset[DatasetConstants.X_TRAIN],
                           columns=dataset[DatasetConstants.FEATURES])
    X_test = pd.DataFrame(data=dataset[DatasetConstants.X_TEST],
                          columns=dataset[DatasetConstants.FEATURES])
    y_train = dataset[DatasetConstants.Y_TRAIN]
    model = model_initializer(X_train, y_train)
    if use_dataset_wrapper:
        X_test_wrapped = DatasetWrapper(X_test)
    else:
        X_test_wrapped = X_test
    wrapped_model = wrap_model(model, X_test_wrapped,
                               model_task=ModelTask.CLASSIFICATION)
    if validate_single_row:
        validate_wrapped_classification_model(wrapped_model, X_test.iloc[0:1])
    validate_wrapped_classification_model(wrapped_model, X_test)


def train_regression_model_numpy(model_initializer, dataset,
                                 use_dataset_wrapper=True):
    """Train a regressor on numpy arrays, wrap it, and validate the wrapper.

    :param model_initializer: Callable (X_train, y_train) -> fitted model.
    :param dataset: Dict keyed by DatasetConstants with train/test splits.
    :param use_dataset_wrapper: If True, also exercise the DatasetWrapper path.
    """
    X_train = dataset[DatasetConstants.X_TRAIN]
    X_test = dataset[DatasetConstants.X_TEST]
    y_train = dataset[DatasetConstants.Y_TRAIN]
    model = model_initializer(X_train, y_train)
    if use_dataset_wrapper:
        X_test_wrapped = DatasetWrapper(X_test)
    else:
        X_test_wrapped = X_test
    wrapped_model = wrap_model(model, X_test_wrapped,
                               model_task=ModelTask.REGRESSION)
    validate_wrapped_regression_model(wrapped_model, X_test)


def train_regression_model_pandas(model_initializer, dataset,
                                  use_dataset_wrapper=True):
    """Train a regressor on pandas DataFrames, wrap it, and validate the wrapper.

    :param model_initializer: Callable (X_train, y_train) -> fitted model.
    :param dataset: Dict keyed by DatasetConstants with train/test splits.
    :param use_dataset_wrapper: If True, also exercise the DatasetWrapper path.
    """
    X_train = pd.DataFrame(data=dataset[DatasetConstants.X_TRAIN],
                           columns=dataset[DatasetConstants.FEATURES])
    X_test = pd.DataFrame(data=dataset[DatasetConstants.X_TEST],
                          columns=dataset[DatasetConstants.FEATURES])
    y_train = dataset[DatasetConstants.Y_TRAIN]
    model = model_initializer(X_train, y_train)
    if use_dataset_wrapper:
        X_test_wrapped = DatasetWrapper(X_test)
    else:
        X_test_wrapped = X_test
    wrapped_model = wrap_model(model, X_test_wrapped,
                               model_task=ModelTask.REGRESSION)
validate_wrapped_regression_model(wrapped_model, X_test)
--------------------------------------------------------------------------------
/tests/wrapper_validator.py:
--------------------------------------------------------------------------------
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

"""Utilities for validating wrapped models."""

from ml_wrappers.common.constants import ModelTask, SKLearn
from ml_wrappers.model import WrappedPytorchModel, WrappedTensorflowModel

# Name of the extra predict_classes method exposed by the pytorch and
# tensorflow wrappers (see validate_wrapped_pred_classes_model below).
PREDICT_CLASSES = 'predict_classes'


def validate_wrapped_classification_model(wrapped_model, X_test):
    """Assert a wrapped classifier exposes and runs predict/predict_proba."""
    # validate wrapped model has predict and predict_proba functions
    function_names = [SKLearn.PREDICT, SKLearn.PREDICT_PROBA]
    validate_functions(wrapped_model, function_names)
    # validate we can call the model on the dataset
    predictions = wrapped_model.predict(X_test)
    probabilities = wrapped_model.predict_proba(X_test)
    # validate predictions and probabilities have correct shape
    assert len(predictions.shape) == 1
    assert len(probabilities.shape) == 2


def validate_wrapped_object_detection_custom_model(wrapped_model, X_test, has_predict_proba=True):
    """Assert a wrapped custom object-detection model predicts 2 results.

    :param has_predict_proba: Whether predict_proba should also be validated.
    """
    # validate wrapped model has predict (and optionally predict_proba)
    function_names = [SKLearn.PREDICT, SKLearn.PREDICT_PROBA] \
        if has_predict_proba else [SKLearn.PREDICT]
    validate_functions(wrapped_model, function_names)
    # validate we can call the model on the dataset
    predictions = wrapped_model.predict(X_test)
    # validate predictions and probabilities have correct shape
    # NOTE(review): the concatenated message is missing a space before "Got"
    # (renders as "...to be 2.Got N..."); left as-is since it is a runtime string.
    assert len(predictions) == 2, "Expected number of predictions to be 2." + \
        f"Got {len(predictions)} predictions"
    if has_predict_proba:
        probabilities = wrapped_model.predict_proba(X_test)
        assert len(probabilities) == 2, "Expected number of probabilities to be 2." + \
            f"Got {len(probabilities)} probabilities"


def validate_wrapped_object_detection_mlflow_drise_model(
        wrapped_model, X_test):
    """Assert a wrapped mlflow D-RISE object-detection model predicts 1 result."""
    # validate wrapped model has a predict function (predict_proba not required)
    function_names = [SKLearn.PREDICT]
    validate_functions(wrapped_model, function_names)
    # validate we can call the model on the dataset
    predictions = wrapped_model.predict(X_test)
    # validate predictions have correct shape
    assert len(predictions) == 1


def validate_wrapped_object_detection_model(wrapped_model, X_test,
                                            num_predictions=3):
    """Assert a wrapped object-detection model predicts num_predictions results."""
    # validate wrapped model has predict and predict_proba functions
    function_names = [SKLearn.PREDICT, SKLearn.PREDICT_PROBA]
    validate_functions(wrapped_model, function_names)
    # validate we can call the model on the dataset
    predictions = wrapped_model.predict(X_test)
    probabilities = wrapped_model.predict_proba(X_test)
    # validate predictions and probabilities have correct shape
    assert len(predictions) == num_predictions
    assert len(probabilities) == num_predictions


def validate_wrapped_multilabel_model(wrapped_model, X_test, num_labels):
    """Assert a wrapped multilabel classifier outputs num_labels columns."""
    # validate wrapped model has predict and predict_proba functions
    function_names = [SKLearn.PREDICT, SKLearn.PREDICT_PROBA]
    validate_functions(wrapped_model, function_names)
    # validate we can call the model on the dataset
    predictions = wrapped_model.predict(X_test)
    probabilities = wrapped_model.predict_proba(X_test)
    # validate predictions and probabilities have correct shape
    assert len(predictions.shape) == 2
    assert len(probabilities.shape) == 2
    assert predictions.shape[1] == num_labels
    assert
probabilities.shape[1] == num_labels


def validate_wrapped_regression_model(wrapped_model, X_test):
    """Assert a wrapped regressor has predict but no predict_proba."""
    # validate wrapped model has predict function and NO predict_proba function
    assert hasattr(wrapped_model, SKLearn.PREDICT)
    assert not hasattr(wrapped_model, SKLearn.PREDICT_PROBA)
    # validate we can call the model on the dataset
    predictions = wrapped_model.predict(X_test)
    # validate predictions have correct shape
    assert len(predictions.shape) == 1


def validate_wrapped_question_answering_model(wrapped_model, X_test):
    """Assert a wrapped QA model predicts one string answer per row."""
    # validate wrapped model has predict but NOT predict_proba
    assert hasattr(wrapped_model, SKLearn.PREDICT)
    assert not hasattr(wrapped_model, SKLearn.PREDICT_PROBA)
    # validate we can call the model on the dataset
    predictions = wrapped_model.predict(X_test)
    # validate predictions have correct shape
    assert len(predictions) == len(X_test)
    assert isinstance(predictions[0], str)


def validate_wrapped_tf_model(wrapped_tf_model, X_test, model_task):
    """Assert the tensorflow wrapper type and its predict_classes contract."""
    assert isinstance(wrapped_tf_model, WrappedTensorflowModel)
    validate_wrapped_pred_classes_model(wrapped_tf_model, X_test, model_task)


def validate_wrapped_pytorch_model(wrapped_pytorch_model, X_test, model_task):
    """Assert the pytorch wrapper type and its predict_classes contract."""
    assert isinstance(wrapped_pytorch_model, WrappedPytorchModel)
    validate_wrapped_pred_classes_model(
        wrapped_pytorch_model, X_test, model_task)


def validate_wrapped_pred_classes_model(wrapped_model, X_test, model_task):
    """Assert a wrapper exposes predict, predict_proba and predict_classes,
    and that the task-appropriate outputs have the expected shapes."""
    function_names = [SKLearn.PREDICT, SKLearn.PREDICT_PROBA, PREDICT_CLASSES]
    validate_functions(wrapped_model, function_names)
    # validate we can call the model on the dataset
    if model_task == ModelTask.CLASSIFICATION:
        probabilities = wrapped_model.predict_proba(X_test)
        predictions = wrapped_model.predict_classes(X_test)
        # validate predictions and probabilities have correct shape
        assert len(predictions.shape) == 1
        assert len(probabilities.shape) == 2
    else:
        predictions = wrapped_model.predict(X_test)
        # validate predictions have correct shape (1-D or a single column)
        assert len(predictions.shape) == 1 or predictions.shape[1] == 1


def validate_functions(wrapped_model, function_names):
    """Assert the wrapped model has every attribute named in function_names."""
    for function_name in function_names:
        assert hasattr(wrapped_model, function_name)
--------------------------------------------------------------------------------