├── .coveragerc
├── .github
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.yml
│   │   ├── config.yml
│   │   └── feature_request.yml
│   └── workflows
│       ├── publish-to-pypi.yml
│       ├── publish-to-readthedocs.yml
│       └── run-tests.yml
├── .gitignore
├── .gitleaks.toml
├── .pre-commit-config.yaml
├── .readthedocs.yaml
├── CONTRIBUTING.md
├── ISSUE_POLICY.md
├── LICENSE.txt
├── MANIFEST.in
├── Makefile
├── README-development.md
├── README.md
├── SECURITY.md
├── THIRD_PARTY_LICENSES.txt
├── authorizer
│   ├── Dockerfile
│   ├── func.yaml
│   ├── requirements.txt
│   ├── src
│   │   ├── authorizer.py
│   │   └── utils
│   │       ├── auth_utils.py
│   │       ├── header_utils.py
│   │       └── identity_utils.py
│   └── tests
│       ├── test_auth_utils.py
│       └── test_header_utils.py
├── container-image
│   ├── Dockerfile
│   ├── environment.yaml
│   └── run
│       ├── launch_mlflow.sh
│       └── run.py
├── demos
│   ├── deploy_mlflow_model
│   │   ├── conda-runtime
│   │   │   ├── README.md
│   │   │   ├── elastic-net-deployment_build_conda.yaml
│   │   │   └── elastic-net-deployment_prebuilt_conda.yaml
│   │   └── container-runtime
│   │       ├── README.md
│   │       ├── container
│   │       │   ├── Dockerfile.pyfunc
│   │       │   └── nginx.conf
│   │       ├── elastic-net-deployment-container.yaml
│   │       └── input.json
│   ├── pyspark_logistic_regression_dataflow_job
│   │   ├── MLproject
│   │   ├── README.md
│   │   ├── logistic_regression.py
│   │   ├── oci-datascience-config.json
│   │   ├── oci-datascience-template.yaml
│   │   ├── run.ipynb
│   │   └── run.sh
│   ├── sklearn_elasticnet_wine_notebook_job
│   │   ├── MLproject
│   │   ├── README.md
│   │   ├── oci-datascience-config.json
│   │   ├── oci-datascience-template.yaml
│   │   ├── run.ipynb
│   │   ├── run.sh
│   │   └── train.ipynb
│   └── sklearn_elasticnet_wine_script_job
│       ├── README.md
│       ├── oci-datascience-config.json
│       ├── oci-datascience-template.yaml
│       ├── run.ipynb
│       └── run.sh
├── dev-requirements.txt
├── docs
│   ├── Makefile
│   ├── requirements.txt
│   └── source
│       ├── _static
│       │   ├── logo-dark-mode.png
│       │   └── logo-light-mode.png
│       ├── concepts.rst
│       ├── conf.py
│       ├── demos_examples.rst
│       ├── index.rst
│       ├── model_deployment.rst
│       ├── project.rst
│       ├── quickstart.rst
│       ├── release_notes.rst
│       └── tracking_server.rst
├── oci_mlflow
│   ├── __init__.py
│   ├── auth_plugin.py
│   ├── deployment.py
│   ├── oci_object_storage.py
│   ├── project.py
│   ├── telemetry_logging.py
│   ├── templates
│   │   ├── __init__.py
│   │   ├── project_description.jinja2
│   │   ├── runtime.yaml.jinja2
│   │   └── score.py.jinja2
│   └── utils.py
├── pyproject.toml
├── pytest.ini
├── setup.cfg
├── setup.py
├── test-requirements.txt
└── tests
    └── plugins
        ├── __init__.py
        └── unitary
            ├── __init__.py
            ├── artifacts
            │   ├── 1.txt
            │   ├── 2.txt
            │   └── sub_folder
            │       ├── 3.txt
            │       └── 4.txt
            ├── test_auth.py
            ├── test_deployment.py
            ├── test_files
            │   ├── conda.yaml
            │   ├── invalid-file-type.txt
            │   ├── oci-datascience-template_test.yaml
            │   ├── test-empty-project
            │   ├── test-model
            │   └── test.txt
            ├── test_oci_object_storage.py
            ├── test_project.py
            ├── test_telemetry.py
            └── test_utils.py
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | omit =
3 | **/*.jinja2
4 | **/__init__.py
5 | container-image/*
6 | docs/*
7 | tests/*
8 |
9 | [report]
10 | exclude_lines =
11 | pragma: no cover
12 | def __repr__
13 | if __name__ == .__main__.:
14 | @(abc\.)?abstractmethod
15 | raise AssertionError
16 | raise NotImplementedError
17 | omit =
18 | **/*.jinja2
19 | **/__init__.py
20 | container-image/*
21 | docs/*
22 | tests/*
23 | show_missing = true
24 | skip_empty = true
25 | precision = 2
26 |
27 | [html]
28 | directory = htmlcov
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.yml:
--------------------------------------------------------------------------------
1 | name: Bug Report
2 | description: Bug observed in oci-mlflow library
3 | title: "[Bug]: "
4 | labels: [Bug, Backlog]
5 |
6 | body:
7 | - type: markdown
8 | attributes:
9 | value: |
10 | If you think you've found a security vulnerability, do not raise a GitHub issue; instead, follow the instructions
11 | in our [security policy](https://github.com/oracle/oci-mlflow/security/policy).
12 |
13 | ---
14 |
15 | Thank you for taking the time to file a bug report.
16 | - type: checkboxes
17 | id: checks
18 | attributes:
19 | label: oci-mlflow version used
20 | options:
21 | - label: >
22 | I have checked that this issue has not already been reported.
23 | required: true
24 | - label: >
25 | I have confirmed this bug exists on the
26 | [latest version](https://github.com/oracle/oci-mlflow/releases) of oci-mlflow.
27 | - label: >
28 | I have confirmed this bug exists on the main branch of oci-mlflow.
29 | - label: >
30 | I agree to follow the [Code of Conduct](https://github.com/oracle/.github/blob/main/CODE_OF_CONDUCT.md).
31 | required: true
32 | - type: textarea
33 | id: description
34 | attributes:
35 | label: Description
36 | description: >
37 | Please provide a brief description of the problem, and describe the setup used, as that may be the key to the issue.
38 | validations:
39 | required: true
40 | - type: textarea
41 | id: how-to-reproduce
42 | attributes:
43 | label: How to Reproduce
44 | description: >
45 | Please provide a copy-pastable short code example.
46 | If possible provide an ordered list of steps on how to reproduce the problem.
47 | placeholder: >
48 | mlflow deployments help -t oci-datascience
49 |
50 | ...
51 | render: python
52 | validations:
53 | required: true
54 | - type: textarea
55 | id: what-was-observed
56 | attributes:
57 | label: What was Observed
58 | description: >
59 | Please provide snippets of output or describe wrong behavior.
60 | validations:
61 | required: true
62 | - type: textarea
63 | id: what-was-expected
64 | attributes:
65 | label: What was Expected
66 | description: >
67 | Please describe what should have happened and how it is different from what was observed.
68 | validations:
69 | required: true
70 | - type: textarea
71 | id: version
72 | attributes:
73 | label: Version
74 | description: >
75 | Please paste the output of ``pip freeze | grep oci-mlflow``
76 | value: >
77 |
78 |
79 | Paste here the output of ``pip freeze | grep oci-mlflow``
80 |
81 |
82 | validations:
83 | required: true
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: false
2 | contact_links:
3 | - name: Check the docs
4 | url: https://oci-mlflow.readthedocs.io
5 | about: If you need help with your first steps with oci-mlflow please check the docs.
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.yml:
--------------------------------------------------------------------------------
1 | name: Feature Request
2 | description: Feature and enhancement proposals in oci-mlflow library
3 | title: "[FR]: "
4 | labels: [Task, Backlog]
5 |
6 | body:
7 | - type: markdown
8 | attributes:
9 | value: |
10 | Before proceeding, please review the [Contributing to this repository](https://github.com/oracle/oci-mlflow/blob/main/CONTRIBUTING.md) and the [Code of Conduct](https://github.com/oracle/.github/blob/main/CODE_OF_CONDUCT.md).
11 |
12 | ---
13 |
14 | Thank you for submitting a feature request.
15 | - type: dropdown
16 | id: contribution
17 | attributes:
18 | label: Willingness to contribute
19 | description: Would you or another member of your organization be willing to contribute an implementation of this feature?
20 | options:
21 | - Yes. I can contribute this feature independently.
22 | - Yes. I would be willing to contribute this feature with guidance from the oci-mlflow team.
23 | - No. I cannot contribute this feature at this time.
24 | validations:
25 | required: true
26 | - type: textarea
27 | attributes:
28 | label: Proposal Summary
29 | description: |
30 | In a few sentences, provide a clear, high-level description of the feature request
31 | validations:
32 | required: true
33 | - type: textarea
34 | attributes:
35 | label: Motivation
36 | description: |
37 | - What is the use case for this feature?
38 | - Why is this use case valuable to support for OCI DataScience users in general?
39 | - Why is this use case valuable to support for your project(s) or organization?
40 | - Why is it currently difficult to achieve this use case?
41 | value: |
42 | > #### What is the use case for this feature?
43 |
44 | > #### Why is this use case valuable to support for OCI DataScience users in general?
45 |
46 | > #### Why is this use case valuable to support for your project(s) or organization?
47 |
48 | > #### Why is it currently difficult to achieve this use case?
49 | validations:
50 | required: true
51 | - type: textarea
52 | attributes:
53 | label: Details
54 | description: |
55 | Use this section to include any additional information about the feature. If you have a proposal for how to implement this feature, please include it here. For implementation guidelines, please refer to the [Contributing to this repository](https://github.com/oracle/oci-mlflow/blob/main/CONTRIBUTING.md).
56 | validations:
57 | required: false
--------------------------------------------------------------------------------
/.github/workflows/publish-to-pypi.yml:
--------------------------------------------------------------------------------
1 | name: "[DO NOT TRIGGER] Publish to PyPI"
2 |
3 | # To run this workflow manually from the Actions tab
4 | on: workflow_dispatch
5 |
6 | jobs:
7 | build-n-publish:
8 | name: Build and publish Python 🐍 distribution 📦 to PyPI
9 | runs-on: ubuntu-latest
10 |
11 | steps:
12 | - uses: actions/checkout@v4
13 | - name: Set up Python
14 | uses: actions/setup-python@v5
15 | with:
16 | python-version: "3.x"
17 | - name: Build distribution 📦
18 | run: |
19 | pip install build
20 | make dist
21 | - name: Validate
22 | run: |
23 | pip install dist/*.whl
24 | python -c "import oci_mlflow;"
25 | # # To publish to Test PyPI, a secret with the token needs to be added to the oracle/oci-mlflow project.
26 | # # This one - GH_OCI_MLFLOW_TESTPYPI_TOKEN - was removed from the project secrets after the initial test.
27 | # - name: Publish distribution 📦 to Test PyPI
28 | # env:
29 | # TWINE_USERNAME: __token__
30 | # TWINE_PASSWORD: ${{ secrets.GH_OCI_MLFLOW_TESTPYPI_TOKEN }}
31 | # run: |
32 | # pip install twine
33 | # twine upload -r testpypi dist/* -u $TWINE_USERNAME -p $TWINE_PASSWORD
34 | - name: Publish distribution 📦 to PyPI
35 | env:
36 | TWINE_USERNAME: __token__
37 | TWINE_PASSWORD: ${{ secrets.GH_OCI_MLFLOW_PYPI_TOKEN }}
38 | run: |
39 | pip install twine
40 | twine upload dist/* -u $TWINE_USERNAME -p $TWINE_PASSWORD
41 |
--------------------------------------------------------------------------------
/.github/workflows/publish-to-readthedocs.yml:
--------------------------------------------------------------------------------
1 | name: "Publish Docs"
2 |
3 | on:
4 | # Auto-trigger this workflow on tag creation
5 | push:
6 | tags:
7 | - 'v*.*.*'
8 |
9 | env:
10 | RTDS_MLFLOW_PROJECT: https://readthedocs.org/api/v3/projects/accelerated-data-science
11 | RTDS_MLFLOW_TOKEN: ${{ secrets.RTDS_MLFLOW_TOKEN }}
12 |
13 | jobs:
14 | build-n-publish:
15 | name: Build and publish Docs 📖 to Readthedocs
16 | runs-on: ubuntu-latest
17 |
18 | steps:
19 | - name: When tag 🏷️ pushed - Trigger Readthedocs build
20 | if: github.event_name == 'push' && startsWith(github.ref_name, 'v')
21 | run: |
22 | # trigger build/publish of latest version
23 | curl \
24 | -X POST \
25 | -H "Authorization: Token $RTDS_MLFLOW_TOKEN" $RTDS_MLFLOW_PROJECT/versions/latest/builds/
26 | # wait 15 minutes for readthedocs to see the freshly created tag
27 | sleep 15m
28 | # trigger build/publish of v*.*.* version
29 | curl \
30 | -X POST \
31 | -H "Authorization: Token $RTDS_MLFLOW_TOKEN" $RTDS_MLFLOW_PROJECT/versions/${{ github.ref_name }}/builds/
--------------------------------------------------------------------------------
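
The workflow above drives the Read the Docs v3 builds API with `curl`. For local debugging, a rough Python equivalent is sketched below (an assumption-laden sketch, not repo code: it presumes `requests` is installed and `RTDS_MLFLOW_TOKEN` is exported locally, and reuses the project URL from the workflow's `env` block):

```python
# Sketch: trigger a build of the "latest" docs version, mirroring the first
# curl call in the workflow above.
import os

import requests

PROJECT = "https://readthedocs.org/api/v3/projects/accelerated-data-science"
token = os.environ["RTDS_MLFLOW_TOKEN"]  # assumed to be set locally

resp = requests.post(
    f"{PROJECT}/versions/latest/builds/",
    headers={"Authorization": f"Token {token}"},
)
resp.raise_for_status()
print(resp.json())
```
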
/.github/workflows/run-tests.yml:
--------------------------------------------------------------------------------
1 | name: Run Tests
2 |
3 | on:
4 | pull_request:
5 | paths:
6 | - "container-image/**"
7 | - "oci_mlflow/**"
8 | - "tests/**"
9 | - "**requirements.txt"
10 | - pyproject.toml
11 | # To run this workflow manually from the Actions tab
12 | workflow_dispatch:
13 |
14 | # Cancel in progress workflows on pull_requests.
15 | # https://docs.github.com/en/actions/using-jobs/using-concurrency#example-using-a-fallback-value
16 | concurrency:
17 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
18 | cancel-in-progress: true
19 |
20 | permissions:
21 | contents: read
22 | pull-requests: write
23 |
24 | # hack for https://github.com/actions/cache/issues/810#issuecomment-1222550359
25 | env:
26 | SEGMENT_DOWNLOAD_TIMEOUT_MINS: 5
27 |
28 | jobs:
29 | test:
30 | name: python ${{ matrix.python-version }}
31 | runs-on: ubuntu-latest
32 | timeout-minutes: 20
33 |
34 | strategy:
35 | fail-fast: false
36 | matrix:
37 | python-version: ["3.8", "3.9", "3.10"]
38 | include:
39 | - python-version: "3.9"
40 | cov-reports: --cov=oci_mlflow --cov-report=xml --cov-report=html
41 |
42 | steps:
43 | - uses: actions/checkout@v4
44 |
45 | # Caching python libraries installed with pip
46 | # https://github.com/actions/cache/blob/main/examples.md#python---pip
47 | - uses: actions/cache@v4
48 | with:
49 | path: ~/.cache/pip
50 | key: ${{ runner.os }}-pip-${{ hashFiles('**/test-requirements.txt') }}
51 | restore-keys: |
52 | ${{ runner.os }}-pip-
53 | - uses: actions/setup-python@v5
54 | with:
55 | python-version: ${{ matrix.python-version }}
56 |
57 | - name: "Test config setup"
58 | shell: bash
59 | env:
60 | HOME_RUNNER_DIR: /home/runner
61 | run: |
62 | set -x # print commands that are executed
63 | mkdir -p "$HOME_RUNNER_DIR"/.oci
64 | openssl genrsa -out $HOME_RUNNER_DIR/.oci/oci_mlflow_user.pem 2048
65 | cat <<EOT >> "$HOME_RUNNER_DIR/.oci/config"
66 | [DEFAULT]
67 | user=ocid1.user.oc1..xxx
68 | fingerprint=00:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00
69 | tenancy=ocid1.tenancy.oc1..xxx
70 | region=test_region
71 | key_file=$HOME_RUNNER_DIR/.oci/oci_mlflow_user.pem
72 | EOT
73 | ls -lha "$HOME_RUNNER_DIR"/.oci
74 | echo "Test config file:"
75 | cat $HOME_RUNNER_DIR/.oci/config
76 |
77 | - name: "Run tests"
78 | timeout-minutes: 5
79 | shell: bash
80 | run: |
81 | set -x # print commands that are executed
82 | $CONDA/bin/conda init
83 | source /home/runner/.bashrc
84 | pip install -r test-requirements.txt
85 | python -m pytest ${{ matrix.cov-reports }} tests
86 |
87 | - name: "Calculate coverage"
88 | if: ${{ success() }} && ${{ github.event.issue.pull_request }}
89 | run: |
90 | set -x # print commands that are executed
91 |
92 | # Prepare default cov body text
93 | COV_BODY_INTRO="📌 Overall coverage:\n\n"
94 | echo COV_BODY="$COV_BODY_INTRO No success to gather report. 😿" >> $GITHUB_ENV
95 |
96 | # Calculate overall coverage and update body message
97 | COV=$(grep -E 'pc_cov' htmlcov/index.html | cut -d'>' -f 2 | cut -d'%' -f 1)
98 | if [[ ! -z $COV ]]; then
99 | ROUNDED_COV=$(echo $COV | cut -d'.' -f 1)
100 | if [[ $ROUNDED_COV -lt 50 ]]; then COLOR=red; elif [[ $ROUNDED_COV -lt 80 ]]; then COLOR=yellow; else COLOR=green; fi
101 | echo COV_BODY="$COV_BODY_INTRO " >> $GITHUB_ENV
102 | fi
103 |
104 | - name: "Add comment with coverage info to PR"
105 | uses: actions/github-script@v7
106 | if: ${{ success() }} && ${{ github.event.issue.pull_request }}
107 | with:
108 | github-token: ${{ github.token }}
109 | script: |
110 | github.rest.issues.createComment({
111 | issue_number: context.issue.number,
112 | owner: context.repo.owner,
113 | repo: context.repo.repo,
114 | body: '${{ env.COV_BODY }}'
115 | })
116 |
117 | - name: "Save coverage files"
118 | uses: actions/upload-artifact@v4
119 | if: ${{ matrix.cov-reports }}
120 | with:
121 | name: cov-reports
122 | path: |
123 | htmlcov/
124 | .coverage
125 | coverage.xml
126 |
--------------------------------------------------------------------------------
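
The "Calculate coverage" step above extracts the overall percentage from coverage.py's HTML report with a `grep`/`cut` pipeline. A Python sketch of the same parsing, handy for checking it locally (assumes an `htmlcov/` report already produced by `pytest --cov`):

```python
# Sketch: pull the total coverage out of htmlcov/index.html, mirroring the
# grep 'pc_cov' | cut pipeline in the workflow above.
import re
from pathlib import Path

html = Path("htmlcov/index.html").read_text()
# coverage.py renders the total as e.g. <span class="pc_cov">87.50%</span>
match = re.search(r'class="pc_cov">([\d.]+)%', html)
if match:
    cov = float(match.group(1))
    color = "red" if cov < 50 else "yellow" if cov < 80 else "green"
    print(f"Overall coverage: {cov:.2f}% ({color})")
else:
    print("No success to gather report.")
```
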
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | docs/build/
14 | docs/docs_html/
15 | develop-eggs/
16 | dist/
17 | downloads/
18 | eggs/
19 | .eggs/
20 | lib/
21 | lib64/
22 | parts/
23 | sdist/
24 | var/
25 | wheels/
26 | *.egg-info/
27 | .installed.cfg
28 | *.egg
29 | ads_latest.zip
30 |
31 | # PyInstaller
32 | # Usually these files are written by a python script from a template
33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest
35 | *.spec
36 |
37 | # Installer logs
38 | pip-log.txt
39 | pip-delete-this-directory.txt
40 |
41 | # Unit test / coverage reports
42 | htmlcov/
43 | .tox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | .hypothesis/
51 | .pytest_cache
52 |
53 | # Translations
54 | *.mo
55 | *.pot
56 |
57 | # Django stuff:
58 | *.log
59 | local_settings.py
60 |
61 | # Flask stuff:
62 | instance/
63 | .webassets-cache
64 |
65 | # Scrapy stuff:
66 | .scrapy
67 |
68 | # Sphinx documentation
69 | docs/_build/
70 | docs/build/
71 | docs/dask-worker-space/
72 |
73 | # PyBuilder
74 | target/
75 |
76 | # Jupyter Notebook
77 | .ipynb_checkpoints
78 |
79 | # pyenv
80 | .python-version
81 |
82 | # celery beat schedule file
83 | celerybeat-schedule
84 |
85 | # SageMath parsed files
86 | *.sage.py
87 |
88 | # dotenv
89 | .env
90 |
91 | # virtualenv
92 | .venv
93 | venv/
94 | ENV/
95 |
96 | # Spyder project settings
97 | .spyderproject
98 | .spyproject
99 |
100 | # Rope project settings
101 | .ropeproject
102 |
103 | # mkdocs documentation
104 | /site
105 |
106 | # mypy
107 | .mypy_cache/
108 |
109 | ### Linux ###
110 | *~
111 |
112 | # IntelliJ/Pycharm settings
113 | *.iml
114 | .idea
115 | .vscode/
116 |
117 | # Mac OS metadata
118 | .DS_Store
119 | .AppleDouble
120 | .LSOverride
121 |
122 | # dask related
123 | dask-worker-space
124 |
125 | # sqlite databases
126 | *.db
127 |
128 | Untitled*.ipynb
129 | *.lock
130 | *.dirlock
131 |
132 |
133 | # test results
134 | testresults.xml
135 |
136 |
137 |
138 | # html reports of covered code by pytests and
139 | # execution data collected by coverage.py
140 | *coverage_html_report*
141 | .coverage
142 | .coverage*
143 | **.sqlite
144 |
145 | extensions/**/node_modules/
146 | extensions/**/*.tgz
147 | logs/
148 | **/node_modules
149 |
150 | # side-effects of running notebooks
151 | **/.mlx_static/**
152 |
153 | # vim
154 | *.swp
155 |
156 | *.whl
157 | .env
158 | local_workarea
159 | mlruns
160 | tmp
161 | conda
162 |
163 | docs/build
164 |
--------------------------------------------------------------------------------
/.gitleaks.toml:
--------------------------------------------------------------------------------
1 | title = "Gitleaks Config"
2 |
3 | # Gitleaks feature, extending the existing base config from:
4 | # https://github.com/zricethezav/gitleaks/blob/master/config/gitleaks.toml
5 | [extend]
6 | useDefault = true
7 |
8 | # Allowlist 'stopwords' and 'regexes' exclude any secrets or matching patterns from the current repository.
9 | # Paths listed in allowlist will not be scanned.
10 | [allowlist]
11 | description = "Global allow list"
12 | stopwords = ["test_password", "sample_key"]
13 | regexes = [
14 | '''example-password''',
15 | '''this-is-not-the-secret''',
16 | ''''''
17 | ]
18 |
19 | # Describe rule to search real ocids
20 | [[rules]]
21 | description = "Real ocids"
22 | id = "ocid"
23 | regex = '''ocid[123]\.[a-z1-9A-Z]*\.oc\d\.[a-z1-9A-Z]*\.[a-z1-9A-Z]+'''
24 | keywords = [
25 | "ocid"
26 | ]
27 |
28 | # Describe rule to search generic secrets
29 | [[rules]]
30 | description = "Generic secret"
31 | id = "generic-secret"
32 | regex = '''(?i)((key|api|token|secret|passwd|password|psw|pass|pswd)[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9a-zA-Z!@#$%^&*<>\\\-_.=]{3,100})['\"]'''
33 | entropy = 0
34 | secretGroup = 4
35 | keywords = [
36 | "key","api","token","secret","passwd","password", "psw", "pass", "pswd"
37 | ]
38 |
--------------------------------------------------------------------------------
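
The `ocid` rule above is a plain regex that also compiles under Python's `re`, so it can be sanity-checked outside gitleaks. A quick sketch (not part of the repo; the sample strings are made up):

```python
# Sketch: exercise the "Real ocids" rule with the same pattern.
import re

OCID_RULE = re.compile(r"ocid[123]\.[a-z1-9A-Z]*\.oc\d\.[a-z1-9A-Z]*\.[a-z1-9A-Z]+")

# An OCID-shaped string is flagged...
assert OCID_RULE.search("ocid1.tenancy.oc1.iad.aaaaexampleuniqueid")
# ...while strings outside the ocid1/ocid2/ocid3 namespaces are not.
assert not OCID_RULE.search("ocid4.tenancy.oc1.iad.aaaaexampleuniqueid")
```
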
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | # ruff
3 | - repo: https://github.com/astral-sh/ruff-pre-commit
4 | rev: v0.4.9
5 | hooks:
6 | - id: ruff
7 | types_or: [ python, pyi, jupyter ]
8 | args: [ --fix ]
9 | files: ^ads
10 | exclude: ^docs/
11 | - id: ruff-format
12 | types_or: [ python, pyi, jupyter ]
13 | exclude: ^docs/
14 | # Standard hooks
15 | - repo: https://github.com/pre-commit/pre-commit-hooks
16 | rev: v4.4.0
17 | hooks:
18 | - id: check-ast
19 | exclude: ^docs/
20 | - id: check-docstring-first
21 | exclude: ^(docs/|tests/)
22 | - id: check-json
23 | - id: check-merge-conflict
24 | - id: check-yaml
25 | args: ['--allow-multiple-documents']
26 | - id: detect-private-key
27 | - id: end-of-file-fixer
28 | - id: pretty-format-json
29 | args: ['--autofix']
30 | - id: trailing-whitespace
31 | args: [ --markdown-linebreak-ext=md ]
32 | exclude: ^docs/
33 | # Regex based rst files common mistakes detector
34 | - repo: https://github.com/pre-commit/pygrep-hooks
35 | rev: v1.10.0
36 | hooks:
37 | - id: rst-backticks
38 | files: ^docs/
39 | - id: rst-inline-touching-normal
40 | files: ^docs/
41 | # Hardcoded secrets and ocids detector
42 | - repo: https://github.com/gitleaks/gitleaks
43 | rev: v8.17.0
44 | hooks:
45 | - id: gitleaks
46 | # Oracle copyright checker
47 | - repo: https://github.com/oracle-samples/oci-data-science-ai-samples/
48 | rev: cbe0136
49 | hooks:
50 | - id: check-copyright
51 | name: check-copyright
52 | entry: .pre-commit-scripts/check-copyright.py
53 | language: script
54 | types_or: ['python', 'shell', 'bash']
55 | exclude: ^docs/
56 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # Read the Docs configuration file
2 |
3 | # Required
4 | version: 2
5 |
6 | # Set the version of Python and other tools you might need
7 | build:
8 | os: ubuntu-22.04
9 | tools:
10 | python: "3.9"
11 |
12 | # Build documentation in the docs/ directory with Sphinx
13 | sphinx:
14 | configuration: docs/source/conf.py
15 |
16 | # Optionally declare the Python requirements required to build your docs
17 | python:
18 | install:
19 | - requirements: docs/requirements.txt
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to this repository
2 |
3 | We welcome your contributions! There are multiple ways to contribute.
4 |
5 | ## Opening issues
6 |
7 | For bugs or enhancement requests, file a GitHub issue unless it's
8 | security related. When filing a bug, remember that the better written the bug is,
9 | the more likely it is to be fixed. If you think you've found a security
10 | vulnerability, do not raise a GitHub issue; instead, follow the instructions in our
11 | [security policy](./SECURITY.md).
12 |
13 | ## Contributing code
14 |
15 | We welcome your code contributions. Before submitting code using a pull request,
16 | you must sign the [Oracle Contributor Agreement](https://oca.opensource.oracle.com) (OCA) and
17 | your commits need to include the following line using the name and e-mail
18 | address you used to sign the OCA:
19 |
20 | ```text
21 | Signed-off-by: Your Name <your.email@example.com>
22 | ```
23 |
24 | This can be automatically added to pull requests by committing with `--signoff`
25 | or `-s`, for example:
26 |
27 | ```text
28 | git commit --signoff
29 | ```
30 |
31 | Only pull requests from committers that can be verified as having signed the OCA
32 | are accepted.
33 |
34 | ## Pull request process
35 |
36 | 1. Ensure there is an issue created to track and discuss the fix or enhancement
37 | you intend to submit.
38 | 2. Fork this repository.
39 | 3. Create a branch in your fork to implement the changes. We recommend using
40 | the issue number as part of your branch name, for example `1234-fixes`.
41 | 4. Ensure that any documentation is updated with the changes that are required
42 | by your change.
43 | 5. Ensure that any samples are updated if the base image has been changed.
44 | 6. Submit the pull request. *Don't leave the pull request blank*. Explain exactly
45 | what your changes are meant to do and provide simple steps about how to validate
46 | your changes. Ensure that you reference the issue you created as well.
47 | 7. We assign the pull request to 2-3 people for review before it is merged.
48 |
49 | ## Code of conduct
50 |
51 | Follow the [Golden Rule](https://en.wikipedia.org/wiki/Golden_Rule). If you'd
52 | like more specific guidelines, see the
53 | [Contributor Covenant Code of Conduct](https://www.contributor-covenant.org/version/1/4/code-of-conduct/).
54 |
--------------------------------------------------------------------------------
/ISSUE_POLICY.md:
--------------------------------------------------------------------------------
1 | # Issue Policy
2 |
3 | The OCI MLflow Issue Policy outlines the categories of OCI MLflow GitHub issues and discusses the guidelines and processes associated with each type of issue.
4 |
5 | Before filing an issue, make sure to [search for related issues](https://github.com/oracle/oci-mlflow/issues) and check if they address the same problem you're encountering.
6 |
7 | ## Issue Categories
8 |
9 | Our policy states that GitHub issues fall into the following categories:
10 |
11 | 1. Feature Requests
12 | 2. Bug Reports
13 | 3. Documentation Fixes
14 | 4. Installation Issues
15 |
16 | Each category has its own GitHub issue template. Please refrain from deleting the issue template unless you are certain that your issue does not fit within its scope.
17 |
18 | ### Feature Requests
19 |
20 | #### Guidelines
21 |
22 | To increase the likelihood of having a feature request accepted, please ensure that:
23 |
24 | - The request has a minimal scope (note that it's easier to add additional functionality later than to remove functionality).
25 | - The request has a significant impact on users and provides value that justifies the maintenance efforts required to support the feature in the future.
26 |
27 | #### Lifecycle
28 |
29 | Feature requests typically go through the following stages:
30 |
31 | 1. Submit a feature request GitHub Issue, providing a brief overview of the proposal and its motivation. If possible, include an implementation overview as well.
32 | 2. The issue will be triaged to determine if more information is needed from the author, assign a priority, and route the request to the appropriate committers.
33 | 3. Discuss the feature request with a committer who will provide input on the implementation overview or request a more detailed design if necessary.
34 | 4. Once there is agreement on the feature request and its implementation, an implementation owner will be assigned.
35 | 5. The implementation owner will start developing the feature and ultimately submit associated pull requests to the OCI MLflow Repository.
36 |
37 | ### Bug Reports
38 |
39 | #### Guidelines
40 |
41 | To ensure that maintainers can effectively assist with any reported bugs, please follow these guidelines:
42 |
43 | - Fill out the bug report template completely, providing appropriate levels of detail, especially in the "Code to reproduce issue" section.
44 | - Verify that the bug you are reporting meets one of the following criteria:
45 | - It is a regression where a recent release of OCI MLflow no longer supports an operation that was supported in an earlier release.
46 | - A documented feature or functionality does not work as intended when executing a provided example from the documentation.
47 | - Any raised exception is directly from OCI MLflow and not the result of an underlying package's exception.
48 | - Make an effort to diagnose and troubleshoot the issue before filing the report.
49 | - Ensure that the environment in which you encountered the bug is supported as defined in the documentation.
50 | - Validate that OCI MLflow supports the functionality you are experiencing issues with. Remember that the absence of a feature does not constitute a bug.
51 | - Read the documentation for the feature related to the issue you are reporting. If you are certain that you are following the documented guidelines, please file a bug report.
52 |
53 | #### Lifecycle
54 |
55 | Bug reports typically go through the following stages:
56 |
57 | 1. Submit a bug report GitHub Issue, providing a high-level description of the bug and all the necessary information to reproduce it.
58 | 2. The bug report will be triaged to determine if more information is required from the author, assign a priority, and route the issue to the appropriate committers.
59 | 3. An OCI MLflow committer will reproduce the bug and provide feedback on how to implement a fix.
60 | 4. Once an approach has been agreed upon, an owner for the fix will be assigned. For severe bugs, OCI MLflow committers may choose to take ownership to ensure a timely resolution.
61 | 5. The fix owner will start implementing the solution and ultimately submit associated pull requests.
62 |
63 | ### Documentation Fixes
64 |
65 | #### Lifecycle
66 |
67 | Documentation issues typically go through the following stages:
68 |
69 | 1. Submit a documentation GitHub Issue, describing the issue and indicating its location(s) in the OCI MLflow documentation.
70 | 2. The issue will be triaged to determine if more information is needed from the author, assign a priority, and route the request to the appropriate committers.
71 | 3. An OCI MLflow committer will confirm the documentation issue and provide feedback on how to implement a fix.
72 | 4. Once an approach has been agreed upon, an owner for the fix will be assigned. For severe documentation issues, OCI MLflow committers may choose to take ownership to ensure a timely resolution.
73 | 5. The fix owner will start implementing the solution and ultimately submit associated pull requests.
74 |
75 | ### Installation Issues
76 |
77 | #### Lifecycle
78 |
79 | Installation issues typically go through the following stages:
80 |
81 | 1. Submit an installation GitHub Issue, describing the issue and indicating the platforms it affects.
82 | 2. The issue will be triaged to determine if more information is needed from the author, assign a priority, and route the issue to the appropriate committers.
83 | 3. An OCI MLflow committer will confirm the installation issue and provide feedback on how to implement a fix.
84 | 4. Once an approach has been agreed upon, an owner for the fix will be assigned. For severe installation issues, OCI MLflow committers may choose to take ownership to ensure a timely resolution.
85 | 5. The fix owner will start implementing the solution and ultimately submit associated pull requests.
86 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2023 Oracle and/or its affiliates. All rights reserved.
2 |
3 | The Universal Permissive License (UPL), Version 1.0
4 |
5 | Subject to the condition set forth below, permission is hereby granted to any
6 | person obtaining a copy of this software, associated documentation and/or data
7 | (collectively the "Software"), free of charge and under any and all copyright
8 | rights in the Software, and any and all patent rights owned or freely
9 | licensable by each licensor hereunder covering either (i) the unmodified
10 | Software as contributed to or provided by such licensor, or (ii) the Larger
11 | Works (as defined below), to deal in both
12 |
13 | (a) the Software, and
14 | (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
15 | one is included with the Software (each a "Larger Work" to which the Software
16 | is contributed by such licensors),
17 |
18 | without restriction, including without limitation the rights to copy, create
19 | derivative works of, display, perform, and distribute the Software and make,
20 | use, sell, offer for sale, import, export, have made, and have sold the
21 | Software and the Larger Work(s), and to sublicense the foregoing rights on
22 | either these or other terms.
23 |
24 | This license is subject to the following condition:
25 | The above copyright notice and either this complete permission notice or at
26 | a minimum a reference to the UPL must be included in all copies or
27 | substantial portions of the Software.
28 |
29 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
32 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
33 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
34 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35 | SOFTWARE.
36 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE.txt
2 | include THIRD_PARTY_LICENSES.txt
3 | include oci_mlflow/version.json
4 | include oci_mlflow/templates/*.jinja2
5 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | -include .env
2 |
3 | .PHONY: clean dist wheel
4 |
5 | TAG:=latest
6 | IMAGE_NAME:=oci-mlflow
7 | CONTAINERDIR:=container-image
8 | RND:=1
9 |
10 | clean:
11 | @rm -rf dist build oci_mlflow.egg-info $(CONTAINERDIR)/run/*.whl
12 | @find ./ -name '*.pyc' -exec rm -f {} \;
13 | @find ./ -name 'Thumbs.db' -exec rm -f {} \;
14 | @find ./ -name '*~' -exec rm -f {} \;
15 |
16 | dist: clean
17 | @python -m build
18 |
19 | build-image:
20 | docker build --network host --build-arg RND=$(RND) -t $(IMAGE_NAME):$(TAG) -f container-image/Dockerfile .
21 | $(MAKE) clean
22 |
23 | launch: build-image
24 | @docker run --rm -it --net host -v ~/.oci:/root/.oci --env-file .env --name oci-mlflow $(IMAGE_NAME):$(TAG)
25 |
26 | launch-shell: build-image
27 | @docker run --rm -it --net host -v ~/.oci:/root/.oci --env-file .env --entrypoint bash --name oci-mlflow-shell $(IMAGE_NAME):$(TAG)
28 |
29 | wheel: dist
30 | @cp dist/*.whl container-image/run/
31 |
--------------------------------------------------------------------------------
/README-development.md:
--------------------------------------------------------------------------------
1 | # Development
2 | The target audience for this README is developers wanting to contribute to `OCI MLflow` plugins. If you want to use the OCI MLflow plugins with your own programs, see `README.md`.
3 |
4 | ## Setting Up Dependencies
5 |
6 | ```
7 | python3 -m pip install -r dev-requirements.txt
8 | ```
9 |
10 | ## Generating the wheel
11 | The OCI MLflow plugins are packaged as a wheel. To generate the wheel, you can run:
12 |
13 | ```
14 | make dist
15 | ```
16 |
17 | Alternatively you can run -
18 |
19 | ```
20 | python -m build
21 | ```
22 |
23 | This wheel can then be installed using pip.
24 |
25 |
26 | ## Setting Up Tracking Server
27 |
28 | Create a file called `.env` in the root folder of the project with the following contents -
29 |
30 | ```
31 | # Defaults to resource_principal if not provided
32 | OCIFS_IAM_TYPE=api_key
33 |
34 | # Artifacts location. Can be local folder or OCI Object Storage bucket
35 | MLFLOW_ARTIFACTS_DESTINATION=oci://bucket@namespace/
36 | MLFLOW_DEFAULT_ARTIFACT_ROOT=oci://bucket@namespace/
37 |
38 | # Backend provider. Default is sqllite
39 | BACKEND_PROVIDER=sqllite
40 |
41 | # ------MySQL-----------------------
42 | # BACKEND_PROVIDER=mysql
43 |
44 | # The database credentials can be stored in the Vault service, or they can be provided in the config.
45 | # See more details how to save the credentials to the Vault -
46 | # https://accelerated-data-science.readthedocs.io/en/latest/user_guide/secrets/mysql.html
47 |
48 | # DB_SECRET_OCID=ocid1.vaultsecret.oc1.iad..
49 |
50 | # ----OR------------------------------
51 | # MLFLOW_BACKEND_STORE_URI=mysql+mysqlconnector://{username}:{password}@{host}:{db_port}/{db_name}
52 | # ------------------------------------
53 |
54 | MLFLOW_SERVE_ARTIFACTS=1
55 | MLFLOW_GUNICORN_OPTS=--log-level debug
56 | MLFLOW_WORKERS=4
57 | MLFLOW_HOST=0.0.0.0
58 | ```
59 |
60 | ### Building And Running Tracking Server
61 |
62 | To build an `oci-mlflow` container image run -
63 |
64 | ```
65 | make clean build-image
66 | ```
67 |
68 | Alternatively you can run -
69 | ```
70 | docker build --network host --build-arg RND=1 -t oci-mlflow:latest -f container-image/Dockerfile .
71 | ```
72 |
73 | To build and launch tracking server run -
74 |
75 | ```
76 | make clean launch
77 | ```
78 |
79 | Alternatively you can run -
80 | ```
81 | docker build --network host --build-arg RND=1 -t oci-mlflow:latest -f container-image/Dockerfile .
82 | docker run --rm -it --net host -v ~/.oci:/root/.oci --env-file .env --name oci-mlflow oci-mlflow:latest
83 | ```
84 |
85 | To build `oci-mlflow` wheel file and then rebuild and launch the container image, run -
86 |
87 | ```
88 | make clean wheel launch
89 | ```
90 |
91 | Alternatively you can run -
92 |
93 | ```
94 | python -m build
95 | cp dist/*.whl container-image/run/
96 | docker build --network host --build-arg RND=1 -t oci-mlflow:latest -f container-image/Dockerfile .
97 | docker run --rm -it --net host -v ~/.oci:/root/.oci --env-file .env --name oci-mlflow oci-mlflow:latest
98 | ```
99 |
100 | To build and start a shell prompt within `oci-mlflow` container image, run -
101 |
102 | ```
103 | make clean wheel launch-shell
104 | ```
105 |
106 | Alternatively you can run -
107 |
108 | ```
109 | python -m build
110 | cp dist/*.whl container-image/run/
111 | docker build --network host --build-arg RND=1 -t oci-mlflow:latest -f container-image/Dockerfile .
112 | docker run --rm -it --net host -v ~/.oci:/root/.oci --env-file .env --entrypoint bash --name oci-mlflow-shell oci-mlflow:latest
113 | ```
114 |
115 | ## Running Tests
116 | The project uses pytest as its test framework. To run the tests use:
117 |
118 | ```
119 | python3 -m pytest tests/*
120 | ```
121 |
122 | ## Generating Documentation
123 | Sphinx is used for documentation. You can generate HTML locally with the following:
124 |
125 | ```
126 | python3 -m pip install -r dev-requirements.txt
127 | cd docs
128 | make html
129 | ```
130 |
--------------------------------------------------------------------------------
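
Once the tracking server from README-development.md is up, a quick smoke test from Python looks like the sketch below (assumptions: the server listens on `http://localhost:5000`, MLflow's default port; adjust the URI to your `.env` and port mapping):

```python
# Sketch: log a throwaway run against the locally launched tracking server.
import mlflow

mlflow.set_tracking_uri("http://localhost:5000")  # assumed host/port
mlflow.set_experiment("smoke-test")

with mlflow.start_run():
    mlflow.log_param("alpha", 0.5)
    mlflow.log_metric("rmse", 0.78)

print("Run logged; it should appear under 'smoke-test' in the MLflow UI.")
```
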
/README.md:
--------------------------------------------------------------------------------
1 | # OCI MLflow Plugin
2 |
3 | [PyPI](https://pypi.org/project/oci-mlflow/) [Python](https://pypi.org/project/oci-mlflow/)
4 |
5 | The OCI MLflow plugin enables OCI users to manage their machine learning use case life cycle with OCI resources. The table below maps MLflow features to the OCI resources that are used.
6 |
7 | | MLflow Use Case | OCI Resource |
8 | | -------- | ------- |
9 | | Users running machine learning experiments in a notebook, logging model artifacts, model performance, etc. | Data Science Jobs, Object Storage, MySQL |
10 | | Batch workloads using Spark | Data Flow, Object Storage, MySQL |
11 | | Model Catalog | Data Science Model Catalog |
12 | | Model Deployment | Data Science Model Deployment |
13 | | Users running machine learning experiments in a notebook, logging model artifacts, model performance, etc. | Object Storage, MySQL |
14 |
15 |
16 | ## Installation
17 |
18 | To install the `oci-mlflow` plugin, run -
19 |
20 | ```bash
21 | python3 -m pip install oci-mlflow
22 | ```
23 |
24 | To test the `oci-mlflow` plugin, run -
25 |
26 | ```bash
27 | mlflow deployments help -t oci-datascience
28 | ```
29 |
30 | ## Documentation
31 | - [OCI MLflow Documentation](https://oci-mlflow.readthedocs.io/en/latest/index.html)
32 | - [Getting started with Oracle Accelerated Data Science SDK](https://accelerated-data-science.readthedocs.io/en/latest/index.html)
33 | - [Getting started with OCI Data Science Jobs](https://docs.oracle.com/en-us/iaas/data-science/using/jobs-about.htm)
34 | - [Getting started with Data Science Environments](https://docs.oracle.com/en-us/iaas/data-science/using/conda_environ_list.htm)
35 | - [Getting started with Custom Conda Environments](https://docs.oracle.com/en-us/iaas/data-science/using/conda_create_conda_env.htm)
36 | - [Getting started with Model Catalog](https://docs.oracle.com/en-us/iaas/data-science/using/models-about.htm)
37 | - [Getting started with Model Deployment](https://docs.oracle.com/en-us/iaas/data-science/using/model-dep-about.htm)
38 | - [Oracle AI & Data Science Blog](https://blogs.oracle.com/ai-and-datascience/)
39 | - [OCI Documentation](https://docs.oracle.com/en-us/iaas/data-science/using/data-science.htm)
40 |
41 | ## Examples
42 | ### Running MLflow projects on OCI `Data Science Jobs` and `Data Flow` applications -
43 |
44 | ```bash
45 | export MLFLOW_TRACKING_URI=<tracking-server-uri>
46 | mlflow run . --experiment-name My-Experiment --backend oci-datascience --backend-config ./oci-datascience-config.json
47 | ```
48 |
49 | ### Deploying MLflow models to OCI Model Deployment -
50 |
51 | ```bash
52 | mlflow deployments help -t oci-datascience
53 |
54 | export MLFLOW_TRACKING_URI=<tracking-server-uri>
55 |
56 | mlflow deployments create --name <deployment-name> -m models:/<registered-model-name>/<model-version> -t oci-datascience --config deploy-config-file=deployment_specification.yaml
57 | ```
58 |
59 |
60 | ## Contributing
61 |
62 | This project welcomes contributions from the community. Before submitting a pull request, please [review our contribution guide](./CONTRIBUTING.md).
63 |
64 | Find Getting Started instructions for developers in [README-development.md](https://github.com/oracle/oci-mlflow/blob/main/README-development.md)
65 |
66 | ## Security
67 |
68 | Consult the security guide [SECURITY.md](https://github.com/oracle/oci-mlflow/blob/main/SECURITY.md) for our responsible security vulnerability disclosure process.
69 |
70 | ## License
71 |
72 | Copyright (c) 2023 Oracle and/or its affiliates. Licensed under the [Universal Permissive License v1.0](https://oss.oracle.com/licenses/upl/)
73 |
--------------------------------------------------------------------------------
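
The deployment CLI shown in README.md has a programmatic counterpart through MLflow's deployments client. A minimal sketch (the deployment name, model URI, and YAML file name are placeholders; the `deploy-config-file` config key mirrors the CLI's `--config` flag):

```python
# Sketch: create an OCI Data Science model deployment via the MLflow
# deployments client instead of the CLI. Names below are placeholders.
from mlflow.deployments import get_deploy_client

client = get_deploy_client("oci-datascience")
deployment = client.create_deployment(
    name="elastic-net-deployment",
    model_uri="models:/elastic-net/1",
    config={"deploy-config-file": "deployment_specification.yaml"},
)
print(deployment)
```
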
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Reporting security vulnerabilities
2 |
3 | Oracle values the independent security research community and believes that
4 | responsible disclosure of security vulnerabilities helps us ensure the security
5 | and privacy of all our users.
6 |
7 | Please do NOT raise a GitHub Issue to report a security vulnerability. If you
8 | believe you have found a security vulnerability, please submit a report to
9 | [secalert_us@oracle.com][1] preferably with a proof of concept. Please review
10 | some additional information on [how to report security vulnerabilities to Oracle][2].
11 | We encourage people who contact Oracle Security to use email encryption using
12 | [our encryption key][3].
13 |
14 | We ask that you do not use other channels or contact the project maintainers
15 | directly.
16 |
17 | Non-vulnerability related security issues including ideas for new or improved
18 | security features are welcome on GitHub Issues.
19 |
20 | ## Security updates, alerts and bulletins
21 |
22 | Security updates will be released on a regular cadence. Many of our projects
23 | will typically release security fixes in conjunction with the
24 | Oracle Critical Patch Update program. Additional
25 | information, including past advisories, is available on our [security alerts][4]
26 | page.
27 |
28 | ## Security-related information
29 |
30 | We will provide security related information such as a threat model, considerations
31 | for secure use, or any known security issues in our documentation. Please note
32 | that labs and sample code are intended to demonstrate a concept and may not be
33 | sufficiently hardened for production use.
34 |
35 | [1]: mailto:secalert_us@oracle.com
36 | [2]: https://www.oracle.com/corporate/security-practices/assurance/vulnerability/reporting.html
37 | [3]: https://www.oracle.com/security-alerts/encryptionkey.html
38 | [4]: https://www.oracle.com/security-alerts/
39 |
--------------------------------------------------------------------------------
/authorizer/Dockerfile:
--------------------------------------------------------------------------------
1 | # Provide the path of base OS image here.
2 | FROM ghcr.io/oracle/oraclelinux:8-slim
3 |
4 | # Install python and pip.
5 | RUN echo "-internal" > /etc/dnf/vars/ociregion &&\
6 | microdnf repolist &&\
7 | microdnf -y update &&\
8 | microdnf install -y python38 &&\
9 | microdnf install -y python38-pip &&\
10 | microdnf clean all
11 |
12 | COPY requirements.txt ./
13 |
14 | # The rest of this section installs the python dependencies specified in requirements.txt and then sets up the python path, docker CMD, entrypoint, etc. These commands should remain the same except the pip install, which might change depending on the command syntax and the PyPI repository.
15 | RUN /usr/bin/python3.8 -m pip install --default-timeout=100 --target /python/ -r requirements.txt
16 |
17 | WORKDIR /authorizer
18 | COPY src/authorizer.py /authorizer/
19 | COPY src/utils /authorizer/utils
20 |
21 | RUN rm -fr ~/.cache/pip /tmp* .pip_cache /var/cache/ /requirements.txt
22 | RUN mkdir /tmp/
23 |
24 | ENV FN_LISTENER=unix:/tmp/lsnr.sock
25 | ENV FN_FORMAT=http-stream
26 | ENV PYTHONPATH=/authorizer:/python:/authorizer/utils
27 | ENV RP_AUTH=true
28 | ENTRYPOINT ["/python/bin/fdk", "/authorizer/authorizer.py", "authorizer"]
29 |
--------------------------------------------------------------------------------
/authorizer/func.yaml:
--------------------------------------------------------------------------------
1 | schema_version: 20180708
2 | name: authorizer
3 | version: 0.0.1
4 | runtime: docker
5 | entrypoint: /python/bin/fdk /function/authorizer.py authorizer
6 | memory: 256
7 |
--------------------------------------------------------------------------------
/authorizer/requirements.txt:
--------------------------------------------------------------------------------
1 | fdk>=0.1.48
2 | oci>=2.85.0
--------------------------------------------------------------------------------
/authorizer/src/authorizer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8; -*-
3 |
4 | # Copyright (c) 2023 Oracle and/or its affiliates.
5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6 | import io
7 | import json
8 | import logging
9 | import copy
10 |
11 | import fdk.response
12 | from fdk import context, response
13 |
14 | from authorizer.src.utils.identity_utils import (
15 | ExtendedIdentityDataPlaneClient,
16 | AuthenticationException,
17 | AuthorizationException
18 | )
19 |
20 | from authorizer.src.utils.auth_utils import (
21 | get_signer,
22 | SignerType,
23 | do_authn,
24 | do_authz,
25 | get_group_ids_from_config
26 | )
27 |
28 | from authorizer.src.utils.header_utils import (
29 | extract_and_validate_headers,
30 | AuthorizationHeaderMissingException,
31 | MissingRequiredHeadersException
32 | )
33 |
34 | logger = logging.getLogger(__name__)
35 |
36 | # Initialising here so that this call is cached for future fn executions
37 | identity_client = ExtendedIdentityDataPlaneClient(
38 | config={},
39 | signer=get_signer(SignerType.AUTO)
40 | )
41 |
42 | # The rest api methods currently supported by mlflow
43 | # https://mlflow.org/docs/latest/rest-api.html
44 | MLFLOW_REST_API_METHODS = ["post", "get", "delete", "patch", "put"]
45 |
46 |
47 | def authorizer(ctx: context.InvokeContext, data: io.BytesIO = None) -> fdk.response.Response:
48 | """Performs authn and authz for given data.
49 |
50 | Parameters
51 | ----------
52 | ctx: InvokeContext
53 | An instance of InvokeContext.
54 | data: BytesIO
55 | Data in BytesIO format.
56 |
57 | Returns
58 | -------
59 | Response
60 | An instance of Response.
61 | """
62 | try:
63 | headers = extract_and_validate_headers(data.getvalue())
64 | except (
65 | AuthorizationHeaderMissingException,
66 | MissingRequiredHeadersException
67 | ):
68 | return response.Response(
69 | ctx, status_code=401, response_data=json.dumps(
70 | {
71 | "active": False,
72 | "wwwAuthenticate": "Signature"
73 | }
74 | )
75 | )
76 | path_segment = copy.deepcopy(headers["(request-target)"])
77 | principal = None
78 | for method in MLFLOW_REST_API_METHODS:
79 | headers["(request-target)"] = [method + " " + path_segment[0]]
80 | try:
81 | principal = do_authn(identity_client, headers)
82 | except AuthenticationException:
83 | pass
84 |
85 | if principal:
86 | try:
87 | do_authz(
88 | identity_client,
89 | principal,
90 | get_group_ids_from_config(ctx.Config())
91 | )
92 |
93 | return response.Response(
94 | ctx, status_code=200, response_data=json.dumps(
95 | {
96 | "active": True,
97 | "context": {
98 | "subjectId": principal.subject_id
99 | }
100 | }
101 | )
102 | )
103 | except AuthorizationException as ex:
104 | logger.error('Error occurred while performing authZ: %s', str(ex))
105 |
106 | return response.Response(
107 | ctx, status_code=401, response_data=json.dumps(
108 | {
109 | "active": False,
110 | "wwwAuthenticate": "Signature"
111 | }
112 | )
113 | )
114 |
--------------------------------------------------------------------------------
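
For reference, the shapes the handler above exchanges with API Gateway can be written out as plain Python dicts (inferred from the code; the header values and subject id below are illustrative only):

```python
# Sketch: the APIGW authorizer contract implemented by authorizer() above.
apigw_request = {
    "type": "USER_DEFINED",
    "data": {  # one entry per signed request header
        "authorization": 'Signature algorithm="rsa-sha256",'
                         'headers="date (request-target) host",'
                         'keyId="example-key-id",signature="example-signature"',
        "date": "Thu, 05 Jan 2023 21:31:40 GMT",
        "(request-target)": "/api/2.0/mlflow/experiments/search",
        "host": "tracking.example.com",
    },
}

# Returned with status 200 when both authn and authz succeed:
allow_response = {"active": True, "context": {"subjectId": "ocid1.user.oc1..example"}}

# Returned with status 401 on any failure:
deny_response = {"active": False, "wwwAuthenticate": "Signature"}
```
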
/authorizer/src/utils/auth_utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8; -*-
3 |
4 | # Copyright (c) 2023 Oracle and/or its affiliates.
5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6 | import os
7 | from enum import Enum
8 | from typing import Dict, List
9 |
10 | from oci.auth.signers import (
11 | InstancePrincipalsSecurityTokenSigner,
12 | get_resource_principals_signer
13 | )
14 | from oci.identity_data_plane.models import (
15 | AuthenticateClientDetails,
16 | AuthenticateClientResult,
17 | FilterGroupMembershipDetails,
18 | FilterGroupMembershipResult
19 | )
20 | from oci.identity_data_plane.models.principal import Principal
21 |
22 | from authorizer.src.utils.identity_utils import (
23 | AuthenticationException,
24 | AuthorizationException,
25 | ExtendedIdentityDataPlaneClient
26 | )
27 |
28 | ALLOWED_GROUP_IDS = "GROUP_IDS"
29 |
30 |
31 | class SignerType(Enum):
32 | AUTO = 0
33 | RESOURCE_PRINCIPAL = 1
34 | INSTANCE_PRINCIPAL = 2
35 |
36 |
37 | def get_signer(signer_type: SignerType = SignerType.AUTO):
38 | """Gets the corresponding signer from signer type.
39 |
40 | Parameters
41 | ----------
42 | signer_type: SignerType
43 | The signer type
44 |
45 | Returns
46 | -------
47 | Signer:
48 | An instance of Signer.
49 | """
50 | if signer_type == SignerType.AUTO:
51 | if _get_env_bool("RP_AUTH", False):
52 | signer_type = SignerType.RESOURCE_PRINCIPAL
53 | else:
54 | signer_type = SignerType.INSTANCE_PRINCIPAL
55 | if signer_type == SignerType.RESOURCE_PRINCIPAL:
56 | return get_resource_principals_signer()
57 | else:
58 | return _get_internal_instance_principal_signer()
59 |
60 |
61 | def do_authn(
62 | identity_client: ExtendedIdentityDataPlaneClient,
63 | headers: Dict[str, List[str]]
64 | ) -> Principal:
65 | """Performs the authn validation from given headers.
66 |
67 | Parameters
68 | ----------
69 | identity_client: ExtendedIdentityDataPlaneClient
70 | An instance of ExtendedIdentityDataPlaneClient
71 | headers: dict
72 | A dict of headers to be authenticated
73 |
74 | Returns
75 | -------
76 | Principal:
77 | An instance of Principal
78 | """
79 | client_details = AuthenticateClientDetails()
80 | client_details.request_headers = headers
81 | authenticate_response = identity_client.authenticate_client(authenticate_client_details=client_details)
82 | authenticate_result: AuthenticateClientResult = authenticate_response.data
83 | if authenticate_result.principal is None:
84 | raise AuthenticationException(authenticate_response.status, authenticate_result.error_message)
85 | return authenticate_result.principal
86 |
87 |
88 | def do_authz(
89 | identity_client: ExtendedIdentityDataPlaneClient,
90 | principal: Principal,
91 | expected_group_ids: List[str]
92 | ) -> List[str]:
93 | """Performs the authz validation from principal and expected group ids.
94 |
95 | Parameters
96 | ----------
97 | identity_client: ExtendedIdentityDataPlaneClient
98 | An instance of ExtendedIdentityDataPlaneClient.
99 | principal: Principal
100 | An instance of Principal.
101 | expected_group_ids: list
102 | A list of allowed group ids.
103 |
104 | Returns
105 | -------
106 | List:
107 | A list of allowed group ids.
108 | """
109 | filter_group_membership_details = FilterGroupMembershipDetails()
110 | filter_group_membership_details.principal = principal
111 | filter_group_membership_details.group_ids = expected_group_ids
112 | membership_response = identity_client.filter_group_membership(filter_group_membership_details)
113 | membership_result: FilterGroupMembershipResult = membership_response.data
114 | if not set(expected_group_ids).intersection(membership_result.group_ids):
115 | raise AuthorizationException(membership_response.status, expected_group_ids, principal.subject_id)
116 | return membership_result.group_ids
117 |
118 |
119 | def get_group_ids_from_config(config: Dict) -> List[str]:
120 | """Gets group ids from config.
121 |
122 | Parameters
123 | ----------
124 | config: dict
125 | A dict of configurations
126 |
127 | Returns
128 | -------
129 | List
130 | A list of group ids, separated in the original string by ','
131 | """
132 | return config.get(ALLOWED_GROUP_IDS, "").replace(" ", "").split(",")
133 |
134 |
135 | def _get_internal_instance_principal_signer() -> InstancePrincipalsSecurityTokenSigner:
136 | """Overrides metadata url of InstancePrincipalSigner class"""
137 | override_metadata_url = os.getenv(
138 | "METADATA_OVERRIDE_URL",
139 | InstancePrincipalsSecurityTokenSigner.METADATA_URL_BASE
140 | )
141 | InstancePrincipalsSecurityTokenSigner.METADATA_URL_BASE = override_metadata_url
142 | InstancePrincipalsSecurityTokenSigner.GET_REGION_URL = \
143 | '{}/instance/region'.format(override_metadata_url)
144 | InstancePrincipalsSecurityTokenSigner.LEAF_CERTIFICATE_URL = \
145 | '{}/identity/cert.pem'.format(override_metadata_url)
146 | InstancePrincipalsSecurityTokenSigner.LEAF_CERTIFICATE_PRIVATE_KEY_URL = \
147 | '{}/identity/key.pem'.format(override_metadata_url)
148 | InstancePrincipalsSecurityTokenSigner.INTERMEDIATE_CERTIFICATE_URL = \
149 | '{}/identity/intermediate.pem'.format(override_metadata_url)
150 | return InstancePrincipalsSecurityTokenSigner()
151 |
152 |
153 | def _get_env_bool(env_var: str, default: bool = False) -> bool:
154 | env_val = os.getenv(env_var)
155 |
156 | if env_val is None:
157 | return default
158 |
159 | return env_val.lower() in ("true","t","1")
160 |
--------------------------------------------------------------------------------
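
The `GROUP_IDS` function configuration consumed by `get_group_ids_from_config` is a single comma-separated string. A small sketch of the round trip (the group OCIDs are placeholders; assumes the repo root is on `PYTHONPATH` with the `oci` dependency installed):

```python
# Sketch: how a GROUP_IDS function-config value becomes a list of group ids.
from authorizer.src.utils.auth_utils import get_group_ids_from_config

config = {"GROUP_IDS": "ocid1.group.oc1..aaa, ocid1.group.oc1..bbb"}
assert get_group_ids_from_config(config) == [
    "ocid1.group.oc1..aaa",
    "ocid1.group.oc1..bbb",
]
```
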
/authorizer/src/utils/header_utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8; -*-
3 |
4 | # Copyright (c) 2023 Oracle and/or its affiliates.
5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6 | import json
7 | from typing import Dict, List
8 | from urllib import request
9 |
10 | HEADERS_JSON_KEY = "data"
11 | AUTHORIZATION_KEY = "authorization"
12 | SIGNATURE_HEADERS_KEY = "headers"
13 |
14 |
15 | class MissingRequiredHeadersException(Exception):
16 | def __init__(
17 | self,
18 | required_headers: List[str],
19 | provided_headers: List[str]
20 | ):
21 | self.required_headers = required_headers
22 | self.provided_headers = provided_headers
23 |
24 | def __str__(self):
25 | return "Headers required for authentication were not provided.\nProvided headers: {0}\nRequired headers: {1} \
26 | \nMissing headers: {2}".format(self.provided_headers, self.required_headers, set(self.required_headers)
27 | .difference(set(self.provided_headers)))
28 |
29 |
30 | class AuthorizationHeaderMissingException(Exception):
31 | def __str__(self):
32 | return "Expected the authorization header to be present, but it was not found"
33 |
34 |
35 | class AuthorizationSigningHeadersMissingException(Exception):
36 | def __str__(self):
37 |         return "Headers used to sign the request were not present in the authorization header"
38 |
39 |
40 | def extract_and_validate_headers(data: bytes) -> Dict[str, List[str]]:
41 |     """Extracts headers from the JSON document passed by API Gateway and outputs them
42 |     in the format required by the authenticate client API
43 |
44 |     input: {
45 |         "type": "USER_DEFINED",
46 |         "data": {
47 |             "<header1>": "<value1>",
48 |             "<header2>": "<value2>",
49 |             "<header3>": "<value3>"
50 |         }
51 |     }
52 |     output: {
53 |         "<header1>": ["<value1>"],
54 |         "<header2>": ["<value2>"]
55 |     }
56 |
57 | Parameters
58 | ----------
59 | data: bytes
60 | Input data in bytes.
61 |
62 | Returns
63 | -------
64 | Dict:
65 | A string-list dict.
66 | """
67 | headers = json.loads(data).get(HEADERS_JSON_KEY)
68 |     headers = {k.lower(): [v] for k, v in headers.items()}
69 | if not headers.get("date") and headers.get("x-date"):
70 | headers["date"] = headers["x-date"]
71 | try:
72 | required_headers = _get_required_headers_from_signature(headers.get(AUTHORIZATION_KEY)[0])
73 | except TypeError:
74 | raise AuthorizationHeaderMissingException()
75 | try:
76 | return {key: headers[key] for key in required_headers}
77 | except KeyError:
78 | raise MissingRequiredHeadersException(required_headers, provided_headers=list(headers.keys()))
79 |
80 |
81 | def _get_required_headers_from_signature(auth_header: str) -> List[str]:
82 |     """Extracts the headers required for validation from the authorization header:
83 | 
84 |     input: 'Signature algorithm="rsa-sha256", headers="date (request-target) host", keyId="<key-id>",
85 |         signature="<signature>"'
86 | output: ['date', '(request-target)', 'host', 'authorization']
87 |
88 | Parameters
89 | ----------
90 | auth_header: str
91 | The auth header string.
92 |
93 | Returns
94 | -------
95 | List:
96 | A list of headers from auth string.
97 | """
98 | kv = request.parse_keqv_list(request.parse_http_list(auth_header))
99 | signing_headers = kv.get(SIGNATURE_HEADERS_KEY)
100 | if signing_headers is None:
101 | raise AuthorizationSigningHeadersMissingException()
102 | required_headers = signing_headers.split(" ")
103 | required_headers.append(AUTHORIZATION_KEY)
104 | return list(map(str.lower, required_headers))
105 |
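106 | # Example (a sketch of the expected flow): for an APIGW payload whose
107 | # authorization header signs "date (request-target) host",
108 | # extract_and_validate_headers() returns just those headers plus
109 | # "authorization", each value wrapped in a single-element list, which is the
110 | # shape the authenticate client API expects.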
--------------------------------------------------------------------------------
/authorizer/src/utils/identity_utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8; -*-
3 |
4 | # Copyright (c) 2023 Oracle and/or its affiliates.
5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6 | from typing import List
7 |
8 | import six
9 | from oci import retry
10 | from oci.identity_data_plane import DataplaneClient
11 | from oci.identity_data_plane.models import (
12 | AuthenticateClientDetails,
13 | FilterGroupMembershipDetails
14 | )
15 | from oci.response import Response
16 |
17 |
18 | class ExtendedIdentityDataPlaneClient(DataplaneClient):
19 | """Identity Dataplane Client with additional API support for authn/authz validation."""
20 |
21 | def authenticate_client(
22 | self,
23 | authenticate_client_details: AuthenticateClientDetails,
24 | **kwargs
25 | ) -> Response:
26 |         """Performs authn validation against the OCI backend.
27 |
28 | Parameters
29 | ----------
30 | authenticate_client_details: AuthenticateClientDetails
31 | An instance of AuthenticateClientDetails to send to oci backend.
32 | kwargs:
33 | retry_strategy: obj
34 | A retry strategy to apply to all calls made by this service client (i.e. at the client level).
35 | There is no retry strategy applied by default.
36 |
37 | Returns
38 | -------
39 | Response:
40 | An instance of Response
41 | """
42 | resource_path = "/authentication/authenticateClient"
43 | method = "POST"
44 | operation_name = "authenticate_client"
45 | api_reference_link = ""
46 | expected_kwargs = ["retry_strategy"]
47 | extra_kwargs = [_key for _key in six.iterkeys(kwargs) if _key not in expected_kwargs]
48 | if extra_kwargs:
49 | raise ValueError(
50 | "authenticate_client got unknown kwargs: {!r}".format(extra_kwargs))
51 |
52 | header_params = {
53 | "accept": "application/json",
54 | "content-type": "application/json"
55 | }
56 |
57 | retry_strategy = self.base_client.get_preferred_retry_strategy(
58 | operation_retry_strategy=kwargs.get('retry_strategy'),
59 | client_retry_strategy=self.retry_strategy
60 | )
61 |
62 | if retry_strategy:
63 | if not isinstance(retry_strategy, retry.NoneRetryStrategy):
64 | self.base_client.add_opc_client_retries_header(header_params)
65 | retry_strategy.add_circuit_breaker_callback(self.circuit_breaker_callback)
66 | return retry_strategy.make_retrying_call(
67 | self.base_client.call_api,
68 | resource_path=resource_path,
69 | method=method,
70 | header_params=header_params,
71 | body=authenticate_client_details,
72 | response_type="AuthenticateClientResult",
73 | allow_control_chars=None,
74 | operation_name=operation_name,
75 | api_reference_link=api_reference_link)
76 | else:
77 | return self.base_client.call_api(
78 | resource_path=resource_path,
79 | method=method,
80 | header_params=header_params,
81 | body=authenticate_client_details,
82 | response_type="AuthenticateClientResult",
83 | allow_control_chars=None,
84 | operation_name=operation_name,
85 | api_reference_link=api_reference_link)
86 |
87 | def filter_group_membership(
88 | self,
89 | filter_membership_details: FilterGroupMembershipDetails,
90 | **kwargs
91 | ) -> Response:
92 |         """Validates whether the given group ids are authorized, using the OCI backend.
93 |
94 | Parameters
95 | ----------
96 | filter_membership_details: FilterGroupMembershipDetails
97 | An instance of FilterGroupMembershipDetails to send to oci backend.
98 | kwargs:
99 | retry_strategy: obj
100 | A retry strategy to apply to all calls made by this service client (i.e. at the client level).
101 | There is no retry strategy applied by default.
102 |
103 | Returns
104 | -------
105 | Response:
106 | An instance of Response
107 | """
108 | resource_path = "/filterGroupMembership"
109 | method = "POST"
110 | operation_name = "filter_group_membership"
111 | api_reference_link = ""
112 | expected_kwargs = ["retry_strategy"]
113 |
114 | extra_kwargs = [_key for _key in six.iterkeys(kwargs) if _key not in expected_kwargs]
115 | if extra_kwargs:
116 | raise ValueError(
117 | "filter_group_membership got unknown kwargs: {!r}".format(extra_kwargs))
118 |
119 | header_params = {
120 | "accept": "application/json",
121 | "content-type": "application/json"
122 | }
123 |
124 | retry_strategy = self.base_client.get_preferred_retry_strategy(
125 | operation_retry_strategy=kwargs.get('retry_strategy'),
126 | client_retry_strategy=self.retry_strategy
127 | )
128 |
129 | if retry_strategy:
130 | if not isinstance(retry_strategy, retry.NoneRetryStrategy):
131 | self.base_client.add_opc_client_retries_header(header_params)
132 | retry_strategy.add_circuit_breaker_callback(self.circuit_breaker_callback)
133 | return retry_strategy.make_retrying_call(
134 | self.base_client.call_api,
135 | resource_path=resource_path,
136 | method=method,
137 | header_params=header_params,
138 | body=filter_membership_details,
139 | response_type="FilterGroupMembershipResult",
140 | allow_control_chars=None,
141 | operation_name=operation_name,
142 | api_reference_link=api_reference_link)
143 | else:
144 | return self.base_client.call_api(
145 | resource_path=resource_path,
146 | method=method,
147 | header_params=header_params,
148 | body=filter_membership_details,
149 | response_type="FilterGroupMembershipResult",
150 | allow_control_chars=None,
151 | operation_name=operation_name,
152 | api_reference_link=api_reference_link)
153 |
154 |
155 | class AuthenticationException(Exception):
156 | def __init__(self, status_code: int, error_msg: str):
157 | self.status_code = status_code
158 | self.error_msg = error_msg
159 |
160 | def __str__(self):
161 | return "Could not authenticate client: Status code: {0}, Error Message: {1}".format(
162 | self.status_code, self.error_msg)
163 |
164 |
165 | class AuthorizationException(Exception):
166 | def __init__(self, status_code: int, expected_group_ids: List[str], subject: str):
167 | self.expected_group_ids = expected_group_ids
168 | self.status_code = status_code
169 | self.subject = subject
170 |
171 | def __str__(self):
172 |         return "Could not authorize client: Status code: {0}. Expected subject {1} to be part of any of the " \
173 |                "following groups: {2}".format(self.status_code, self.subject, self.expected_group_ids)
174 |
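175 | # Example (a sketch; assumes an OCI signer is already available): the authorizer
176 | # builds this client like any other OCI SDK client and calls the two operations
177 | # above for authn/authz:
178 | #
179 | #   client = ExtendedIdentityDataPlaneClient(config={}, signer=signer)
180 | #   principal = client.authenticate_client(
181 | #       authenticate_client_details=details
182 | #   ).data.principal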
--------------------------------------------------------------------------------
/authorizer/tests/test_auth_utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8; -*-
3 |
4 | # Copyright (c) 2023 Oracle and/or its affiliates.
5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6 | import os
7 | from unittest.mock import MagicMock, patch
8 |
9 | import oci.identity_data_plane.models
10 | import pytest
11 |
12 | from authorizer.src.utils import auth_utils
13 | from authorizer.src.utils.identity_utils import (
14 | AuthenticationException,
15 | ExtendedIdentityDataPlaneClient
16 | )
17 |
18 | MOCK_RET_VAL = "MOCK_RET_VAL"
19 |
20 |
21 | @patch('authorizer.src.utils.auth_utils._get_internal_instance_principal_signer')
22 | def test_get_ip_signer(get_ip_mock: MagicMock):
23 | get_ip_mock.return_value = MOCK_RET_VAL
24 | assert (
25 | auth_utils.get_signer(auth_utils.SignerType.INSTANCE_PRINCIPAL) == MOCK_RET_VAL
26 | )
27 | get_ip_mock.assert_called_once()
28 |
29 | def test_get_group_ids():
30 | group_ids = auth_utils.get_group_ids_from_config({"GROUP_IDS": "id1, id2, id3, id4 "})
31 | assert group_ids == ["id1", "id2", "id3", "id4"]
32 |
33 | @patch('authorizer.src.utils.auth_utils.get_resource_principals_signer')
34 | def test_get_rp_signer(rp_signer_mock: MagicMock):
35 | rp_signer_mock.return_value = MOCK_RET_VAL
36 | assert (
37 | auth_utils.get_signer(auth_utils.SignerType.RESOURCE_PRINCIPAL) == MOCK_RET_VAL
38 | )
39 | rp_signer_mock.assert_called_once()
40 |
41 | @patch('authorizer.src.utils.auth_utils._get_env_bool')
42 | def test_auto_ip_signer(get_env_mock: MagicMock):
43 | os.environ["RP_AUTH"] = "false"
44 | get_env_mock.return_value = False
45 | with patch('authorizer.src.utils.auth_utils.InstancePrincipalsSecurityTokenSigner') as ip_signer_mock:
46 | auth_utils.get_signer(auth_utils.SignerType.AUTO)
47 | assert (
48 | auth_utils.get_signer(auth_utils.SignerType.AUTO) == ip_signer_mock.return_value
49 | )
50 | ip_signer_mock.assert_called()
51 | get_env_mock.assert_called_with("RP_AUTH", False)
52 |
53 | @patch('authorizer.src.utils.auth_utils._get_env_bool')
54 | @patch('authorizer.src.utils.auth_utils.get_resource_principals_signer')
55 | def test_auto_rp_signer(rp_signer_mock: MagicMock, get_env_mock: MagicMock):
56 | os.environ["RP_AUTH"] = "true"
57 | rp_signer_mock.return_value = MOCK_RET_VAL
58 | get_env_mock.return_value = True
59 | assert (
60 | auth_utils.get_signer(auth_utils.SignerType.AUTO) == MOCK_RET_VAL
61 | )
62 | get_env_mock.assert_called_once_with("RP_AUTH", False)
63 | rp_signer_mock.assert_called_once()
64 |
65 | @patch('authorizer.src.utils.auth_utils.InstancePrincipalsSecurityTokenSigner')
66 | def test_get_internal_ip_signer(ip_signer_mock: MagicMock):
67 | ip_signer_mock.return_value = MOCK_RET_VAL
68 | test_override_url = "test"
69 | os.environ["METADATA_OVERRIDE_URL"] = test_override_url
70 | assert (
71 | auth_utils._get_internal_instance_principal_signer() == MOCK_RET_VAL
72 | )
73 | assert (
74 | ip_signer_mock.GET_REGION_URL == test_override_url + "/instance/region"
75 | )
76 | assert (
77 | ip_signer_mock.METADATA_URL_BASE == test_override_url
78 | )
79 | ip_signer_mock.assert_called_once()
80 |
81 | def test_get_env_bool_val_error():
82 | os.environ["_test"] = "garbage"
83 | with pytest.raises(ValueError):
84 | auth_utils._get_env_bool("_test")
85 |
86 | def test_get_env_bool_val_none():
87 | os.environ.pop('_test', None)
88 | assert (auth_utils._get_env_bool("_test", False) is False)
89 | assert (auth_utils._get_env_bool("_test", True) is True)
90 |
91 | def test_get_env_bool_val_true():
92 | os.environ["_test"] = "tRuE"
93 | assert (auth_utils._get_env_bool("_test", False) is True)
94 |
95 | def test_get_env_bool_val_false():
96 | os.environ["_test"] = "FALSe"
97 | assert (auth_utils._get_env_bool("_test", True) is False)
98 |
99 | def test_do_authn_fail():
100 | authenticate_result = oci.identity_data_plane.models.AuthenticateClientResult()
101 | authenticate_result.principal = None
102 | authenticate_result.error_message = "authn failed"
103 |
104 | response = oci.response.Response(
105 | status=200,
106 | data=authenticate_result,
107 | headers=None,
108 | request=None
109 | )
110 | response.data = authenticate_result
111 |
112 | headers = {"foo": ["test"]}
113 |
114 | auth_client_details = oci.identity_data_plane.models.AuthenticateClientDetails()
115 | auth_client_details.request_headers = headers
116 |
117 | idc_mock = MagicMock(type=ExtendedIdentityDataPlaneClient)
118 | idc_mock.authenticate_client = MagicMock()
119 | idc_mock.authenticate_client.return_value = response
120 | with pytest.raises(AuthenticationException):
121 | auth_utils.do_authn(idc_mock, headers)
122 |
123 | idc_mock.authenticate_client.assert_called_once_with(
124 | authenticate_client_details=auth_client_details
125 | )
126 |
127 | def test_do_authn_pass():
128 | authenticate_result = oci.identity_data_plane.models.AuthenticateClientResult()
129 | authenticate_result.principal = MOCK_RET_VAL
130 | authenticate_result.error_message = None
131 |
132 | response = oci.response.Response(
133 | status=200,
134 | data=authenticate_result,
135 | headers=None,
136 | request=None
137 | )
138 |
139 | headers = {"foo": ["test"]}
140 |
141 | auth_client_details = oci.identity_data_plane.models.AuthenticateClientDetails()
142 | auth_client_details.request_headers = headers
143 |
144 | idc_mock = MagicMock(type=ExtendedIdentityDataPlaneClient)
145 | idc_mock.authenticate_client = MagicMock()
146 | idc_mock.authenticate_client.return_value = response
147 | assert (
148 | auth_utils.do_authn(idc_mock, headers) == authenticate_result.principal
149 | )
150 |
151 | idc_mock.authenticate_client.assert_called_once_with(authenticate_client_details=auth_client_details)
152 |
153 | def test_authz_pass():
154 | principal = oci.identity_data_plane.models.Principal()
155 | expected_group_ids = ["g1", "g3"]
156 | idc = MagicMock(type=ExtendedIdentityDataPlaneClient)
157 | idc.filter_group_membership = MagicMock()
158 |
159 | filter_group_membership_details = oci.identity_data_plane.models.FilterGroupMembershipDetails()
160 | filter_group_membership_details.principal = principal
161 | filter_group_membership_details.group_ids = expected_group_ids
162 |
163 | filter_group_membership_result = oci.identity_data_plane.models.FilterGroupMembershipResult()
164 | filter_group_membership_result.group_ids = ["g1"]
165 | filter_group_membership_result.principal = principal
166 |
167 | response = oci.response.Response(
168 | status=200,
169 | data=filter_group_membership_result,
170 | headers=None,
171 | request=None
172 | )
173 | idc.filter_group_membership.return_value = response
174 |
175 | assert (
176 | auth_utils.do_authz(idc, principal, expected_group_ids) == filter_group_membership_result.group_ids
177 | )
178 | idc.filter_group_membership.assert_called_once_with(filter_group_membership_details)
179 |
180 | def test_authz_fail():
181 | principal = oci.identity_data_plane.models.Principal()
182 | expected_group_ids = ["g1", "g3"]
183 | idc = MagicMock(type=ExtendedIdentityDataPlaneClient)
184 | idc.filter_group_membership = MagicMock()
185 |
186 | filter_group_membership_details = oci.identity_data_plane.models.FilterGroupMembershipDetails()
187 | filter_group_membership_details.principal = principal
188 | filter_group_membership_details.group_ids = expected_group_ids
189 |
190 | filter_group_membership_result = oci.identity_data_plane.models.FilterGroupMembershipResult()
191 | filter_group_membership_result.group_ids = ["g2"]
192 | filter_group_membership_result.principal = principal
193 |
194 | response = oci.response.Response(
195 | status=401,
196 | data=filter_group_membership_result,
197 | headers=None,
198 | request=None
199 | )
200 | idc.filter_group_membership.return_value = response
201 |
202 | with pytest.raises(auth_utils.AuthorizationException):
203 | auth_utils.do_authz(idc, principal, expected_group_ids)
204 | idc.filter_group_membership.assert_called_once_with(filter_group_membership_details)
205 |
--------------------------------------------------------------------------------
/authorizer/tests/test_header_utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8; -*-
3 |
4 | # Copyright (c) 2023 Oracle and/or its affiliates.
5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6 | import json
7 | from unittest.mock import MagicMock, patch
8 |
9 | import pytest
10 |
11 | from authorizer.src.utils.header_utils import (
12 | AuthorizationHeaderMissingException,
13 | AuthorizationSigningHeadersMissingException,
14 | MissingRequiredHeadersException,
15 | _get_required_headers_from_signature,
16 | extract_and_validate_headers
17 | )
18 |
19 |
20 | def generate_apigw_json_payload(headers: dict) -> bytes:
21 | headers = {k.lower(): v for k, v in headers.items()}
22 | pl = {"type": "USER_DEFINED",
23 | "data": headers}
24 | return bytes(json.dumps(pl), 'utf-8')
25 |
26 | @patch('authorizer.src.utils.header_utils._get_required_headers_from_signature')
27 | def test_extract_validate_headers_pass(_get_required_headers_from_signature: MagicMock):
28 | headers = {"FoO": "test", "method": "get", "authorization": "authz"}
29 | expected_response = {"foo": ["test"]}
30 | _get_required_headers_from_signature.return_value = ["foo"]
31 | assert (extract_and_validate_headers(generate_apigw_json_payload(headers)) == expected_response)
32 | _get_required_headers_from_signature.assert_called_once_with("authz")
33 |
34 | def test_extract_validate_headers_missing_authz():
35 | headers = {"FoO": "test", "method": "get"}
36 | with pytest.raises(AuthorizationHeaderMissingException):
37 | extract_and_validate_headers(generate_apigw_json_payload(headers))
38 |
39 | @patch('authorizer.src.utils.header_utils._get_required_headers_from_signature')
40 | def test_extract_validate_headers_missing_headers(_get_required_headers_from_signature: MagicMock):
41 | headers = {"FoO": "test", "method": "get", "authorization": "authz"}
42 | _get_required_headers_from_signature.return_value = ["foo1"]
43 |
44 | with pytest.raises(MissingRequiredHeadersException):
45 | extract_and_validate_headers(generate_apigw_json_payload(headers))
46 | _get_required_headers_from_signature.assert_called_once_with("authz")
47 |
48 | def test_get_required_headers_from_signature_pass():
49 | authz_header = 'Signature algorithm="rsa-sha256", headers="Date (request-target) host", signature=""'
50 | expected_resp = ["date", "(request-target)", "host", "authorization"]
51 | assert (_get_required_headers_from_signature(authz_header) == expected_resp)
52 |
53 | def test_get_required_headers_from_signature_fail():
54 | authz_header = 'Signature algorithm="rsa-sha256", signature=""'
55 | with pytest.raises(AuthorizationSigningHeadersMissingException):
56 | _get_required_headers_from_signature(authz_header)
57 |
--------------------------------------------------------------------------------
/container-image/Dockerfile:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Oracle and/or its affiliates.
2 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
3 |
4 | FROM ghcr.io/oracle/oraclelinux8-instantclient:21
5 |
6 | RUN rm -rf /var/cache/yum/* && yum clean all && yum install -y gcc make patch vim iproute net-tools git && rm -rf /var/cache/yum/*
7 | RUN curl -L https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > miniconda.sh
8 | RUN bash ./miniconda.sh -b -p /miniconda; rm ./miniconda.sh;
9 | ENV PATH="/miniconda/bin:$PATH"
10 |
11 | USER root
12 | # Create sync directory and expose as environment variable
13 |
14 | ARG CONDA_ENV_FILE=environment.yaml
15 | ARG CONDA_ENV_NAME=oci-mlflow
16 | ARG MLFLOW_DIR=/etc/mlflow
17 | ARG CONTAINER_ARTIFACT_DIR=container-image
18 | ARG RND
19 |
20 | COPY ${CONTAINER_ARTIFACT_DIR}/${CONDA_ENV_FILE} /opt/env.yaml
21 | RUN conda install -y conda-forge::mamba && mamba env create -f /opt/env.yaml --name ${CONDA_ENV_NAME} && conda clean -afy
22 | ENV PATH="/miniconda/envs/${CONDA_ENV_NAME}/bin:$PATH"
23 |
24 | RUN conda init bash && source ~/.bashrc && conda activate ${CONDA_ENV_NAME}
25 |
26 | RUN mkdir ${MLFLOW_DIR}
27 | COPY ${CONTAINER_ARTIFACT_DIR}/run/* ${MLFLOW_DIR}/
28 | RUN chmod a+x ${MLFLOW_DIR}/launch_mlflow.sh
29 |
30 | ENV MLFLOW_DIR=${MLFLOW_DIR}
31 |
32 | EXPOSE 5000
33 | HEALTHCHECK --interval=30s CMD curl -f -sI http://localhost:5000 || exit 1
34 |
35 | RUN if [ -f ${MLFLOW_DIR}/oci_mlflow*.whl ]; then \
36 | local_whl=$(find ${MLFLOW_DIR} -name "*.whl" -exec basename {} \; | head -n 1 ); \
37 | source ~/.bashrc && conda activate ${CONDA_ENV_NAME} && pip install ${MLFLOW_DIR}/$local_whl; \
38 | fi
39 |
40 | RUN echo "conda activate oci-mlflow">>/root/.bashrc
41 | SHELL ["/bin/bash", "--login", "-c"]
42 |
43 | ENTRYPOINT [ "bash", "--login" , "-c"]
44 | CMD ["python $MLFLOW_DIR/run.py"]
45 |
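46 | # Example build (a sketch; assumes the repository root as the build context,
47 | # since CONTAINER_ARTIFACT_DIR defaults to "container-image"):
48 | #
49 | #   docker build -t oci-mlflow:latest -f container-image/Dockerfile .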
--------------------------------------------------------------------------------
/container-image/environment.yaml:
--------------------------------------------------------------------------------
1 | dependencies:
2 | - main::python=3.9
3 | - main::pip
4 | - pip:
5 | - oracledb
6 | - mlflow
7 | - oracle-ads>=2.8.5
8 | - mysql-connector-python
9 | - oci-mlflow
10 |
--------------------------------------------------------------------------------
/container-image/run/launch_mlflow.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash --login
2 | # Copyright (c) 2023 Oracle and/or its affiliates.
3 | # Licensed under the Universal Permissive License v 1.0 as shown at
4 | # https://oss.oracle.com/licenses/upl/
5 |
6 | set -m -e -o pipefail
7 |
8 | conda activate oci-mlflow
9 |
10 | echo "========== MLflow server is launching... =========="
11 |
12 | mlflow server "$@" || exit_code=$?
13 | 
14 | echo "========== MLflow server is shutting down... =========="
15 | 
16 | exit ${exit_code:-0}
17 | 
--------------------------------------------------------------------------------
/container-image/run/run.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8; -*-
3 |
4 | # Copyright (c) 2023 Oracle and/or its affiliates.
5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6 |
7 | import ads
8 | import logging
9 | import os
10 | import sys
11 | import subprocess
12 | import shlex
13 |
14 | logger = logging.getLogger(__name__)
15 | handler = logging.StreamHandler(sys.stdout)
16 | handler.setLevel(logging.INFO)
17 | logger.addHandler(handler)
18 |
19 | BACKEND_PROVIDER = os.environ.get("BACKEND_PROVIDER", "sqlite")
20 | MYSQL = "mysql"
21 | SQLITE = "sqlite"
22 | OBJECT_STORAGE = "object_storage"
23 | DEFAULT_DB_NAME = "mlflow"
24 | ARTIFACT_STORE_URI = "ARTIFACT_STORE_URI"
25 | EXTRA_MLFLOW_OPTIONS = "EXTRA_MLFLOW_OPTIONS"
26 | MLFLOW_LAUNCH_SCRIPT = "/etc/mlflow/launch_mlflow.sh"
27 | DB_SECRET_OCID = "DB_SECRET_OCID"
28 | PATCH_SCRIPT_PATH = "/etc/mlflow/patches"
29 |
30 |
31 | # Default authentication is resource_principal. To switch to other authentication forms,
32 | # such as `api_key` or `instance_principal`, set the environment variable `OCIFS_IAM_TYPE`.
33 |
34 | AUTH_TYPE = os.environ.get("OCIFS_IAM_TYPE", "resource_principal")
35 |
36 |
37 | class BackendStore:
38 | def uri():
39 | pass
40 |
41 |
42 | class MySQLBackendStore(BackendStore):
43 | DEFAULT_PORT = "3306"
44 |
45 | def uri():
46 | """
47 |         Fetch credentials from the vault using the secret OCID. This requires the credentials to be saved in the vault using the oracle-ads provided API.
48 |         More information - https://accelerated-data-science.readthedocs.io/en/latest/user_guide/secrets/mysql.html
49 | 
50 |         If the vault OCID is not supplied, retrieve the db credentials from environment variables.
51 | """
52 | if os.environ.get(DB_SECRET_OCID):
53 | from ads.secrets.mysqldb import MySQLDBSecretKeeper
54 |
55 | secret_ocid = os.environ[DB_SECRET_OCID]
56 | logger.info(
57 | f"Found environment variable {DB_SECRET_OCID}. Retrieving secret using auth type: {AUTH_TYPE}"
58 | )
59 |
60 | ads.set_auth(AUTH_TYPE)
61 |
62 | mysqldb_secret = MySQLDBSecretKeeper.load_secret(secret_ocid).to_dict()
63 | username = mysqldb_secret["user_name"]
64 | password = mysqldb_secret["password"]
65 | host = mysqldb_secret["host"]
66 | db_port = mysqldb_secret.get("port", MySQLBackendStore.DEFAULT_PORT)
67 | db_name = mysqldb_secret.get(
68 | "database"
69 | ) # if database was not saved in the secret, the value for 'database' will be None
70 | if db_name is None:
71 | db_name = DEFAULT_DB_NAME
72 | else:
73 | username = os.environ.get("DBUSERNAME")
74 | password = os.environ.get("DBPASSWORD")
75 | host = os.environ.get("DBHOST")
76 | db_name = os.environ.get("DBNAME", DEFAULT_DB_NAME)
77 | db_port = os.environ.get("DBPORT", MySQLBackendStore.DEFAULT_PORT)
78 |
79 | return (
80 | f"mysql+mysqlconnector://{username}:{password}@{host}:{db_port}/{db_name}"
81 | )
82 |
83 |
84 | class SQLiteBackendStore(BackendStore):
85 | def uri():
86 | """
87 | Reference:
88 | ----------
89 |
90 | https://mlflow.org/docs/latest/tracking.html#scenario-3-mlflow-on-localhost-with-tracking-server
91 |
92 | """
93 | return "sqlite:///mydb.sqlite"
94 |
95 |
96 | class BackendStoreFactory:
97 | providers = {MYSQL: MySQLBackendStore, SQLITE: SQLiteBackendStore}
98 |
99 | @classmethod
100 | def handler(cls, name):
101 | return cls.providers.get(name)
102 |
103 |
104 | def generate_backend_store_uri(provider):
105 | return BackendStoreFactory.handler(provider).uri()
106 |
107 |
108 | def configure_mlflow_environment():
109 | mlflow_options = {}
110 | if not os.environ.get("MLFLOW_BACKEND_STORE_URI"):
111 | backend_store_uri = generate_backend_store_uri(BACKEND_PROVIDER)
112 | mlflow_options = {"backend-store-uri": backend_store_uri}
113 |
114 | mlflow_cmd_option = " ".join([f"--{k} {mlflow_options[k]}" for k in mlflow_options])
115 | return mlflow_cmd_option
116 |
117 |
118 | def launch_mlflow():
119 | try:
120 | mlflow_cmd_option = configure_mlflow_environment()
121 | except Exception as e:
122 | logger.error(e)
123 |         raise Exception("Failed to create MLflow configuration") from e
124 |
125 | # shlex.split can cause issues for "--gunicorn-opts".
126 | # It is better to pass extra args through environment variables.
127 | # More info - https://mlflow.org/docs/latest/cli.html#mlflow-server
128 | cmd_split = shlex.split(mlflow_cmd_option)
129 | subprocess.run(
130 | [MLFLOW_LAUNCH_SCRIPT] + cmd_split, # capture_output=True
131 | )
132 |
133 |
134 | if __name__ == "__main__":
135 | launch_mlflow()
136 |
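137 | # Example (a sketch): the container entrypoint runs this module directly; the
138 | # backend store is selected entirely through environment variables, e.g.:
139 | #
140 | #   BACKEND_PROVIDER=sqlite python run.py
141 | #   BACKEND_PROVIDER=mysql DB_SECRET_OCID=<secret-ocid> python run.py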
--------------------------------------------------------------------------------
/demos/deploy_mlflow_model/conda-runtime/README.md:
--------------------------------------------------------------------------------
1 | # Conda Environment based Deployment
2 |
3 | This example demonstrates how to deploy a model using a conda pack built from the conda.yaml in the MLflow model. An MLflow model includes a conda.yaml that captures the dependencies required to run the model.
4 |
5 | ## Create a model and register
6 |
7 | 1. Build Model
8 |
9 | Run the `sklearn_elasticnet_wine` example in the project demos
10 |
11 | 2. Deploy Model
12 |
13 | There are two example specifications in this folder -
14 | * ``elastic-net-deployment_build_conda.yaml``: builds a conda environment, exports it as a conda pack, uploads it to Object Storage, and deploys the model
15 | * ``elastic-net-deployment_prebuilt_conda.yaml``: uses a conda pack that is already saved in Object Storage
16 |
17 | Update the yaml file to reflect correct values for -
18 |
19 | * logId
20 | * logGroupId
21 | * projectId
22 | * compartmentId
23 | * uri with the right bucket name and namespace
24 |
25 |
26 |
27 | ```
28 | MLFLOW_TRACKING_URI=<tracking-server-uri> \
29 | OCIFS_IAM_TYPE=api_key \
30 | mlflow deployments \
31 | create --name elasticnet_test_deploy -m models:/ElasticnetWineModel/1 \
32 | -t oci-datascience \
33 | --config deploy-config-file=elastic-net-deployment_build_conda.yaml
34 |
35 | ```
36 |
37 | 3. Invoke Prediction Endpoint
38 |
39 | ```
40 | import requests
41 | import oci
42 | from oci.signer import Signer
43 |
44 | body = {
45 | "columns": [
46 | "fixed acidity",
47 | "volatile acidity",
48 | "citric acid",
49 | "residual sugar",
50 | "chlorides",
51 | "free sulfur dioxide",
52 | "total sulfur dioxide",
53 | "density",
54 | "pH",
55 | "sulphates",
56 | "alcohol",
57 | ],
58 | "data": [[7, 0.27, 0.36, 20.7, 0.045, 45, 170, 1.001, 3, 0.45, 8.8]],
59 | "index": [0],
60 | }
61 |
62 |
63 |
64 | config = oci.config.from_file()
65 | auth = Signer(
66 | tenancy=config['tenancy'],
67 | user=config['user'],
68 | fingerprint=config['fingerprint'],
69 | private_key_file_location=config['key_file'],)
70 |
71 | endpoint = 'https://modeldeployment.us-ashburn-1.oci.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.iad.<unique-id>/predict'
72 |
73 |
74 | requests.post(endpoint, json=body, auth=auth, headers={}).json()
75 | ```
76 |
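77 | After the deployment is created, you can optionally verify it with the generic
78 | MLflow deployments CLI (a sketch, assuming the same target and deployment name
79 | as above):
80 | 
81 | ```
82 | MLFLOW_TRACKING_URI=<tracking-server-uri> \
83 | OCIFS_IAM_TYPE=api_key \
84 | mlflow deployments get --name elasticnet_test_deploy -t oci-datascience
85 | ```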
--------------------------------------------------------------------------------
/demos/deploy_mlflow_model/conda-runtime/elastic-net-deployment_build_conda.yaml:
--------------------------------------------------------------------------------
1 | kind: deployment
2 | spec:
3 | infrastructure:
4 | kind: infrastructure
5 | type: modelDeployment
6 | spec:
7 | logGroupId: ocid1.loggroup.oc1.iad..
8 | logId: ocid1.log.oc1.iad..
9 | projectId: ocid1.datascienceproject.oc1.iad..
10 | compartmentId: ocid1.compartment.oc1..
11 | shapeName: VM.Standard.E3.Flex
12 | shapeConfigDetails:
13 | memoryInGBs: 32
14 | ocpus: 4
15 | blockStorageSize: 50
16 | replica: 1
17 | runtime:
18 | kind: runtime
19 | type: conda
20 | spec:
21 | uri: oci://bucketname@namespace/path/to/conda
22 | pythonVersion:
23 |
24 |
--------------------------------------------------------------------------------
/demos/deploy_mlflow_model/conda-runtime/elastic-net-deployment_prebuilt_conda.yaml:
--------------------------------------------------------------------------------
1 | kind: deployment
2 | spec:
3 | infrastructure:
4 | kind: infrastructure
5 | type: modelDeployment
6 | spec:
7 | logGroupId: ocid1.loggroup.oc1.iad..
8 | logId: ocid1.log.oc1.iad..
9 | projectId: ocid1.datascienceproject.oc1.iad..
10 | compartmentId: ocid1.compartment.oc1..
11 | shapeName: VM.Standard.E3.Flex
12 | shapeConfigDetails:
13 | memoryInGBs: 32
14 | ocpus: 4
15 | blockStorageSize: 50
16 | replica: 1
17 | runtime:
18 | kind: runtime
19 | type: conda
20 | spec:
21 | uri:
22 | name: elasticnet_v1
23 | destination: oci://bucket@namespace/mlflow-conda-envs/
24 | gpu: false
25 | overwrite: false
26 | keepLocal: true
27 | localCondaDir: ./conda
28 |
--------------------------------------------------------------------------------
/demos/deploy_mlflow_model/container-runtime/README.md:
--------------------------------------------------------------------------------
1 | # Container based deployment
2 |
3 | ## Overview
4 |
5 | This demo shows how to use containers for deploying models stored in MLflow registry.
6 |
7 | 1. Build Model
8 |
9 | Run the `sklearn_elasticnet_wine` example in the project demos
10 |
11 | 2. Build Container image
12 |
13 | To install the conda dependencies in the container image, copy `conda.yaml` from the MLflow artifact into the same folder as `Dockerfile.pyfunc`. The artifacts to build the container image are available in the ``container`` folder.
14 |
15 | ```
16 | docker build -t {region}.ocir.io/<namespace>/mlflow-model-runtime/sklearn:v1 -f Dockerfile.pyfunc .
17 | ```
18 |
19 | ### Push the container to OCIR
20 |
21 | ```
22 | docker push {region}.ocir.io/<namespace>/mlflow-model-runtime/sklearn:v1
23 | ```
24 |
25 |
26 | ### Create Endpoint
27 |
28 | Update ``elastic-net-deployment-container.yaml`` to reflect correct values for -
29 | 
30 | * logId
31 | * logGroupId
32 | * projectId
33 | * compartmentId
34 | * image
35 | 
36 |
37 |
38 | ```
39 | MLFLOW_TRACKING_URI=<tracking-server-uri> \
40 | OCIFS_IAM_TYPE=api_key \
41 | mlflow deployments \
42 | create --name elasticnet_test_deploy_container -m models:/ElasticnetWineModel/1 \
43 | -t oci-datascience \
44 | --config deploy-config-file=elastic-net-deployment-container.yaml
45 | ```
46 |
47 | 3. Invoke Prediction Endpoint
48 |
49 | 3.1 Using Python SDK
50 |
51 | ```
52 | import requests
53 | import oci
54 | from oci.signer import Signer
55 |
56 | body = {
57 | "dataframe_split": {
58 | "columns": [
59 | "fixed acidity",
60 | "volatile acidity",
61 | "citric acid",
62 | "residual sugar",
63 | "chlorides",
64 | "free sulfur dioxide",
65 | "total sulfur dioxide",
66 | "density",
67 | "pH",
68 | "sulphates",
69 | "alcohol",
70 | ],
71 | "data": [[7, 0.27, 0.36, 20.7, 0.045, 45, 170, 1.001, 3, 0.45, 8.8]],
72 | "index": [0],
73 | }
74 | }
75 |
76 |
77 |
78 | config = oci.config.from_file()
79 | auth = Signer(
80 | tenancy=config['tenancy'],
81 | user=config['user'],
82 | fingerprint=config['fingerprint'],
83 | private_key_file_location=config['key_file'],)
84 |
85 | endpoint = 'https://modeldeployment.us-ashburn-1.oci.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.iad.<unique-id>/predict'
86 |
87 |
88 | requests.post(endpoint, json=body, auth=auth, headers={}).json()
89 |
90 | ```
91 |
92 | 3.2 Using MLflow CLI
93 |
94 | ```
95 |
96 | cat <<EOF > input.json
97 | {
98 | "dataframe_split": {
99 | "columns": [
100 | "fixed acidity",
101 | "volatile acidity",
102 | "citric acid",
103 | "residual sugar",
104 | "chlorides",
105 | "free sulfur dioxide",
106 | "total sulfur dioxide",
107 | "density",
108 | "pH",
109 | "sulphates",
110 | "alcohol"
111 | ],
112 | "data": [[7, 0.27, 0.36, 20.7, 0.045, 45, 170, 1.001, 3, 0.45, 8.8]],
113 | "index": [0]
114 | }
115 | }
116 | EOF
117 |
118 | mlflow deployments predict --name ocid1.datasciencemodeldeployment.oc1.iad.. -t oci-datascience -I ./input.json
119 | ```
120 |
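121 | Before pushing the image to OCIR, you can optionally smoke-test it locally. A
122 | sketch, assuming a local MLflow model directory mounted at the path the service
123 | expects:
124 | 
125 | ```
126 | docker run --rm -p 5001:5001 -v $(pwd)/model:/opt/ml/model \
127 |     {region}.ocir.io/<namespace>/mlflow-model-runtime/sklearn:v1
128 | curl -i http://localhost:5001/health
129 | ```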
--------------------------------------------------------------------------------
/demos/deploy_mlflow_model/container-runtime/container/Dockerfile.pyfunc:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Oracle and/or its affiliates.
2 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
3 |
4 | FROM iad.ocir.io/namespace/image:tag
5 |
6 | RUN yum install -y --setopt=skip_missing_names_on_install=False maven java-11-openjdk wget curl nginx sudo
7 |
8 | # Data Science service extracts the model to /opt/ds/model/deployed_model
9 | RUN mkdir -p /opt/ds/model/deployed_model && \
10 | mkdir -p /opt/ml && \
11 | ln -s /opt/ml/model /opt/ds/model/deployed_model
12 |
13 | RUN export JAVA_HOME=/usr/lib/jvm/$(ls /usr/lib/jvm/| grep java-11-openjdk*)
14 | ENV GUNICORN_CMD_ARGS="--timeout 60 -k gevent"
15 | # Set up the program in the image
16 | WORKDIR /opt/mlflow
17 |
18 | RUN mvn --batch-mode dependency:copy -Dartifact=org.mlflow:mlflow-scoring:2.1.1:pom -DoutputDirectory=/opt/java
19 | RUN mvn --batch-mode dependency:copy -Dartifact=org.mlflow:mlflow-scoring:2.1.1:jar -DoutputDirectory=/opt/java/jars
20 | RUN cp /opt/java/mlflow-scoring-2.1.1.pom /opt/java/pom.xml
21 | RUN cd /opt/java && mvn --batch-mode dependency:copy-dependencies -DoutputDirectory=/opt/java/jars
22 |
23 | ENV MLFLOW_DISABLE_ENV_CREATION="true"
24 | ENV DISABLE_NGINX=true
25 |
26 | COPY conda.yaml /opt/conda.yaml
27 | RUN mamba env update --name oci-mlflow -f /opt/conda.yaml && pip install gevent
28 |
29 | ENV NGINX_ROOT=/etc/nginx
30 | ENV NGINX_PID=/var/run/nginx.pid
31 | ENV NGINX_BIN=/usr/sbin/nginx
32 | ENV NGINX_USER=root
33 |
34 |
35 | EXPOSE 5001
36 |
37 | COPY nginx.conf /etc/nginx/nginx.conf
38 | ENTRYPOINT [ "/bin/bash", "--login", "-c" ]
39 | CMD ["nginx -p $PWD && mlflow models serve -p 8080 -h 0.0.0.0 -m /opt/ds/model/deployed_model --env-manager local"]
40 |
--------------------------------------------------------------------------------
/demos/deploy_mlflow_model/container-runtime/container/nginx.conf:
--------------------------------------------------------------------------------
1 | user root;
2 | worker_processes auto;
3 | error_log /dev/stdout info;
4 | pid /var/run/nginx.pid;
5 |
6 |
7 | events {
8 | }
9 |
10 | http {
11 | log_format main '$remote_addr - $remote_user [$time_local] "$request" '
12 | '$status $body_bytes_sent "$http_referer" '
13 | '"$http_user_agent" "$http_x_forwarded_for"';
14 |
15 | access_log /dev/stdout main;
16 |
17 | tcp_nopush on;
18 | tcp_nodelay on;
19 | keepalive_timeout 65;
20 | types_hash_max_size 2048;
21 |
22 | include /etc/nginx/mime.types;
23 | default_type application/octet-stream;
24 |
25 |
26 | server {
27 | listen 5001;
28 | client_body_temp_path /tmp/client_body_temp;
29 | proxy_temp_path /tmp/proxy_temp;
30 |
31 |
32 | location /predict {
33 | proxy_pass http://127.0.0.1:8080/invocations;
34 | }
35 | location /health {
36 | proxy_pass http://127.0.0.1:8080/health;
37 | }
38 |
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/demos/deploy_mlflow_model/container-runtime/elastic-net-deployment-container.yaml:
--------------------------------------------------------------------------------
1 | kind: deployment
2 | spec:
3 | infrastructure:
4 | kind: infrastructure
5 | type: modelDeployment
6 | spec:
7 | logGroupId: ocid1.loggroup.oc1.iad..
8 | logId: ocid1.log.oc1.iad..
9 | projectId: ocid1.datascienceproject.oc1.iad..
10 | compartmentId: ocid1.compartment.oc1..
11 | shapeName: VM.Standard.E3.Flex
12 | shapeConfigDetails:
13 | memoryInGBs: 32
14 | ocpus: 4
15 | blockStorageSize: 50
16 | replica: 1
17 | runtime:
18 | kind: runtime
19 | type: container
20 | spec:
21 |       image: iad.ocir.io/<namespace>/mlflow-model-runtime/sklearn:v1
22 | serverPort: 5001
23 | healthCheckPort: 5001
24 |
--------------------------------------------------------------------------------
/demos/deploy_mlflow_model/container-runtime/input.json:
--------------------------------------------------------------------------------
1 | {
2 | "dataframe_split": {
3 | "columns": [
4 | "mean radius",
5 | "mean texture",
6 | "mean perimeter",
7 | "mean area",
8 | "mean smoothness",
9 | "mean compactness",
10 | "mean concavity",
11 | "mean concave points"
12 | ],
13 | "data": [
14 | [
15 | 17.99,
16 | 10.38,
17 | 122.8,
18 | 1001.0,
19 | 0.1184,
20 | 0.2776,
21 | 0.3001,
22 | 0.1471
23 | ]
24 | ],
25 | "index": [
26 | 0
27 | ]
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/demos/pyspark_logistic_regression_dataflow_job/MLproject:
--------------------------------------------------------------------------------
1 | name: pyspark_logistic_regression_dataflow_job
2 |
3 | entry_points:
4 | main:
5 | parameters:
6 | seed: { type: float, default: 24 }
7 | command: "logistic_regression.py --seed {seed}"
8 |
--------------------------------------------------------------------------------
/demos/pyspark_logistic_regression_dataflow_job/README.md:
--------------------------------------------------------------------------------
1 | ## Run MLflow project on the Data Flow cluster
2 | ---
3 |
4 | This demo shows how to run an MLflow project on the Data Flow cluster. This directory contains an MLflow project file that trains a logistic regression model on the Iris dataset.
5 |
6 | ## Prerequisites
7 | - First, install the MLflow library
8 | ```
9 | pip install mlflow
10 | ```
11 | - Set the tracking server endpoint
12 | ```
13 | export MLFLOW_TRACKING_URI=<tracking-server-uri>
14 | ```
15 | - Install the oci-mlflow package
16 | ```
17 | pip install oci-mlflow
18 | ```
19 |
20 | ## Running this example on the Data Flow cluster
21 | - Prepare the OCI config, which is a JSON file containing the authentication information and the path to the job configuration YAML file
22 | ```
23 | {
24 | "oci_auth": "api_key",
25 | "oci_job_template_path": "./oci-datascience-template.yaml"
26 | }
27 | ```
28 | - Prepare the job configuration file
29 | ```
30 | kind: job
31 | name: "{DataFlow application name. For MLflow, it will be replaced with the Project name}"
32 | spec:
33 | infrastructure:
34 | kind: infrastructure
35 | spec:
36 | compartmentId: ocid1.compartment.oc1..
37 | driverShape: VM.Standard.E4.Flex
38 | driverShapeConfig:
39 | memory_in_gbs: 32
40 | ocpus: 2
41 | executorShape: VM.Standard.E4.Flex
42 | executorShapeConfig:
43 | memory_in_gbs: 32
44 | ocpus: 2
45 | language: PYTHON
46 | logsBucketUri:
47 | numExecutors: 1
48 | sparkVersion: 3.2.1
49 | privateEndpointId: ocid1.dataflowprivateendpoint.oc1..
50 | type: dataFlow
51 | runtime:
52 | kind: runtime
53 | spec:
54 | configuration:
55 | spark.driverEnv.MLFLOW_TRACKING_URI:
56 | conda:
57 | type: published
58 | uri: oci://bucket@namespace/prefix
59 | condaAuthType: resource_principal
60 | scriptBucket:
61 | scriptPathURI: "{Path to the executable script. For MLflow, it will be replaced with the CMD}"
62 | overwrite: True
63 | type: dataFlow
64 | ```
65 | In the config file we also specify a Private Endpoint, which allows the cluster to reach the tracking server when it is deployed in a private network. The private endpoint is not necessary when the tracking server has a public IP address. More details about the Private Endpoint can be found in the official [documentation](https://docs.oracle.com/en-us/iaas/data-flow/using/private-network.htm).
66 |
67 | - Run the example project using CLI
68 | ```
69 | mlflow run . --param-list seed=24 --experiment-name My_Experiment --backend oci-datascience --backend-config ./oci-datascience-config.json
70 | ```
71 | - Run the example project using SDK
72 | ```
73 | import mlflow
74 |
75 | mlflow.set_tracking_uri("<tracking-server-uri>")
76 |
77 | mlflow.run(".",
78 | experiment_name="My_Experiment",
79 | backend="oci-datascience",
80 | backend_config="oci-datascience-config.json"
81 | )
82 | ```
83 |
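84 | The SDK call returns a `SubmittedRun`; a small sketch for blocking until the
85 | Data Flow run finishes and then reading back the logged parameters:
86 | ```
87 | submitted = mlflow.run(".",
88 |     experiment_name="My_Experiment",
89 |     backend="oci-datascience",
90 |     backend_config="oci-datascience-config.json"
91 | )
92 | submitted.wait()
93 | print(mlflow.get_run(submitted.run_id).data.params)
94 | ```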
--------------------------------------------------------------------------------
/demos/pyspark_logistic_regression_dataflow_job/logistic_regression.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Oracle and/or its affiliates.
2 | # Licensed under the Universal Permissive License v 1.0 as shown at
3 | # https://oss.oracle.com/licenses/upl/
4 |
5 | import click
6 | import mlflow
7 | from pyspark.ml.classification import LogisticRegression
8 | from pyspark.ml.feature import VectorAssembler
9 | from pyspark.sql import SparkSession
10 | from sklearn.datasets import load_iris
11 |
12 |
13 | @click.command()
14 | @click.option("--seed", "-s", help="The seed for sampling.", default=20, required=False)
15 | def main(seed):
16 | spark = SparkSession.builder.getOrCreate()
17 |
18 | df = load_iris(as_frame=True).frame.rename(columns={"target": "label"})
19 | df = spark.createDataFrame(df)
20 | df = VectorAssembler(inputCols=df.columns[:-1], outputCol="features").transform(df)
21 | train, test = df.randomSplit([0.8, 0.2], seed)
22 |
23 | mlflow.pyspark.ml.autolog()
24 |
25 | with mlflow.start_run():
26 | lor = LogisticRegression(maxIter=5)
27 | lorModel = lor.fit(train)
28 | mlflow.log_param("randomSplit", [0.8, 0.2])
29 | mlflow.log_param("Seed", seed)
30 |
31 | pred = lorModel.transform(test)
32 | pred.select(lorModel.getPredictionCol()).show(10)
33 | spark.stop()
34 |
35 |
36 | if __name__ == "__main__":
37 | main()
38 |
--------------------------------------------------------------------------------
/demos/pyspark_logistic_regression_dataflow_job/oci-datascience-config.json:
--------------------------------------------------------------------------------
1 | {
2 | "oci_auth": "api_key",
3 | "oci_job_template_path": "{work_dir}/oci-datascience-template.yaml"
4 | }
5 |
--------------------------------------------------------------------------------
/demos/pyspark_logistic_regression_dataflow_job/oci-datascience-template.yaml:
--------------------------------------------------------------------------------
1 | kind: job
2 | name: "{DataFlow application name. For MLflow, it will be replaced with the Project name}"
3 | spec:
4 | infrastructure:
5 | kind: infrastructure
6 | spec:
7 | compartmentId: ocid1.compartment.oc1..
8 | driverShape: VM.Standard.E4.Flex
9 | driverShapeConfig:
10 | memory_in_gbs: 32
11 | ocpus: 2
12 | executorShape: VM.Standard.E4.Flex
13 | executorShapeConfig:
14 | memory_in_gbs: 32
15 | ocpus: 2
16 | language: PYTHON
17 | logsBucketUri: oci://bucket@namespace/
18 | numExecutors: 1
19 | sparkVersion: 3.2.1
20 | privateEndpointId: ocid1.dataflowprivateendpoint.oc1..
21 | type: dataFlow
22 | runtime:
23 | kind: runtime
24 | spec:
25 | configuration:
26 | spark.driverEnv.MLFLOW_TRACKING_URI:
27 | conda:
28 | type: published
29 | uri: oci://bucket@namespace/prefix
30 | condaAuthType: resource_principal
31 | scriptBucket: oci://bucket@namespace/dataflow/script
32 | scriptPathURI: "{Path to the executable script. For MLflow, it will be replaced with the CMD}"
33 | overwrite: True
34 | type: dataFlow
35 |
--------------------------------------------------------------------------------
/demos/pyspark_logistic_regression_dataflow_job/run.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "# Copyright (c) 2023 Oracle and/or its affiliates.\n",
10 | "# Licensed under the Universal Permissive License v 1.0 as shown at\n",
11 | "# https://oss.oracle.com/licenses/upl/\n",
12 | "\n",
13 | "import mlflow\n",
14 | "\n",
15 | "mlflow.set_tracking_uri(\"/\")\n",
16 | "\n",
17 | "mlflow.run(\".\",\n",
18 | " experiment_name=\"spark/logistic_regression\",\n",
19 | " backend=\"oci-datascience\",\n",
20 | " backend_config=\"oci-datascience-config.json\"\n",
21 | ")"
22 | ]
23 | }
24 | ],
25 | "metadata": {
26 | "kernelspec": {
27 | "display_name": "Python 3",
28 | "language": "python",
29 | "name": "python3"
30 | },
31 | "language_info": {
32 | "codemirror_mode": {
33 | "name": "ipython",
34 | "version": 3
35 | },
36 | "file_extension": ".py",
37 | "mimetype": "text/x-python",
38 | "name": "python",
39 | "nbconvert_exporter": "python",
40 | "pygments_lexer": "ipython3",
41 | "version": "3.9.15 (main, Nov 24 2022, 08:29:02) \n[Clang 14.0.6 ]"
42 | },
43 | "vscode": {
44 | "interpreter": {
45 | "hash": "befd0945c435790bd7f22b73b168cdc559c23014061f63180a80847874ce09cd"
46 | }
47 | }
48 | },
49 | "nbformat": 4,
50 | "nbformat_minor": 4
51 | }
52 |
--------------------------------------------------------------------------------
/demos/pyspark_logistic_regression_dataflow_job/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright (c) 2023 Oracle and/or its affiliates.
3 | # Licensed under the Universal Permissive License v 1.0 as shown at
4 | # https://oss.oracle.com/licenses/upl/
5 |
6 | export MLFLOW_TRACKING_URI=<tracking-server-uri>
7 | mlflow run . --param-list seed=24 --experiment-name spark/logistic_regression --backend oci-datascience --backend-config ./oci-datascience-config.json
8 |
--------------------------------------------------------------------------------
/demos/sklearn_elasticnet_wine_notebook_job/MLproject:
--------------------------------------------------------------------------------
1 | # The source of this workload is a Jupyter Notebook which will be run on the DataScience job using MLflow CLI/SDK.
2 |
3 | name: sklearn_elasticnet_wine_notebook_job
4 |
5 | entry_points:
6 | main:
7 | command: "train.ipynb"
8 |
--------------------------------------------------------------------------------
/demos/sklearn_elasticnet_wine_notebook_job/README.md:
--------------------------------------------------------------------------------
1 | ## Run MLflow project on the Data Science job
2 | ---
3 |
4 | This demo shows how to run an MLflow project on the Data Science job within a Notebook runtime. This directory contains an MLflow project that trains a linear regression model on the UC Irvine Wine Quality Dataset.
5 |
6 | ## Prerequisites
7 | - First, install the MLflow library
8 | ```
9 | pip install mlflow
10 | ```
11 | - Set the tracking server endpoint.
12 | ```
13 | export MLFLOW_TRACKING_URI=<tracking-server-uri>
14 | ```
15 | - Install the oci-mlflow package
16 | ```
17 | pip install oci-mlflow
18 | ```
19 |
20 | ## Running this example on the Data Science job
21 | To run this example on the Data Science job, a custom conda environment was prepared and published to an Object Storage bucket. The `generalml_p38_cpu_v1` service conda environment was used as a base environment for the custom one.
22 |
23 | - Install the OCI MLflow package
24 | ```
25 | pip install oci-mlflow
26 | ```
27 | - Prepare the OCI config, which is a JSON file containing the authentication information and the path to the job configuration YAML file
28 | ```
29 | {
30 | "oci_auth": "api_key",
31 | "oci_job_template_path": "./oci-datascience-template.yaml"
32 | }
33 | ```
34 | - Prepare the job configuration file
35 | ```
36 | kind: job
37 | name: "{Job name. For MLflow, it will be replaced with the Project name}"
38 | spec:
39 | infrastructure:
40 | kind: infrastructure
41 | spec:
42 | blockStorageSize: 50
43 | subnetId: ocid1.subnet.oc1.iad..
44 | compartmentId: ocid1.compartment.oc1..
45 | projectId: ocid1.datascienceproject.oc1.iad..
46 | logGroupId: ocid1.loggroup.oc1.iad..
47 | logId: ocid1.log.oc1.iad..
48 | shapeConfigDetails:
49 | memoryInGBs: 20
50 | ocpus: 2
51 | shapeName: VM.Standard.E3.Flex
52 | type: dataScienceJob
53 | runtime:
54 | kind: runtime
55 | spec:
56 | args: []
57 | conda:
58 | type: published
59 | uri:
60 | env:
61 | - name: TEST
62 | value: TEST_VALUE
63 | entrypoint: "{Entry point notebook. For MLflow, it will be replaced with the CMD}"
64 | source: "{Path to the source code directory. For MLflow, it will be replaced with path to the project}"
65 | notebookEncoding: utf-8
66 | type: notebook
67 | ```
68 |
69 | - Run the example project using CLI
70 |
71 | ```
72 | mlflow run . --experiment-name My_Experiment --backend oci-datascience --backend-config ./oci-datascience-config.json
73 | ```
74 |
75 | - Run the example project using SDK
76 | ```
77 | import mlflow
78 |
79 | mlflow.set_tracking_uri("<tracking-server-uri>")
80 | 
81 | mlflow.run(".",
82 |            experiment_name="My_Experiment",
83 | backend="oci-datascience",
84 | backend_config="oci-datascience-config.json"
85 | )
86 | ```
87 |
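88 | Once the job completes, the run shows up under the experiment on the tracking
89 | server. A small sketch for pulling the logged metrics back with the MLflow SDK
90 | (this assumes the experiment name used above and a recent MLflow that supports
91 | `experiment_names`):
92 | ```
93 | import mlflow
94 | 
95 | runs = mlflow.search_runs(experiment_names=["My_Experiment"])
96 | print(runs[["run_id", "metrics.rmse", "metrics.r2", "metrics.mae"]])
97 | ```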
--------------------------------------------------------------------------------
/demos/sklearn_elasticnet_wine_notebook_job/oci-datascience-config.json:
--------------------------------------------------------------------------------
1 | {
2 | "oci_auth": "api_key",
3 | "oci_job_template_path": "{work_dir}/oci-datascience-template.yaml"
4 | }
5 |
--------------------------------------------------------------------------------
/demos/sklearn_elasticnet_wine_notebook_job/oci-datascience-template.yaml:
--------------------------------------------------------------------------------
1 | kind: job
2 | name: "{Job name. For MLflow, it will be replaced with the Project name}"
3 | spec:
4 | infrastructure:
5 | kind: infrastructure
6 | spec:
7 | blockStorageSize: 50
8 | compartmentId: ocid1.compartment.oc1..
9 | jobInfrastructureType: ME_STANDALONE
10 | jobType: DEFAULT
11 | logGroupId: ocid1.loggroup.oc1.iad..
12 | logId: ocid1.log.oc1.iad..
13 | projectId: ocid1.datascienceproject.oc1.iad..
14 | shapeConfigDetails:
15 | memoryInGBs: 20
16 | ocpus: 2
17 | shapeName: VM.Standard.E3.Flex
18 | subnetId: ocid1.subnet.oc1.iad..
19 | type: dataScienceJob
20 | runtime:
21 | kind: runtime
22 | spec:
23 | args: []
24 | conda:
25 | type: published
26 | uri: oci://bucket@namespace/prefix
27 | env:
28 | - name: TEST
29 | value: TEST_VALUE
30 | entrypoint: "{Entry point notebook. For MLflow, it will be replaced with the CMD}"
31 | source: "{Path to the source code directory. For MLflow, it will be replaced with path to the project}"
32 | notebookEncoding: utf-8
33 | type: notebook
34 |
--------------------------------------------------------------------------------
/demos/sklearn_elasticnet_wine_notebook_job/run.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "# Copyright (c) 2023 Oracle and/or its affiliates.\n",
10 | "# Licensed under the Universal Permissive License v 1.0 as shown at\n",
11 | "# https://oss.oracle.com/licenses/upl/\n",
12 | "\n",
13 | "import mlflow\n",
14 | "\n",
15 | "mlflow.set_tracking_uri(\"/\")\n",
16 | "\n",
17 | "mlflow.run(\".\",\n",
18 | " experiment_name=\"sklearn/elastic_net\",\n",
19 | " backend=\"oci-datascience\",\n",
20 | " backend_config=\"oci-datascience-config.json\"\n",
21 | ")"
22 | ]
23 | }
24 | ],
25 | "metadata": {
26 | "kernelspec": {
27 | "display_name": "Python 3",
28 | "language": "python",
29 | "name": "python3"
30 | },
31 | "language_info": {
32 | "codemirror_mode": {
33 | "name": "ipython",
34 | "version": 3
35 | },
36 | "file_extension": ".py",
37 | "mimetype": "text/x-python",
38 | "name": "python",
39 | "nbconvert_exporter": "python",
40 | "pygments_lexer": "ipython3",
41 | "version": "3.9.15 (main, Nov 24 2022, 08:29:02) \n[Clang 14.0.6 ]"
42 | },
43 | "vscode": {
44 | "interpreter": {
45 | "hash": "befd0945c435790bd7f22b73b168cdc559c23014061f63180a80847874ce09cd"
46 | }
47 | }
48 | },
49 | "nbformat": 4,
50 | "nbformat_minor": 4
51 | }
52 |
--------------------------------------------------------------------------------
/demos/sklearn_elasticnet_wine_notebook_job/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright (c) 2023 Oracle and/or its affiliates.
3 | # Licensed under the Universal Permissive License v 1.0 as shown at
4 | # https://oss.oracle.com/licenses/upl/
5 |
6 | export MLFLOW_TRACKING_URI=<tracking-server-uri>
7 | mlflow run . --experiment-name sklearn/elastic_net --backend oci-datascience --backend-config ./oci-datascience-config.json
8 |
--------------------------------------------------------------------------------
/demos/sklearn_elasticnet_wine_notebook_job/train.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "# Copyright (c) 2023 Oracle and/or its affiliates.\n",
10 | "# Licensed under the Universal Permissive License v 1.0 as shown at\n",
11 | "# https://oss.oracle.com/licenses/upl/\n",
12 | "\n",
13 | "import pandas as pd\n",
14 | "import numpy as np\n",
15 | "from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score\n",
16 | "from sklearn.model_selection import train_test_split\n",
17 | "from sklearn.linear_model import ElasticNet\n",
18 | "\n",
19 | "import mlflow\n",
20 | "import mlflow.sklearn\n",
21 | "\n",
22 | "np.random.seed(40)\n",
23 | "\n",
24 | "alpha = 0.7\n",
25 | "l1_ratio = 0.06\n",
26 | "\n",
27 | "print(\"#\" * 20)\n",
28 | "print(\"ARGS:\")\n",
29 | "print(f\"args.alpha: {alpha}\")\n",
30 | "print(f\"args.l1_ratio: {l1_ratio}\")\n",
31 | "print(\"#\" * 20)\n",
32 | "\n",
33 | "data = pd.read_csv(\n",
34 | " \"https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv\",\n",
35 | " delimiter=\";\",\n",
36 | ")\n",
37 | "\n",
38 | "\n",
39 | "# Split the data into training and test sets. (0.75, 0.25) split.\n",
40 | "train, test = train_test_split(data)\n",
41 | "\n",
42 | "# The predicted column is \"quality\" which is a scalar from [3, 9]\n",
43 | "train_x = train.drop([\"quality\"], axis=1)\n",
44 | "test_x = test.drop([\"quality\"], axis=1)\n",
45 | "train_y = train[[\"quality\"]]\n",
46 | "test_y = test[[\"quality\"]]\n",
47 | "\n",
48 | "\n",
49 | "with mlflow.start_run():\n",
50 | " lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)\n",
51 | " lr.fit(train_x, train_y)\n",
52 | "\n",
53 | " predicted_qualities = lr.predict(test_x)\n",
54 | " \n",
55 | " rmse = np.sqrt(mean_squared_error(test_y, predicted_qualities))\n",
56 | " mae = mean_absolute_error(test_y, predicted_qualities)\n",
57 | " r2 = r2_score(test_y, predicted_qualities)\n",
58 | "\n",
59 | " print(\"Elasticnet model (alpha={:f}, l1_ratio={:f}):\".format(alpha, l1_ratio))\n",
60 | " \n",
61 | " print(\"#\" * 50)\n",
62 | " print(f\"RMSE: {rmse}\")\n",
63 | " print(f\"MAE: {mae}\")\n",
64 | " print(f\"R2: {r2}\")\n",
65 | " print(\"#\" * 50)\n",
66 | "\n",
67 | " mlflow.log_param(\"alpha\", alpha)\n",
68 | " mlflow.log_param(\"l1_ratio\", l1_ratio)\n",
69 | " mlflow.log_metric(\"rmse\", rmse)\n",
70 | " mlflow.log_metric(\"r2\", r2)\n",
71 | " mlflow.log_metric(\"mae\", mae)\n",
72 | "\n",
73 | " mlflow.sklearn.log_model(lr, \"model\", registered_model_name=\"ElasticnetWineModel\")"
74 | ]
75 | }
76 | ],
77 | "metadata": {
78 | "kernelspec": {
79 | "display_name": "Python 3.9.15 ('mlflow-oci')",
80 | "language": "python",
81 | "name": "python3"
82 | },
83 | "language_info": {
84 | "codemirror_mode": {
85 | "name": "ipython",
86 | "version": 3
87 | },
88 | "file_extension": ".py",
89 | "mimetype": "text/x-python",
90 | "name": "python",
91 | "nbconvert_exporter": "python",
92 | "pygments_lexer": "ipython3",
93 | "version": "3.9.15 (main, Nov 24 2022, 08:29:02) \n[Clang 14.0.6 ]"
94 | },
95 | "orig_nbformat": 4,
96 | "vscode": {
97 | "interpreter": {
98 | "hash": "befd0945c435790bd7f22b73b168cdc559c23014061f63180a80847874ce09cd"
99 | }
100 | }
101 | },
102 | "nbformat": 4,
103 | "nbformat_minor": 2
104 | }
105 |
--------------------------------------------------------------------------------
/demos/sklearn_elasticnet_wine_script_job/README.md:
--------------------------------------------------------------------------------
1 | ## Run MLflow project on the Data Science job
2 | ---
3 |
4 | This demo shows how to run an MLflow project locally as well as on the Data Science job within a Python runtime. This directory contains only the configuration files necessary to run the project on the Data Science job. The project itself will be downloaded from the [Git](https://github.com/mlflow/mlflow-example) repository. The project trains a linear regression model on the UC Irvine Wine Quality dataset.
5 |
6 | ## Prerequisites
7 | - First, install the MLflow library
8 | ```
9 | pip install mlflow
10 | ```
11 | - Set the tracking server endpoint.
12 | ```
13 | export MLFLOW_TRACKING_URI=
14 | ```
15 | - Install the oci-mlflow package
16 | ```
17 | pip install oci-mlflow
18 | ```
19 |
20 | ## Running this example locally
21 | The project will be executed on the local instance and the result will be added to the tracking URI specified above.
22 | - Run the example project using CLI
23 |
24 | ```
25 | mlflow run https://github.com/mlflow/mlflow#examples/sklearn_elasticnet_wine --experiment-name My_Experiment
26 | ```
27 | - Run the example project using SDK
28 | ```
29 | import mlflow
30 |
31 | mlflow.set_tracking_uri("")
32 |
33 | mlflow.run("https://github.com/mlflow/mlflow#examples/sklearn_elasticnet_wine",
34 | experiment_name="My_Experiment",
35 | )
36 | ```
37 |
38 | ## Running this example on the Data Science job
39 | To run this example on the Data Science job, a custom conda environment was prepared and published to an Object Storage bucket. The custom conda environment contains all the required packages listed in the [conda.yaml](https://github.com/mlflow/mlflow-example/blob/master/conda.yaml) as well as the `oci-mlflow` library. The `generalml_p38_cpu_v1` service conda environment was used as the base environment for the custom one.
40 | - Install the OCI MLflow package
41 | ```
42 | pip install oci-mlflow
43 | ```
44 | - Prepare and publish a custom conda environment, which should contain `mlflow`, `oci-mlflow`, and all libraries from the [conda.yaml](https://github.com/mlflow/mlflow-example/blob/master/conda.yaml)
45 |
46 | - Prepare the OCI config, which is a JSON file containing the authentication information as well as the path to the job configuration YAML file. Note that the project folder already contains all the necessary files to run this example.
47 | ```
48 | {
49 | "oci_auth": "api_key",
50 | "oci_job_template_path": "./oci-datascience-template.yaml"
51 | }
52 | ```
53 | - Prepare the job configuration file.
54 | ```
55 | kind: job
56 | name: "{Job name. For MLflow, it will be replaced with the Project name}"
57 | spec:
58 | infrastructure:
59 | kind: infrastructure
60 | spec:
61 | blockStorageSize: 50
62 | subnetId: ocid1.subnet.oc1.iad..
63 | compartmentId: ocid1.compartment.oc1..
64 | projectId: ocid1.datascienceproject.oc1.iad..
65 | logGroupId: ocid1.loggroup.oc1.iad..
66 | logId: ocid1.log.oc1.iad..
67 | shapeName: VM.Standard.E3.Flex
68 | shapeConfigDetails:
69 | memoryInGBs: 20
70 | ocpus: 2
71 | type: dataScienceJob
72 | runtime:
73 | kind: runtime
74 | spec:
75 | args: []
76 | conda:
77 | type: published
78 | uri:
79 | env:
80 | - name: TEST
81 | value: TEST_VALUE
82 |       entrypoint: "{Entry point script. For MLflow, it will be replaced with the CMD}"
83 |       scriptPathURI: "{Path to the script. For MLflow, it will be replaced with the path to the project}"
84 | type: python
85 |
86 | ```
87 | - Run the example project using CLI
88 |
89 | ```
90 | mlflow run https://github.com/mlflow/mlflow#examples/sklearn_elasticnet_wine --experiment-name My_Experiment --backend oci-datascience --backend-config ./oci-datascience-config.json
91 | ```
92 | - Run the example project using SDK
93 | ```
94 | import mlflow
95 |
96 | mlflow.set_tracking_uri("/")
97 |
98 | mlflow.run("https://github.com/mlflow/mlflow#examples/sklearn_elasticnet_wine",
99 | experiment_name="My_Experiment",
100 | backend="oci-datascience",
101 | backend_config="oci-datascience-config.json"
102 | )
103 | ```
104 |
--------------------------------------------------------------------------------
/demos/sklearn_elasticnet_wine_script_job/oci-datascience-config.json:
--------------------------------------------------------------------------------
1 | {
2 | "oci_auth": "api_key",
3 | "oci_job_template_path": "./oci-datascience-template.yaml"
4 | }
5 |
--------------------------------------------------------------------------------
/demos/sklearn_elasticnet_wine_script_job/oci-datascience-template.yaml:
--------------------------------------------------------------------------------
1 | kind: job
2 | name: "{Job name. For MLflow, it will be replaced with the Project name}"
3 | spec:
4 | infrastructure:
5 | kind: infrastructure
6 | spec:
7 | blockStorageSize: 50
8 | subnetId: ocid1.subnet.oc1.iad..
9 | compartmentId: ocid1.compartment.oc1..
10 | projectId: ocid1.datascienceproject.oc1.iad..
11 | logGroupId: ocid1.loggroup.oc1.iad..
12 | logId: ocid1.log.oc1.iad..
13 | shapeName: VM.Standard.E3.Flex
14 | shapeConfigDetails:
15 | memoryInGBs: 20
16 | ocpus: 2
17 | jobInfrastructureType: ME_STANDALONE
18 | jobType: DEFAULT
19 | type: dataScienceJob
20 | runtime:
21 | kind: runtime
22 | spec:
23 | args: []
24 | conda:
25 | type: published
26 | uri: oci://bucket@namespace/prefix
27 | env:
28 | - name: TEST
29 | value: TEST_VALUE
30 |       entrypoint: "{Entry point script. For MLflow, it will be replaced with the CMD}"
31 |       scriptPathURI: "{Path to the script. For MLflow, it will be replaced with the path to the project}"
32 | type: python
33 |
--------------------------------------------------------------------------------
/demos/sklearn_elasticnet_wine_script_job/run.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "# Copyright (c) 2023 Oracle and/or its affiliates.\n",
10 | "# Licensed under the Universal Permissive License v 1.0 as shown at\n",
11 | "# https://oss.oracle.com/licenses/upl/\n",
12 | "\n",
13 | "import mlflow\n",
14 | "\n",
15 | "mlflow.set_tracking_uri(\"/\")\n",
16 | "\n",
17 | "mlflow.run(\"https://github.com/mlflow/mlflow#examples/sklearn_elasticnet_wine\",\n",
18 | " experiment_name=\"/sklearn/elastic_net\",\n",
19 | " backend=\"oci-datascience\",\n",
20 | " backend_config=\"oci-datascience-config.json\"\n",
21 | ")"
22 | ]
23 | }
24 | ],
25 | "metadata": {
26 | "kernelspec": {
27 | "display_name": "Python 3",
28 | "language": "python",
29 | "name": "python3"
30 | },
31 | "language_info": {
32 | "codemirror_mode": {
33 | "name": "ipython",
34 | "version": 3
35 | },
36 | "file_extension": ".py",
37 | "mimetype": "text/x-python",
38 | "name": "python",
39 | "nbconvert_exporter": "python",
40 | "pygments_lexer": "ipython3",
41 | "version": "3.9.15 (main, Nov 24 2022, 08:29:02) \n[Clang 14.0.6 ]"
42 | },
43 | "vscode": {
44 | "interpreter": {
45 | "hash": "befd0945c435790bd7f22b73b168cdc559c23014061f63180a80847874ce09cd"
46 | }
47 | }
48 | },
49 | "nbformat": 4,
50 | "nbformat_minor": 4
51 | }
52 |
--------------------------------------------------------------------------------
/demos/sklearn_elasticnet_wine_script_job/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright (c) 2023 Oracle and/or its affiliates.
3 | # Licensed under the Universal Permissive License v 1.0 as shown at
4 | # https://oss.oracle.com/licenses/upl/
5 |
6 | export MLFLOW_TRACKING_URI=
7 | mlflow run https://github.com/mlflow/mlflow#examples/sklearn_elasticnet_wine --experiment-name /sklearn/elastic_net --backend oci-datascience --backend-config ./oci-datascience-config.json
8 |
--------------------------------------------------------------------------------
/dev-requirements.txt:
--------------------------------------------------------------------------------
1 | -r test-requirements.txt
2 | -r docs/requirements.txt
3 | build
4 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
22 | livehtml:
23 | sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
24 |
25 | clean:
26 | rm -rf $(BUILDDIR)/*
27 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | autodoc
2 | furo
3 | nbsphinx
4 | oci-mlflow
5 | sphinx
6 | sphinx_copybutton
7 | sphinx_code_tabs
8 | sphinx-autobuild
9 | sphinx-autorun
10 | sphinx-design
11 |
--------------------------------------------------------------------------------
/docs/source/_static/logo-dark-mode.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oracle/oci-mlflow/1e7b9f36e5afe70d00dfc56faca6b0a8e311366d/docs/source/_static/logo-dark-mode.png
--------------------------------------------------------------------------------
/docs/source/_static/logo-light-mode.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oracle/oci-mlflow/1e7b9f36e5afe70d00dfc56faca6b0a8e311366d/docs/source/_static/logo-light-mode.png
--------------------------------------------------------------------------------
/docs/source/concepts.rst:
--------------------------------------------------------------------------------
1 | ========
2 | Concepts
3 | ========
4 |
5 | MLflow is a framework that enables engineering teams to easily move workflows from R&D to staging to
6 | production, overcoming the common data science problems of model reproducibility and productionization.
7 |
8 | For a detailed view of the key concepts of MLflow, please refer to the MLflow
9 | documentation: https://mlflow.org/docs/latest/concepts.html
10 |
11 | **Benefits Of Using MLflow**
12 |
13 |
14 | - Open source tool for MLOps, removing vendor lock-in; it works from laptop to cloud, all with the same CLI/SDK.
15 | - Supports many tools and frameworks, for example Spark, Keras, PyTorch, TensorFlow, XGBoost, etc. When using one of these
16 |   frameworks, you can use MLflow to track your experiments, store your models, and deploy them to a variety of platforms, where much of this happens
17 |   automatically for you. Using ``mlflow.<framework>.autolog``,
18 |   the framework will automatically log parameters and metrics (see the short sketch after this list).
19 | - Highly customizable: thanks to conda environments and containers, models and training workloads are extremely flexible.
20 | - It is ideal for data science projects, because the workflow enabled by MLflow scales from a data scientist tinkering
21 |   on the weekend with some new ideas, to running a reproducible training experiment on large scale data in the cloud.
22 | - Focuses on the entire machine learning lifecycle: by providing tools for data preparation, model training,
23 |   model evaluation, model serving, and model deployment, MLflow is a complete solution for the entire ML lifecycle,
24 |   working together with Oracle OCI Data Science to scale and deploy highly available models in the cloud.
25 | - Custom visualization: the MLflow interface allows you to create custom visualizations for an experiment to compare
26 |   different runs and models.
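
A minimal autologging sketch (the scikit-learn flavor is shown for illustration; the other
framework-specific ``autolog`` entry points work the same way):

.. code-block:: python

    import mlflow
    from sklearn.linear_model import LinearRegression

    # enable automatic logging of parameters, metrics, and the fitted model
    mlflow.sklearn.autolog()

    with mlflow.start_run():
        # the fit call below is captured by autologging
        LinearRegression().fit([[0.0], [1.0], [2.0]], [0.0, 1.0, 2.0])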
27 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022, 2023 Oracle and/or its affiliates.
2 | # Licensed under the Universal Permissive License v 1.0 as shown at
3 | # https://oss.oracle.com/licenses/upl/
4 |
5 | # -- Path setup --------------------------------------------------------------
6 |
7 | import datetime
8 | import os
9 | import sys
10 |
11 | autoclass_content = "both"
12 |
13 | sys.path.insert(0, os.path.abspath("../../"))
14 |
15 | import oci_mlflow
16 |
17 | version = oci_mlflow.__version__
18 | release = version
19 |
20 |
21 | # -- Project information -----------------------------------------------------
22 |
23 | project = "OCI MLflow"
24 | copyright = (
25 | f"2022, {datetime.datetime.now().year} Oracle and/or its affiliates. "
26 | f"Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/"
27 | )
28 | author = "Oracle Data Science"
29 |
30 | # -- General configuration ---------------------------------------------------
31 |
32 | # Add any Sphinx extension module names here, as strings. They can be
33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
34 | # ones.
35 | extensions = [
36 | "sphinx.ext.napoleon",
37 | "sphinx.ext.autodoc",
38 | "sphinx.ext.doctest",
39 | "sphinx.ext.ifconfig",
40 | "sphinx.ext.todo",
41 | "sphinx.ext.extlinks",
42 | "sphinx.ext.intersphinx",
43 | "nbsphinx",
44 | "sphinx_code_tabs",
45 | "sphinx_copybutton",
46 | "sphinx.ext.duration",
47 | "sphinx.ext.autosummary",
48 | "sphinx.ext.intersphinx",
49 | "sphinx.ext.viewcode",
50 | "sphinx_autorun",
51 | ]
52 |
53 | intersphinx_mapping = {
54 | "python": ("https://docs.python.org/3/", None),
55 | "sphinx": ("https://www.sphinx-doc.org/en/master/", None),
56 | }
57 | intersphinx_disabled_domains = ["std"]
58 |
59 |
60 | # Add any paths that contain templates here, relative to this directory.
61 | templates_path = ["_templates"]
62 |
68 |
69 | # Unless we want to expose real buckets and namespaces
70 | nbsphinx_allow_errors = True
71 |
72 | # List of patterns, relative to source directory, that match files and
73 | # directories to ignore when looking for source files.
74 | # This pattern also affects html_static_path and html_extra_path.
75 | exclude_patterns = ["build", "**.ipynb_checkpoints", "Thumbs.db", ".DS_Store"]
76 |
77 |
78 | # -- Options for HTML output -------------------------------------------------
79 |
80 | # The theme to use for HTML and HTML Help pages. See the documentation for
81 | # a list of builtin themes.
82 | #
83 | html_theme = "furo"
84 | language = "en"
85 |
86 | # Disable the generation of the various indexes
87 | html_use_modindex = False
88 | html_use_index = False
89 |
90 | html_theme_options = {
91 | "light_logo": "logo-light-mode.png",
92 | "dark_logo": "logo-dark-mode.png",
93 | }
94 |
95 | # Add any paths that contain custom static files (such as style sheets) here,
96 | # relative to this directory. They are copied after the builtin static files,
97 | # so a file named "default.css" will overwrite the builtin "default.css".
98 | html_static_path = ["_static"]
99 |
--------------------------------------------------------------------------------
/docs/source/demos_examples.rst:
--------------------------------------------------------------------------------
1 | ================
2 | Demos & Examples
3 | ================
4 |
5 | Please note that the demo videos shared below may not represent Oracle's official views. They have been posted by individual users on their personal channels.
6 |
7 | .. admonition:: Examples
8 | :class: note
9 |
10 | .. list-table::
11 | :widths: 50 50
12 | :header-rows: 1
13 |
14 | * - Demo
15 | - Description
16 |
17 | * - `Run MLflow project on the local environment `__
18 | - | 1. `Installing `__ `GML for CPUs` conda environment
19 | | 2. Introduction to the `MLflow projects `__
20 | | 3. Running `sklearn_elasticnet_wine `__ example
21 | | 4. Checking the experiment's result
22 |
23 | * - `Run MLflow project on the OCI DSC Jobs `__
24 | - | 1. `OCI Data Science Jobs `__
25 | | 2. `Publishing `__ `GML for CPUs` conda environment
26 | | 3. Preparing :ref:`configuration files `
27 | | 4. Running `sklearn_elasticnet_wine `__ example
28 | | 5. Checking the experiment's result
29 |
30 | * - `Run MLflow project on the DSC Jobs with BYOC `__
31 | - | 1. `OCI Data Science Jobs `__
32 | | 2. `Bring Your Own Container `__
33 | | 3. `Publishing a container image `__ to the OCR
34 | | 4. Preparing :ref:`configuration files `
35 | | 5. Running `docker `__ project example
36 | | 6. Checking the experiment's result
37 |
38 | * - `Run MLflow project on the Data Flow service `__
39 | - | 1. `Running Spark Application on OCI Data Flow `__
40 | | 2. `Publishing `__ `PySpark 3.2 and Data Flow` conda environment
41 | | 3. Preparing :ref:`configuration files `
42 | | 4. `Allowing Data Flow to Access a Private Network `__
43 | | 5. Running `pyspark_ml_autologging `__ project example
44 | | 6. Checking the experiment's result
45 |
46 | * - `Deploy MLflow model on OCI Data Science `__
47 | - | 1. `Deploy Model on OCI Data Science `__
48 | | 2. `Publishing `__ `GML for CPUs` conda environment
49 | | 3. Preparing :ref:`configuration files `
50 | | 4. Running `sklearn_elasticnet_wine `__ project example
51 | | 5. Checking the experiment's result
52 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | ==================
2 | OCI MLflow Plugins
3 | ==================
4 |
5 | The OCI MLflow plugins allow you to utilize Oracle Cloud Infrastructure (OCI) resources to run MLflow experiments.
6 |
7 | |PyPI|_ |Python|_
8 |
9 | .. |PyPI| image:: https://img.shields.io/pypi/v/oci-mlflow.svg?style=for-the-badge&logo=pypi&logoColor=white
10 | .. _PyPI: https://pypi.org/project/oci-mlflow/
11 | .. |Python| image:: https://img.shields.io/pypi/pyversions/oci-mlflow.svg?style=for-the-badge&logo=pypi&logoColor=white
12 | .. _Python: https://pypi.org/project/oci-mlflow/
13 |
14 |
15 | .. toctree::
16 | :maxdepth: 2
17 | :caption: Contents:
18 |
19 | release_notes
20 | quickstart
21 | concepts
22 | tracking_server
23 | project
24 | model_deployment
25 | demos_examples
26 |
--------------------------------------------------------------------------------
/docs/source/model_deployment.rst:
--------------------------------------------------------------------------------
1 | ====================
2 | Deploy MLflow models
3 | ====================
4 |
5 | OCI Data Science supports two forms of runtime environment for running
6 | inference -
7 |
8 | - Conda Environment packaged using `conda
9 | pack `__
10 | - Container image
11 |
12 | The MLflow CLI and SDK can be used for deploying models on OCI Data Science.
13 | The CLI and the SDK API accept a
14 | `target `__
15 | parameter to specify the deployment target. To deploy on OCI,
16 | specify ``oci-datascience`` as the target.
17 |
18 | .. admonition:: Prerequisites
19 | :class: note
20 |
21 | - pip install oci-mlflow
22 | - pip install oracle-ads[opctl]
23 | - A model is registered in MLflow Tracking server
24 | - The conda pack used for model deployment must have ``mlflow``
25 |
26 | CLI help
27 | --------
28 |
29 | Check the CLI options for ``mlflow deployments`` supported with the
30 | ``oci-datascience`` target by running -
31 |
32 | ::
33 |
34 | mlflow deployments help -t oci-datascience
35 |
36 | Create Inference Endpoint Using Conda Environments
37 | --------------------------------------------------
38 |
39 | In conda-based deployment, the dependencies required to run the model are
40 | packaged using `conda pack `__. OCI Data Science provides curated conda environments to support a wide variety of popular machine learning frameworks. To use the conda runtime,
41 | choose one of the following options:
42 |
43 | 1. The service-provided conda pack
44 | 2. Build your own conda environment, package it using `conda pack `__, and upload it
45 |    to an Object Storage bucket. More details on how to manage your own conda packs can be
46 |    found `here `__
47 | 3. The MLflow CLI, with the help of the ``oci-mlflow`` plugin, can build a conda environment from the ``conda.yaml`` available in the model
48 |    artifact and push it to Object Storage. The ``conda.yaml`` file is auto-generated by ``mlflow``
49 |    when you log/register a model. Before using the autogenerated ``conda.yaml``, verify that it has all the required dependencies. You could create the right ``conda.yaml`` while logging the model by providing the conda dependencies as a dictionary input (a short sketch follows this list). More information is available in the `API docs `__
50 |
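A minimal sketch of providing the conda dependencies as a dictionary while logging a model
(the environment contents below are illustrative):

.. code-block:: python

    import mlflow.sklearn
    from sklearn.linear_model import ElasticNet

    # an explicit environment definition that becomes the model's conda.yaml
    conda_env = {
        "name": "elasticnet-env",
        "channels": ["conda-forge"],
        "dependencies": ["python=3.9", "pip", {"pip": ["mlflow", "scikit-learn"]}],
    }

    mlflow.sklearn.log_model(
        ElasticNet().fit([[0.0], [1.0]], [0.0, 1.0]),
        artifact_path="model",
        conda_env=conda_env,
    )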
51 |
52 |
53 | Deployment Specification
54 | ~~~~~~~~~~~~~~~~~~~~~~~~
55 |
56 | Create a model deployment specification in a YAML file. Refer to [schema] for
57 | the YAML specification. Copy one of the templates below and customize it as
58 | per your requirements -
59 |
60 | 1. Template to build conda pack on the fly using ``conda.yaml`` stored
61 | in the model artifact.
62 |
63 | .. admonition:: Prerequisites
64 | :class: note
65 |
66 | - Build a local ``OCI Data Science Job`` `compatible docker image `__
67 | - Connect to Object Storage through the Internet
68 |
69 | .. tabs::
70 |
71 | .. code-tab:: yaml
72 |
73 | kind: deployment
74 | spec:
75 | infrastructure:
76 | kind: infrastructure
77 | type: modelDeployment
78 | spec:
79 | logGroupId: ocid1.loggroup.oc1.iad..
80 | logId: ocid1.log.oc1.iad..
81 | projectId: ocid1.datascienceproject.oc1.iad..
82 | compartmentId: ocid1.compartment.oc1..
83 | shapeName: VM.Standard.E3.Flex
84 | shapeConfigDetails:
85 | memoryInGBs: 32
86 | ocpus: 4
87 | blockStorageSize: 50
88 | runtime:
89 | kind: runtime
90 | type: conda
91 | spec:
92 | uri:
93 | name: bc_sklearn_conda
94 | destination: oci://mayoor-dev@ociodscdev/mlflow-conda-envs/
95 | gpu: false
96 | overwrite: false
97 | keepLocal: true
98 | localCondaDir: ./conda
99 | #scoreCode: path/to/score.py [optional: This is required if you want to customize score.py]
100 |
101 | 2. Template to deploy using previously published conda pack.
102 |
103 | .. tabs::
104 |
105 | .. code-tab:: yaml
106 |
107 | kind: deployment
108 | spec:
109 | infrastructure:
110 | kind: infrastructure
111 | type: modelDeployment
112 | spec:
113 | logGroupId: ocid1.loggroup.oc1.iad..
114 | logId: ocid1.log.oc1.iad..
115 | projectId: ocid1.datascienceproject.oc1.iad..
116 | compartmentId: ocid1.compartment.oc1..
117 | shapeName: VM.Standard.E3.Flex
118 | shapeConfigDetails:
119 | memoryInGBs: 32
120 | ocpus: 4
121 | blockStorageSize: 50
122 | runtime:
123 | kind: runtime
124 | type: conda
125 | spec:
126 | uri: oci://bucket@namespace/path/to/conda-env
127 | pythonVersion: 3.9.15
128 | #scoreCode: path/to/score.py [optional: This is required if you want to customize score.py]
129 |
130 | Create Deployment
131 | ~~~~~~~~~~~~~~~~~
132 |
133 | Use MLflow CLI/SDK to create a deployment. Once the deployment
134 | specification is created, pass it as input to the ``mlflow deployments``
135 | command using
136 | ``--config deploy-config-file=`` and
137 | ``oci-datascience`` as the target.
138 |
139 |
140 | .. code-block:: bash
141 |
142 | export MLFLOW_TRACKING_URI=
143 |
144 | mlflow deployments create --name -m models:// -t oci-datascience --config deploy-config-file=deployment_specification.yaml
145 |
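Equivalently, a minimal Python sketch using the MLflow deployments API (the deployment
name and model URI below are placeholders):

.. code-block:: python

    from mlflow.deployments import get_deploy_client

    # "oci-datascience" resolves to the oci-mlflow deployment plugin
    client = get_deploy_client("oci-datascience")
    client.create_deployment(
        name="my-deployment",  # placeholder deployment name
        model_uri="models:/MyModel/1",  # placeholder registered-model URI
        config={"deploy-config-file": "deployment_specification.yaml"},
    )
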
146 | Invoke Inference Endpoint
147 | ~~~~~~~~~~~~~~~~~~~~~~~~~
148 |
149 | Invoke the endpoint through code or CLI. Here is an example of invoking
150 | an endpoint using ``oci raw-request`` CLI command -
151 |
152 | ::
153 |
154 | data='{"columns":["mean radius","mean texture","mean perimeter","mean area","mean smoothness","mean compactness","mean concavity","mean concave points"],"index":[0],"data":[[17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471]]}'
155 |
156 | oci raw-request --http-method POST --target-uri https://modeldeployment.us-ashburn-1.oci.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.iad../predict --request-body "$data"
157 |
158 | {
159 | "data": {
160 | "predictions": [
161 | 0
162 | ]
163 | },
164 | "headers": {
165 | "Connection": "keep-alive",
166 | "Content-Length": "20",
167 | "Content-Type": "application/json",
168 | "Date": "Wed, 15 Feb 2023 04:26:18 GMT",
169 | "Server": "nginx/1.14.1",
170 | "X-Content-Type-Options": "nosniff",
171 | "opc-request-id": "72BD2656826241C586FD29D9F03EA2E1/D95ADB6267CD5390F9E6D26108E60AF9/907E1377442682A9A72AB1D797056240"
172 | },
173 | "status": "200 OK"
174 | }
175 |
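The endpoint can also be invoked from Python; a minimal sketch, assuming the default
authentication is configured and using a placeholder endpoint URL:

.. code-block:: python

    import requests
    from ads.common.auth import default_signer

    # the "signer" entry is a requests-compatible auth object
    auth = default_signer()["signer"]

    # placeholder endpoint URL; replace with your model deployment's /predict URI
    endpoint = "https://modeldeployment.us-ashburn-1.oci.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.iad../predict"
    data = {
        "columns": ["mean radius", "mean texture", "mean perimeter", "mean area",
                    "mean smoothness", "mean compactness", "mean concavity", "mean concave points"],
        "index": [0],
        "data": [[17.99, 10.38, 122.8, 1001.0, 0.1184, 0.2776, 0.3001, 0.1471]],
    }
    response = requests.post(endpoint, json=data, auth=auth)
    print(response.json())
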
176 | Create Inference endpoint Using Container Images
177 | ------------------------------------------------
178 |
179 | A container image allows you not just to bundle the runtime dependencies,
180 | but also to use the inference serving framework of your
181 | choice. The container has to adhere to the following requirements -
182 |
183 | 1. Provides ``/predict`` endpoint for prediction
184 | 2. Provides ``/health`` endpoint for health check
185 | 3. Is published to the ``ocir`` registry in your tenancy, and the policies
186 |    are set up such that the OCI Data Science service can pull the image from
187 |    your registry. More information
188 |    `here `__
189 |
190 | In order to adhere to these requirements, you will have to add a reverse
191 | proxy in your container which maps the default endpoint offered by
192 | your model serving framework to ``/predict`` and the health endpoint to
193 | ``/health``.
194 |
195 | Refer to `how to produce a container
196 | image `__
197 | that uses the ``mlflow models serve`` framework for model serving.
198 |
199 | .. _prerequisites-1:
200 |
201 | .. admonition:: Prerequisites
202 | :class: note
203 |
204 | - pip install oci-mlflow
205 | - pip install oracle-ads[opctl]
206 | - A model is registered in MLflow Tracking server
207 | - Container image is published to ``ocir``
208 |
209 | .. _deployment-specification-1:
210 |
211 | Deployment Specification
212 | ~~~~~~~~~~~~~~~~~~~~~~~~
213 |
214 | Create a model deployment specification in a YAML file. Refer to [schema] for
215 | the YAML specification. Copy the template below and customize it as per your
216 | requirements -
217 |
218 |
219 | .. tabs::
220 |
221 | .. code-tab:: yaml
222 |
223 | kind: deployment
224 | spec:
225 | infrastructure:
226 | kind: infrastructure
227 | type: modelDeployment
228 | spec:
229 | logGroupId: ocid1.loggroup.oc1.iad..
230 | logId: ocid1.log.oc1.iad..
231 | projectId: ocid1.datascienceproject.oc1.iad..
232 | compartmentId: ocid1.compartment.oc1..
233 | shapeName: VM.Standard.E3.Flex
234 | shapeConfigDetails:
235 | memoryInGBs: 32
236 | ocpus: 4
237 | blockStorageSize: 50
238 | replica: 1
239 | runtime:
240 | kind: runtime
241 | type: container
242 | spec:
243 | image: {region}.ocir.io//
244 | serverPort: 5001
245 | healthCheckPort: 5001
246 |
247 | .. _create-deployment-1:
248 |
249 | Create Deployment
250 | ~~~~~~~~~~~~~~~~~
251 |
252 | Use MLflow CLI/SDK to create a deployment. Once the deployment
253 | specification is created, pass it as input to the ``mlflow deployments``
254 | command using
255 | ``--config deploy-config-file=`` and
256 | ``oci-datascience`` as the target.
257 |
258 | .. tabs::
259 |
260 | .. code-tab:: shell
261 |
262 | export MLFLOW_TRACKING_URI=
263 |
264 | mlflow deployments create --name -m models:// -t oci-datascience --config deploy-config-file=deployment_specification.yaml
265 |
266 | .. _invoke-inference-endpoint-1:
267 |
268 | Invoke Inference Endpoint
269 | ~~~~~~~~~~~~~~~~~~~~~~~~~
270 |
271 | Invoke the endpoint through code or CLI. Here is an example of invoking
272 | an endpoint using ``oci raw-request`` CLI command -
273 |
274 | ::
275 |
276 | data='{"dataframe_split": {"columns":["mean radius","mean texture","mean perimeter","mean area","mean smoothness","mean compactness","mean concavity","mean concave points"],"index":[0],"data":[[17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471]]}}'
277 |
278 | oci raw-request --http-method POST --target-uri https://modeldeployment.us-ashburn-1.oci.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.iad../predict --request-body "$data"
279 |
280 | {
281 | "data": {
282 | "predictions": [
283 | 0
284 | ]
285 | },
286 | "headers": {
287 | "Connection": "keep-alive",
288 | "Content-Length": "20",
289 | "Content-Type": "application/json",
290 | "Date": "Wed, 15 Feb 2023 04:26:18 GMT",
291 | "Server": "nginx/1.14.1",
292 | "X-Content-Type-Options": "nosniff",
293 | "opc-request-id": "72BD2656826241C586FD29D9F03EA2E1/D95ADB6267CD5390F9E6D26108E60AF9/907E1377442682A9A72AB1D797056240"
294 | },
295 | "status": "200 OK"
296 | }
297 |
298 | Update Model Deployment Details
299 | -------------------------------
300 |
301 | To update the model deployment configuration, use the YAML specification file
302 | that was created for deployment and then change the attributes
303 | that you want to update. Use the MLflow CLI/SDK, provide the OCID of the
304 | model deployment as the name parameter, and use the
305 | ``--config deploy-config-file=`` option.
306 |
307 | ::
308 |
309 | mlflow deployments update --name ocid1.datasciencemodeldeployment.oc1.. -t oci-datascience --config deploy-config-file=./deployment_update_config.yaml
310 |
311 | **Note:** You may not be able to change all the configuration in one
312 | pass. Check `Editing Model
313 | Deployments `__
314 | for more details.
315 |
316 | Get Model Deployment Information
317 | --------------------------------
318 |
319 | Fetch model deployment information by providing the OCID of the
320 | model deployment as the name parameter.
321 |
322 | ::
323 |
324 | mlflow deployments get -t oci-datascience --name ocid1.datasciencemodeldeployment.oc1..
325 |
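Both operations are also available through the MLflow deployments API; a minimal sketch,
using placeholder OCIDs:

.. code-block:: python

    from mlflow.deployments import get_deploy_client

    client = get_deploy_client("oci-datascience")

    # update an existing deployment from a modified YAML specification
    client.update_deployment(
        name="ocid1.datasciencemodeldeployment.oc1..",  # placeholder OCID
        config={"deploy-config-file": "./deployment_update_config.yaml"},
    )

    # fetch the current model deployment details
    print(client.get_deployment(name="ocid1.datasciencemodeldeployment.oc1.."))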
--------------------------------------------------------------------------------
/docs/source/quickstart.rst:
--------------------------------------------------------------------------------
1 | ##########
2 | Quickstart
3 | ##########
4 |
5 | `MLflow `_ is a popular open source platform to manage the ML lifecycle, including
6 | experimentation, reproducibility, deployment, and a central model registry. MLflow currently offers four components:
7 |
8 | - MLflow Tracking *(experiment tracking)*
9 | - MLflow Projects *(code packaging format for reproducible runs using Conda on Data Science Jobs and Data Flow)*
10 | - MLflow Models *(package models for deployment in real time scoring, and batch scoring)*
11 | - Model Registry *(manage models)*
12 |
13 | To use MLflow with `Oracle Cloud Infrastructure (OCI) Data Science `_, you will first need to install the Oracle OCI MLflow plugin:
14 |
15 | .. note::
16 |
17 | The OCI MLflow plugin will also install (if necessary) the ``mlflow`` and ``oracle-ads`` packages.
18 |
19 | .. list-table::
20 | :widths: 25 75
21 | :header-rows: 1
22 | :align: left
23 |
24 | * - Package Name
25 | - Latest Version
26 | * - MLflow
27 | - .. image:: https://img.shields.io/pypi/v/mlflow.svg?style=for-the-badge&logo=pypi&logoColor=white
28 | * - oracle-ads
29 | - .. image:: https://img.shields.io/pypi/v/oracle-ads.svg?style=for-the-badge&logo=pypi&logoColor=white
30 |
31 |
32 | - Install the ``oci-mlflow`` plugin
33 |
34 | .. code-block:: shell
35 |
36 | pip install oci-mlflow
37 |
38 | - Test ``oci-mlflow`` plugin setup
39 |
40 | .. code-block:: shell
41 |
42 | mlflow deployments help -t oci-datascience
43 |
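You can also verify the plugin installation from Python; a minimal sketch:

.. code-block:: python

    # confirm the plugin package is importable and report its version
    import oci_mlflow

    print(oci_mlflow.__version__)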
44 |
45 | Background reading to understand the concepts of MLflow and OCI Data Science:
46 |
47 | - Getting started with `OCI Data Science Jobs `__
48 | - Getting started with `Oracle Accelerated Data Science SDK `__ to simplify `creating `__ and `running `__ Jobs
49 | - Getting started with `Data Science Environments `__
50 | - Getting started with `Custom Conda Environments `__
51 |
52 | **Authentication and Policies:**
53 |
54 | - Getting started with `OCI Data Science Policies `__
55 | - `API Key-Based Authentication `__ - ``api_key``
56 | - `Resource Principal Authentication `__ - ``resource_principal``
57 | - `Instance Principal Authentication `__ - ``instance_principal``
58 |
59 | **OCI Integration Points**
60 |
61 | The ``oci_mlflow`` plugin enables OCI users to use OCI resources to manage their machine learning use case lifecycle. The
62 | table below provides the mapping between the MLflow features and the OCI resources that are used.
63 |
64 | .. note::
65 | .. list-table::
66 | :widths: 15 10
67 | :header-rows: 1
68 | :align: left
69 |
70 | * - MLflow Use Case
71 | - OCI Resource
72 | * - User running machine learning experiments on notebook, logs model artifacts, model performance etc
73 | - Data Science Jobs, Object Storage, MySQL
74 | * - Batch workloads using spark
75 | - Data Flow, Object Storage, MySQL
76 | * - Model Deployment
77 | - Data Science Model Deployment
78 | * - User running machine learning experiments on notebook, logs model artifacts, model performance etc
79 | - Object Storage, MySQL
80 |
--------------------------------------------------------------------------------
/docs/source/release_notes.rst:
--------------------------------------------------------------------------------
1 | =============
2 | Release Notes
3 | =============
4 |
5 | 1.0.2
6 | -----
7 | Release date: Jul 27, 2023
8 |
9 | **New Features and Enhancements:**
10 |
11 | * Changed the default authentication to the resource principal.
12 |
13 |
14 | 1.0.1
15 | -----
16 | Release date: Jun 15, 2023
17 |
18 | **New Features and Enhancements:**
19 |
20 | * Updated the ``README-development.md`` file for better clarity and ease of use.
21 | * Improved the ``Dockerfile`` to provide the option of running the tracking server using a local ``oci-mlflow`` wheel.
22 | * Refactored Object Storage (OS) plugin to leverage Oracle Cloud Infrastructure (OCI) `OS UploadManager `__, enhancing the functionality and improving performance.
23 |
24 | **Bug Fixes:**
25 |
26 | * Fixed the issue with ``launch_mlflow.sh`` where the copyright information was added in the wrong place, resulting in an error when running ``launch_mlflow.sh``.
--------------------------------------------------------------------------------
/oci_mlflow/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*--
3 |
4 | # Copyright (c) 2023 Oracle and/or its affiliates.
5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6 |
7 | import json
8 | import logging
9 | import os
10 |
11 | logger = logging.getLogger("oci.mlflow")
12 | logger.setLevel(logging.INFO)
13 |
14 | # https://packaging.python.org/en/latest/guides/single-sourcing-package-version/#single-sourcing-the-package-version
15 | from importlib import metadata
16 |
17 | __version__ = metadata.version("oci_mlflow")
18 |
19 |
20 | def setup_default_auth():
21 | """Setup default auth."""
22 | if os.environ.get("OCIFS_IAM_TYPE") and os.environ.get("OCI_IAM_TYPE"):
23 | return
24 | if os.environ.get("OCIFS_IAM_TYPE"):
25 | os.environ["OCI_IAM_TYPE"] = os.environ["OCIFS_IAM_TYPE"]
26 | elif os.environ.get("OCI_IAM_TYPE"):
27 | os.environ["OCIFS_IAM_TYPE"] = os.environ["OCI_IAM_TYPE"]
28 | elif os.environ.get("OCI_RESOURCE_PRINCIPAL_VERSION"):
29 | os.environ["OCIFS_IAM_TYPE"] = "resource_principal"
30 | os.environ["OCI_IAM_TYPE"] = "resource_principal"
31 | else:
32 | os.environ["OCIFS_IAM_TYPE"] = "api_key"
33 | os.environ["OCI_IAM_TYPE"] = "api_key"
34 |
35 |
36 | setup_default_auth()
37 |
--------------------------------------------------------------------------------
/oci_mlflow/auth_plugin.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*--
3 |
4 | # Copyright (c) 2023 Oracle and/or its affiliates.
5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6 |
7 | from ads.common.auth import default_signer
8 | from mlflow.tracking.request_auth.abstract_request_auth_provider import RequestAuthProvider
9 |
10 | OCI_REQUEST_AUTH = "OCI_REQUEST_AUTH"
11 |
12 |
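# Per MLflow's request-auth plugin mechanism, the MLflow client selects this
# provider when it is configured to use the request-auth provider named above
# (for example, via the MLFLOW_TRACKING_AUTH environment variable), and then
# signs tracking-server requests with the OCI signer returned by get_auth().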
13 | class OCIMLFlowAuthRequestProvider(RequestAuthProvider):
14 |
15 | def get_name(self):
16 | """
17 | Get the name of the request auth provider.
18 |
19 | :return: str of request auth name
20 | """
21 | return OCI_REQUEST_AUTH
22 |
23 | def get_auth(self):
24 | """
25 |         Generate an OCI signer based on the OCI environment variables.
26 |
27 | :return: OCI MLFlow signer
28 | """
29 | return default_signer()["signer"]
30 |
--------------------------------------------------------------------------------
/oci_mlflow/oci_object_storage.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*--
3 |
4 | # Copyright (c) 2023 Oracle and/or its affiliates.
5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6 |
7 | import os
8 | from typing import List
9 | from urllib.parse import urlparse
10 |
11 | import fsspec
12 | from ads.common import auth
13 | from ads.common.oci_client import OCIClientFactory
14 | from mlflow.entities import FileInfo
15 | from mlflow.store.artifact.artifact_repo import ArtifactRepository
16 | from mlflow.utils.file_utils import relative_path_to_artifact_path
17 | from oci import object_storage
18 | from oci.auth.signers import InstancePrincipalsDelegationTokenSigner
19 | from ocifs import OCIFileSystem
20 |
21 | from oci_mlflow import logger
22 |
23 | OCI_SCHEME = "oci"
24 | OCI_PREFIX = f"{OCI_SCHEME}://"
25 | DEFAULT_DELEGATION_TOKEN_PATH = "/opt/spark/delegation-secrets/delegation.jwt"
26 | DELEGATION_TOKEN_PATH = "DELEGATION_TOKEN_PATH"
27 |
28 |
29 | def parse_os_uri(uri: str):
30 | """
31 | Parse an OCI object storage URI, returning tuple (bucket, namespace, path).
32 |
33 | Parameters
34 | ----------
35 | uri: str
36 | The OCI Object Storage URI.
37 |
38 | Returns
39 | -------
40 | Tuple
41 |         The (bucket, namespace, path)
42 |
43 |     Raises
44 |     ------
45 | Exception
46 | If provided URI is not an OCI OS bucket URI.
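
    Examples
    --------
    >>> parse_os_uri("oci://my-bucket@my-namespace/models/model.pkl")
    ('my-bucket', 'my-namespace', 'models/model.pkl')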
47 | """
48 | parsed = urlparse(uri)
49 | if parsed.scheme.lower() != OCI_SCHEME:
50 | raise Exception("Not an OCI object storage URI: %s" % uri)
51 | path = parsed.path
52 |
53 | if path.startswith("/"):
54 | path = path[1:]
55 |
56 | bucket, ns = parsed.netloc.split("@")
57 |
58 | return bucket, ns, path
59 |
60 |
61 | def get_token_path():
62 | """
63 | Gets delegation token path.
64 |
65 | Return
66 | ------
67 | str
68 | The delegation token path.
69 | """
70 | token_path = (
71 | DEFAULT_DELEGATION_TOKEN_PATH
72 | if os.path.exists(DEFAULT_DELEGATION_TOKEN_PATH)
73 | else os.environ.get(DELEGATION_TOKEN_PATH)
74 | )
75 | return token_path
76 |
77 |
78 | def get_delegation_token_signer(token_path: str):
79 | """
80 | Generate delegation token signer.
81 |
82 | Parameters
83 | ----------
84 | token_path: str
85 | The delegation token path.
86 |
87 | Return
88 | ------
89 | oci.auth.signers.InstancePrincipalsDelegationTokenSigner
90 | The delegation token signer.
91 |
92 | """
93 | with open(token_path) as fd:
94 | delegation_token = fd.read()
95 | signer = InstancePrincipalsDelegationTokenSigner(delegation_token=delegation_token)
96 | return signer
97 |
98 |
99 | def get_signer(token_path: str = None):
100 | """
101 | Generate default_signer. If running in Data Flow, use InstancePrincipalsDelegationTokenSigner.
102 | If running locally, use default signer.
103 |
104 | Parameters
105 | ----------
106 | token_path: str
107 | Defaults to None. The delegation token path.
108 |
109 | Return
110 | ------
111 | dict
112 | Contains keys - config, signer and client_kwargs.
113 |
114 |     - The config contains the configuration loaded from the default location if the default
115 |       auth mode is API keys; otherwise it is an empty dictionary.
116 |     - The signer contains the signer object created from the default auth mode.
117 |     - client_kwargs contains the `client_kwargs` from the default signer.
118 |
119 | """
120 | if token_path:
121 | auth.set_auth(
122 | signer_callable=get_delegation_token_signer,
123 | signer_kwargs={"token_path": token_path},
124 | )
125 | return auth.default_signer()
126 |
127 |
128 | class ArtifactUploader:
129 | """
130 | The class helper to upload model artifacts.
131 |
132 | Attributes
133 | ----------
134 | upload_manager: UploadManager
135 | The uploadManager simplifies interaction with the Object Storage service.
136 | """
137 |
138 | def __init__(self):
139 | """Initializes `ArtifactUploader` instance."""
140 | self.upload_manager = object_storage.UploadManager(
141 | OCIClientFactory(**get_signer(token_path=get_token_path())).object_storage
142 | )
143 |
144 | def upload(self, file_path: str, dst_path: str):
145 | """Uploads model artifacts.
146 |
147 | Parameters
148 | ----------
149 | file_path: str
150 | The source file path.
151 | dst_path: str
152 | The destination path.
153 | """
154 | bucket_name, namespace_name, object_name = parse_os_uri(dst_path)
155 | logger.debug(f"{bucket_name=}, {namespace_name=}, {object_name=}")
156 | response = self.upload_manager.upload_file(
157 | namespace_name=namespace_name,
158 | bucket_name=bucket_name,
159 | object_name=object_name,
160 | file_path=file_path,
161 | )
162 | logger.debug(response)
163 |
164 |
165 | class OCIObjectStorageArtifactRepository(ArtifactRepository):
166 | """MLFlow Plugin implementation for storing artifacts to OCI Object Storage."""
167 |
168 | def _download_file(self, remote_file_path, local_path):
169 | if not remote_file_path.startswith(self.artifact_uri):
170 | full_path = os.path.join(self.artifact_uri, remote_file_path)
171 | else:
172 | full_path = remote_file_path
173 | fs: OCIFileSystem = self.get_fs()
174 | logger.info(f"{full_path}, {remote_file_path}")
175 | fs.download(full_path, str(local_path))
176 |
177 | def log_artifact(self, local_file: str, artifact_path: str = None):
178 | """
179 | Logs a local file as an artifact, optionally taking an ``artifact_path`` to place it in
180 | within the run's artifacts. Run artifacts can be organized into directories, so you can
181 | place the artifact in a directory this way.
182 |
183 | Parameters
184 | ----------
185 | local_file:str
186 | Path to artifact to log.
187 | artifact_path:str
188 | Directory within the run's artifact directory in which to log the artifact.
189 | """
190 | # Since the object storage path should contain "/", the code below needs to use concatenation "+" instead of
191 | # os.path.join(). The latter can introduce "\" in Windows which can't be recognized by object storage as a valid prefix.
192 | # `artifact_path` must not be space character like " " or " ".
193 | if isinstance(artifact_path, str) and artifact_path.isspace():
194 | raise ValueError("`artifact_path` must not be whitespace string.")
195 | artifact_path = artifact_path.rstrip("/") + "/" if artifact_path else ""
196 | dest_path = self.artifact_uri.rstrip("/") + "/" + artifact_path + os.path.basename(local_file)
197 | ArtifactUploader().upload(local_file, dest_path)
198 |
199 | def log_artifacts(self, local_dir: str, artifact_path: str = None):
200 | """
201 | Logs the files in the specified local directory as artifacts, optionally taking
202 | an ``artifact_path`` to place them in within the run's artifacts.
203 |
204 | Parameters
205 | ----------
206 | local_dir:str
207 | Directory of local artifacts to log.
208 | artifact_path:str
209 | Directory within the run's artifact directory in which to log the artifacts.
210 | """
211 | artifact_uploader = ArtifactUploader()
212 | # Since the object storage path should contain "/", the code below needs to use concatenation "+" instead of
213 | # os.path.join(). The latter can introduce "\" in Windows which can't be recognized by object storage as a valid prefix.
214 | # `artifact_path` must not be space character like " " or " ".
215 | if isinstance(artifact_path, str) and artifact_path.isspace():
216 | raise ValueError("`artifact_path` must not be whitespace string.")
217 | artifact_path = artifact_path.rstrip("/") + "/" if artifact_path else ""
218 | dest_path = self.artifact_uri.rstrip("/") + "/" + artifact_path
219 | local_dir = os.path.abspath(local_dir)
220 |
221 | for root, _, filenames in os.walk(local_dir):
222 | upload_path = dest_path
223 | if root != local_dir:
224 | rel_path = os.path.relpath(root, local_dir)
225 | rel_path = relative_path_to_artifact_path(rel_path)
226 | upload_path = dest_path + rel_path
227 | for f in filenames:
228 | artifact_uploader.upload(
229 | file_path=os.path.join(root, f),
230 | dst_path=upload_path + f
231 | )
232 |
233 | def get_fs(self):
234 | """
235 |         Gets the fsspec filesystem based on the URI scheme.
236 | """
237 | self.fs = fsspec.filesystem(
238 | urlparse(self.artifact_uri).scheme,
239 | **get_signer(token_path=get_token_path()),
240 | ) # FileSystem class corresponding to the URI scheme.
241 |
242 | return self.fs
243 |
244 | def list_artifacts(self, path: str = "") -> List[FileInfo]:
245 | """
246 | Return all the artifacts for this run_id directly under path. If path is a file, returns
247 | an empty list. Will error if path is neither a file nor directory.
248 |
249 | Parameters
250 | ----------
251 | path:str
252 | Relative source path that contains desired artifacts
253 |
254 | Returns
255 | -------
256 | List[FileInfo]
257 | List of artifacts as FileInfo listed directly under path.
258 | """
259 | result = []
260 | dest_path = self.artifact_uri
261 | if path:
262 | dest_path = os.path.join(dest_path, path)
263 |
264 | logger.debug(f"{path=}, {self.artifact_uri=}, {dest_path=}")
265 |
266 | fs = self.get_fs()
267 | files = (
268 | os.path.relpath(f"{OCI_PREFIX}{f}", self.artifact_uri)
269 | for f in fs.glob(f"{dest_path}/*")
270 | )
271 |
272 | for file in files:
273 | file_isdir = fs.isdir(os.path.join(self.artifact_uri, file))
274 | size = 0
275 | if not file_isdir:
276 | size = fs.info(os.path.join(self.artifact_uri, file)).get("size", 0)
277 | result.append(FileInfo(file, file_isdir, size))
278 |
279 | logger.debug(f"{result=}")
280 |
281 | result.sort(key=lambda f: f.path)
282 | return result
283 |
284 | def delete_artifacts(self, artifact_path: str = None):
285 | """
286 | Delete the artifacts at the specified location.
287 | Supports the deletion of a single file or of a directory. Deletion of a directory
288 | is recursive.
289 |
290 | Parameters
291 | ----------
292 | artifact_path: str
293 | Path of the artifact to delete.
294 | """
295 | dest_path = self.artifact_uri
296 | if artifact_path:
297 | dest_path = os.path.join(self.artifact_uri, artifact_path)
298 | fs = self.get_fs()
299 | files = fs.ls(dest_path, refresh=True)
300 | for to_delete_obj in files:
301 | fs.delete(to_delete_obj)
302 |
--------------------------------------------------------------------------------
/oci_mlflow/telemetry_logging.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8; -*-
3 |
4 | # Copyright (c) 2023 Oracle and/or its affiliates.
5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6 |
7 | import re
8 | import os
9 | from dataclasses import dataclass
10 | from typing import Any, Callable
11 | from functools import wraps
12 |
13 | TELEMETRY_ARGUMENT_NAME = "telemetry"
14 |
15 |
16 | def telemetry(
17 | entry_point: str = "",
18 | name: str = "oci.mlflow",
19 | environ_variable: str = "EXTRA_USER_AGENT_INFO",
20 | ) -> Callable:
21 | """The telemetry decorator.
22 |
23 | Parameters
24 | ----------
25 | entry_point: str
26 | The entry point of the telemetry.
27 | Example: "plugin=project&action=run"
28 | name: str
29 | The name of the telemetry.
30 | environ_variable: (str, optional). Defaults to `EXTRA_USER_AGENT_INFO`.
31 | The name of the environment variable to capture the telemetry sequence.
32 |
33 | Examples
34 | --------
35 | >>> @telemetry(entry_point="plugin=project&action=run",name="oci.mlflow")
36 |     ... def test_function(**kwargs):
37 | ... print(kwargs.pop("telemetry"))
38 | """
39 |
40 | def decorator(func: Callable) -> Callable:
41 | @wraps(func)
42 | def wrapper(*args, **kwargs) -> Any:
43 | telemetry = Telemetry(name=name, environ_variable=environ_variable).begin(
44 | entry_point
45 | )
46 | try:
47 | # Injects the telemetry object to the kwargs arguments of the decorated function.
48 | # This is necessary to be able to add some extra information to the telemetry
49 | # from the decorated function.
50 | return func(*args, **{**kwargs, **{TELEMETRY_ARGUMENT_NAME: telemetry}})
51 | except:
52 | raise
53 | finally:
54 | telemetry.clean()
55 |
56 | return wrapper
57 |
58 | return decorator
59 |
60 |
61 | @dataclass
62 | class Telemetry:
63 | """Class to capture telemetry sequence into the environment variable.
64 | It is doing nothing but adding the telemetry sequence in the specified environment variable.
65 |
66 | Attributes
67 | ----------
68 | name: str
69 | The name of the telemetry.
70 | environ_variable: (str, optional). Defaults to `EXTRA_USER_AGENT_INFO`.
71 | The name of the environment variable to capture the telemetry sequence.
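
    Examples
    --------
    >>> t = Telemetry(name="oci.mlflow")
    >>> t.begin("plugin=project&action=run").add("key=value").print()
    EXTRA_USER_AGENT_INFO = oci.mlflow&plugin=project&action=run&key=value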
72 | """
73 |
74 | name: str
75 | environ_variable: str = "EXTRA_USER_AGENT_INFO"
76 |
77 | def __post_init__(self):
78 | self.name = self._prepare(self.name)
79 | os.environ[self.environ_variable] = ""
80 |
81 | def clean(self) -> "Telemetry":
82 | """Cleans the associated environment variable.
83 |
84 | Returns
85 | -------
86 | self: Telemetry
87 | An instance of the Telemetry.
88 | """
89 | os.environ[self.environ_variable] = ""
90 | return self
91 |
92 | def _begin(self):
93 | self.clean()
94 | os.environ[self.environ_variable] = self.name
95 |
96 | def begin(self, value: str = "") -> "Telemetry":
97 | """The method that needs to be invoked in the beginning of the capturing telemetry sequence.
98 | It resets the value of the associated environment variable.
99 |
100 | Parameters
101 | ----------
102 | value: str
103 | The value that need to be added to the telemetry.
104 |
105 | Returns
106 | -------
107 | self: Telemetry
108 | An instance of the Telemetry.
109 | """
110 | return self.clean().add(self.name).add(value)
111 |
112 | def add(self, value: str) -> "Telemetry":
113 | """Adds the new value to the telemetry.
114 |
115 | Parameters
116 | ----------
117 | value: str
118 | The value that need to be added to the telemetry.
119 |
120 | Returns
121 | -------
122 | self: Telemetry
123 | An instance of the Telemetry.
124 | """
125 | if not os.environ.get(self.environ_variable):
126 | self._begin()
127 |
128 | if value:
129 | current_value = os.environ.get(self.environ_variable, "")
130 | new_value = self._prepare(value)
131 | if new_value not in current_value:
132 | os.environ[self.environ_variable] = f"{current_value}&{new_value}"
133 | return self
134 |
135 | def print(self) -> None:
136 | """Prints the telemetry sequence from environment variable."""
137 | print(f"{self.environ_variable} = {os.environ.get(self.environ_variable)}")
138 |
139 | def _prepare(self, value: str):
140 | """Replaces the special characters with the `_` in the input string."""
141 | return (
142 | re.sub("[^a-zA-Z0-9\.\-\_\&\=]", "_", re.sub(r"\s+", " ", value))
143 | if value
144 | else ""
145 | )
146 |
--------------------------------------------------------------------------------
/oci_mlflow/templates/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oracle/oci-mlflow/1e7b9f36e5afe70d00dfc56faca6b0a8e311366d/oci_mlflow/templates/__init__.py
--------------------------------------------------------------------------------
/oci_mlflow/templates/project_description.jinja2:
--------------------------------------------------------------------------------
1 | {% if job_info["spec"]["infrastructure"]["type"] == "dataScienceJob" %}
2 | ## OCI Data Science Job Details
3 | #### **Job:** [{{job_info["spec"]["name"]}}](https://cloud.oracle.com/data-science/jobs/{{job_info["spec"]["id"]}})
4 | #### **Job Run:** [{{job_run_info["displayName"]}}](https://cloud.oracle.com/data-science/job-runs/{{job_run_info["id"]}})
5 | #### **Compute instance shape:** {{job_info["spec"]["infrastructure"]["spec"]["shapeName"]}}
6 | #### **Storage:** {{job_info["spec"]["infrastructure"]["spec"]["blockStorageSize"]}} GB
7 | {% else %}
8 | ## OCI Data Flow Application Details
9 | #### **Application:** [{{job_info["spec"]["name"]}}](https://cloud.oracle.com/data-flow/apps/details/{{job_info["spec"]["id"]}})
10 | #### **Application Run:** [{{job_run_info["displayName"]}}](https://cloud.oracle.com/data-flow/runs/details/{{job_run_info["id"]}})
11 | #### **Driver shape:** {{job_info["spec"]["infrastructure"]["spec"]["driverShape"]}}
12 | #### **Executor shape:** {{job_info["spec"]["infrastructure"]["spec"]["executorShape"]}}
13 | #### **Number of executors:** {{job_info["spec"]["infrastructure"]["spec"]["numExecutors"]}}
14 | #### **Spark version:** {{job_info["spec"]["infrastructure"]["spec"]["sparkVersion"]}}
15 | {% endif %}
16 |
--------------------------------------------------------------------------------
/oci_mlflow/templates/runtime.yaml.jinja2:
--------------------------------------------------------------------------------
1 | MODEL_ARTIFACT_VERSION: '3.0'
2 | MODEL_DEPLOYMENT:
3 | INFERENCE_CONDA_ENV:
4 | INFERENCE_ENV_PATH: {{conda_pack_uri}}
5 | INFERENCE_PYTHON_VERSION: {{python_version}}
6 |
--------------------------------------------------------------------------------
/oci_mlflow/templates/score.py.jinja2:
--------------------------------------------------------------------------------
1 | import mlflow.pyfunc
2 | import pandas as pd
3 | import os
4 | import numpy as np
5 |
6 |
7 | def load_model():
8 | cur_dir = os.path.dirname(os.path.realpath(__file__))
9 | return mlflow.pyfunc.load_model(cur_dir)
10 |
11 |
12 | def predict(data, model=load_model()):
13 | if isinstance(data, dict):
14 | payload = pd.DataFrame(**data)
15 | else:
16 | payload = np.array(data)
17 | pred = model.predict(payload).tolist()
18 | return {'prediction': pred}
19 |
--------------------------------------------------------------------------------
/oci_mlflow/utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8; -*-
3 |
4 | # Copyright (c) 2023 Oracle and/or its affiliates.
5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6 |
7 | import inspect
8 | import os
9 | from dataclasses import dataclass
10 | from typing import Dict, Union
11 |
12 | import ads
13 | import ocifs
14 | import yaml
15 | from ads.common.auth import AuthType, default_signer
16 | from ads.opctl.conda.cmds import _create, _publish
17 | from ads.opctl.config.base import ConfigProcessor
18 | from ads.opctl.config.merger import ConfigMerger
19 | from ads.opctl.constants import DEFAULT_ADS_CONFIG_FOLDER
20 | from oci.config import DEFAULT_LOCATION, DEFAULT_PROFILE
21 |
22 | from oci_mlflow import __version__, logger
23 |
24 | OCIFS_IAM_TYPE = "OCIFS_IAM_TYPE"
25 | WORK_DIR = "{work_dir}"
26 |
27 | DEFAULT_TAGS = {"oracle_ads": ads.__version__, "oci_mlflow": __version__}
28 |
29 |
30 | class UnsupportedAuthTypeError(Exception):
31 | def __init__(self, auth_type: str):
32 | super().__init__(
33 | f"The provided authentication type: {auth_type} is not supported. "
34 | f"Allowed values are: {AuthType.values()}"
35 | )
36 |
37 |
38 | @dataclass
39 | class OCIBackendConfig:
40 | """Class representing OCI config.
41 |
42 | Attributes
43 | ----------
44 | oci_auth: str
45 | OCI auth type.
46 | oci_config_path: str
47 | Path to the OCI auth config.
48 | oci_profile: str
49 | The OCI auth profile.
50 | """
51 |
52 | oci_auth: str = ""
53 | oci_config_path: str = ""
54 | oci_profile: str = ""
55 |
56 | def __post_init__(self):
57 | self._validate()
58 |
59 | def _validate(self):
60 |
61 | # authentication type
62 | self.oci_auth = (
63 | self.oci_auth
64 | or os.environ.get(OCIFS_IAM_TYPE)
65 | or AuthType.RESOURCE_PRINCIPAL
66 | )
67 | if self.oci_auth not in AuthType:
68 | raise UnsupportedAuthTypeError(self.oci_auth)
69 |
70 | # OCI AUTH config path
71 | self.oci_config_path = self.oci_config_path or DEFAULT_LOCATION
72 |
73 | # OCI AUTH profile
74 | self.oci_profile = self.oci_profile or DEFAULT_PROFILE
75 |
76 | @classmethod
77 | def from_dict(cls, config: Dict[str, str]) -> "OCIBackendConfig":
78 | """Creates an instance of the OCIBackendConfig class from a dictionary.
79 |
80 | Parameters
81 | ----------
82 | config: Dict[str, str]
83 | List of properties and values in dictionary format.
84 |
85 | Returns
86 | -------
87 | OCIBackendConfig
88 | Instance of the OCIBackendConfig.
89 | """
90 | if not config:
91 | return cls()
92 |
93 | return cls(
94 | **{
95 | k: v
96 | for k, v in config.items()
97 | if k in inspect.signature(cls).parameters
98 | }
99 | )
100 |
101 |
102 | @dataclass
103 | class OCIProjectBackendConfig(OCIBackendConfig):
104 | """Class representing OCI project backend config.
105 |
106 | Attributes
107 | ----------
108 | oci_job_template_path: str
109 | Path to the Job template YAML.
110 | project_uri: str
111 | The project content location.
112 | work_dir: str
113 | The project work dir.
114 | """
115 |
116 | oci_job_template_path: str = ""
117 | project_uri: str = ""
118 | work_dir: str = ""
119 |
120 | def __post_init__(self):
121 | super()._validate()
122 | self._validate()
123 |
124 | def _validate(self):
125 |
126 | # project URI
127 | if not self.project_uri:
128 | raise ValueError("The `project_uri` is not provided.")
129 |
130 | # work dir
131 | if not self.work_dir:
132 | raise ValueError("The `work_dir` is not provided.")
133 | self.work_dir = os.path.abspath(os.path.expanduser(self.work_dir))
134 |
135 | # Job template path
136 | if not self.oci_job_template_path:
137 | raise ValueError(
138 | "The `oci_job_template_path` is not provided in `oci-config.json`."
139 | )
140 | self.oci_job_template_path = os.path.abspath(
141 | os.path.expanduser(
142 | self.oci_job_template_path.replace(WORK_DIR, self.work_dir)
143 | )
144 | )
145 |
146 | if not os.path.exists(self.oci_job_template_path):
147 | raise ValueError(f"The `{self.oci_job_template_path}` does not exist.")
148 |
149 | if not self.oci_job_template_path.lower().endswith((".yml", ".yaml")):
150 | raise ValueError(
151 | f"Unsupported file format for the `{self.oci_job_template_path}`. "
152 | "Allowed formats are: [.yaml, .yml]"
153 | )
154 |
155 |
156 | def generate_slug(name: str, version: str) -> str:
157 | return f"{name}_v{version}".replace(" ", "").replace(".", "_").lower()
158 |
159 |
160 | def generate_conda_pack_uri(
161 | name: str, version: str, conda_pack_os_prefix: str, slug: str, gpu: bool
162 | ) -> str:
163 | return os.path.join(
164 | conda_pack_os_prefix,
165 | "gpu" if gpu else "cpu",
166 | name,
167 | version,
168 | slug,
169 | )
170 |
171 |
172 | def create_conda(
173 | name: str,
174 | version: str = "1",
175 | environment_file: str = None,
176 | conda_pack_folder: str = None,
177 | gpu: bool = False,
178 | overwrite: bool = False,
179 | ) -> str:
180 | """
181 |     Creates the conda pack and returns its slug name.
182 | """
183 | logger.info("Creating conda environment with details - ")
184 | with open(environment_file) as ef:
185 | logger.info(ef.read())
186 | return _create(name, version, environment_file, conda_pack_folder, gpu, overwrite)
187 |
188 |
189 | # TODO: Move conda create and publish to ADS - https://jira.oci.oraclecorp.com/browse/ODSC-38641
190 | def publish(
191 | slug: str,
192 | conda_pack_os_prefix: str,
193 | conda_pack_folder: str,
194 | overwrite: bool,
195 | ads_config: str = DEFAULT_ADS_CONFIG_FOLDER,
196 | name: str = " ",
197 | version: str = "1",
198 | gpu: bool = False,
199 | ):
200 | """
201 |     Publishes the conda pack to object storage.
202 | 
203 |     TODO: Remove the name and version parameters once the ADS publish method is updated to return the conda pack URI
204 | """
205 | logger.info(
206 | f"Publishing conda environment to object storage: {conda_pack_os_prefix}"
207 | )
208 | p = ConfigProcessor().step(ConfigMerger, ads_config=ads_config)
209 | exec_config = p.config["execution"]
210 |     # By default, publish uses a container to zip and upload the artifact.
211 |     # Set NO_CONTAINER so the artifact is zipped and uploaded from the host instead.
212 | publish_option = os.environ.get("NO_CONTAINER")
213 | os.environ["NO_CONTAINER"] = "True"
214 | _publish(
215 | conda_slug=slug,
216 | conda_uri_prefix=conda_pack_os_prefix,
217 | conda_pack_folder=conda_pack_folder,
218 | overwrite=overwrite,
219 | oci_config=exec_config.get("oci_config"),
220 | oci_profile=exec_config.get("oci_profile"),
221 | auth_type=exec_config["auth"],
222 | )
223 | if publish_option:
224 | os.environ["NO_CONTAINER"] = publish_option
225 | else:
226 | os.environ.pop("NO_CONTAINER", None)
227 |
228 | return generate_conda_pack_uri(name, version, conda_pack_os_prefix, slug, gpu)
229 |
230 |
231 | def build_and_publish_conda_pack(
232 | name: str,
233 | version: str,
234 | environment_file: str,
235 | conda_pack_folder: str,
236 | conda_pack_os_prefix: str,
237 | gpu: bool = False,
238 | overwrite: bool = False,
239 | ads_config: str = DEFAULT_ADS_CONFIG_FOLDER,
240 | ):
241 | """
242 |     * If overwrite, always create and publish.
243 |     * If not overwrite and the conda pack already exists in object storage, skip create and publish and let the user know.
244 |     * If not overwrite and the pack does not exist in object storage but a local copy exists, publish from the local copy.
245 |
246 | """
247 | slug = generate_slug(name, version)
248 | conda_pack_uri = generate_conda_pack_uri(
249 | name=name,
250 | version=version,
251 | conda_pack_os_prefix=conda_pack_os_prefix,
252 | slug=slug,
253 | gpu=gpu,
254 | )
255 | fs = ocifs.OCIFileSystem(**default_signer())
256 | if fs.exists(conda_pack_uri) and not overwrite:
257 | logger.info(
258 | f"Conda pack exists at {conda_pack_uri}. Skipping build and publish. If you want to overwrite, set overwrite to true"
259 | )
260 | else:
261 | if os.path.exists(os.path.join(conda_pack_folder, slug)) and not overwrite:
262 | logger.info(
263 | f"Found an environment at {os.path.join(conda_pack_folder, slug)} which matches the name and version. Change version to create a new pack or set overwrite to true"
264 | )
265 | else:
266 | create_conda(
267 | name, version, environment_file, conda_pack_folder, gpu, overwrite
268 | )
269 | logger.info(
270 | f"Created conda pack at {os.path.join(conda_pack_folder, slug)}"
271 | )
272 | conda_pack_uri = publish(
273 | slug,
274 | conda_pack_os_prefix=conda_pack_os_prefix,
275 | conda_pack_folder=conda_pack_folder,
276 | overwrite=overwrite,
277 | ads_config=ads_config,
278 | gpu=gpu,
279 | name=name,
280 | version=version,
281 | )
282 | logger.info(f"Published conda pack at {conda_pack_uri}")
283 | return conda_pack_uri
284 |
285 |
286 | def resolve_python_version(conda_yaml_file: str) -> Union[str, None]:
287 | """
288 |     Loops through the dependencies section of the conda YAML file to find the python version.
289 | 
290 |     Limitation: assumes the pattern `python=version`; it will fail if the YAML pins python with a comparison operator, e.g. `python>=version`.
291 | """
292 | version = None
293 | with open(conda_yaml_file) as cf:
294 | env = yaml.load(cf, Loader=yaml.SafeLoader)
295 | python = [
296 | dep
297 | for dep in env["dependencies"]
298 | if isinstance(dep, str) and dep.startswith("python")
299 | ]
300 | version = python[0].split("=")[1] if len(python) > 0 else None
301 | return version
302 |
--------------------------------------------------------------------------------
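A usage sketch for `resolve_python_version` under its stated limitation (the file name and contents below are assumptions):

    # environment.yaml (assumed contents):
    #   dependencies:
    #     - python=3.8
    #     - pip
    from oci_mlflow.utils import resolve_python_version

    print(resolve_python_version("environment.yaml"))  # -> "3.8"
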
/pyproject.toml:
--------------------------------------------------------------------------------
1 | ## This file is created and used instead of setup.py for building and installing the oci-mlflow package. This change
2 | ## follows the best practice of "not invoking setup.py directly"; see the detailed explanation here:
3 | ## https://blog.ganssle.io/articles/2021/10/setup-py-deprecated.html.
4 | ## Check README-development.md and the Makefile for instructions on how to install or build the package locally.
5 |
6 | [build-system]
7 | # These are the assumed default build requirements from pip:
8 | # https://pip.pypa.io/en/stable/reference/pip/#pep-517-and-518-support
9 | # PEP 517 – A build-system independent format for source trees - https://peps.python.org/pep-0517/
10 | requires = ["flit-core >= 3.8"]
11 | build-backend = "flit_core.buildapi"
12 |
13 |
14 | [project]
15 | # Declaring project metadata
16 | # https://packaging.python.org/en/latest/specifications/declaring-project-metadata/
17 | # PEP 621 – Storing project metadata in pyproject.toml - https://peps.python.org/pep-0621/
18 | # PEP 518 – Specifying Minimum Build System Requirements for Python Projects https://peps.python.org/pep-0518/
19 |
20 | # Required
21 | name = "oci-mlflow" # the install (PyPI) name
22 | version = "1.0.2"
23 |
24 | # Optional
25 | description = "OCI MLflow plugin to use OCI resources within MLflow"
26 | readme = {file = "README.md", content-type = "text/markdown"}
27 | requires-python = ">=3.8"
28 | license = {file = "LICENSE.txt"}
29 | authors = [
30 | {name = "Oracle Cloud Infrastructure Data Science"}
31 | ]
32 | keywords = [
33 | "Oracle Cloud Infrastructure",
34 | "OCI",
35 | "Object Storage",
36 | "MLflow",
37 | ]
38 | classifiers = [
39 | "Development Status :: 5 - Production/Stable",
40 | "Intended Audience :: Developers",
41 | "License :: OSI Approved :: Universal Permissive License (UPL)",
42 | "Operating System :: OS Independent",
43 | "Programming Language :: Python :: 3.8",
44 | "Programming Language :: Python :: 3.9",
45 | "Programming Language :: Python :: 3.10",
46 | ]
47 |
48 | # PEP 508 – Dependency specification for Python Software Packages - https://peps.python.org/pep-0508/
49 | # In dependencies, use "; platform_machine == 'aarch64'" to target the ARM platform
50 | # Copied from the install_requires list in setup.py; setup.py was removed in favor of this config file
51 | dependencies = [
52 | "mlflow>=2.8.0",
53 | "oracle-ads>=2.8.8",
54 | ]
55 |
56 | [project.urls]
57 | "Github" = "https://github.com/oracle/oci-mlflow"
58 | "Documentation" = "https://oci-mlflow.readthedocs.io/en/latest/index.html"
59 |
60 | # https://peps.python.org/pep-0621/#entry-points
61 | # Note the quotes around mlflow.artifact_repository in order to escape the dot .
62 | [project.entry-points."mlflow.artifact_repository"]
63 | oci = "oci_mlflow.oci_object_storage:OCIObjectStorageArtifactRepository"
64 |
65 | [project.entry-points."mlflow.project_backend"]
66 | oci-datascience = "oci_mlflow.project:OCIProjectBackend"
67 |
68 | [project.entry-points."mlflow.deployments"]
69 | oci-datascience = "oci_mlflow.deployment"
70 |
71 | [project.entry-points."mlflow.request_auth_provider"]
72 | oci-datascience = "oci_mlflow.auth_plugin:OCIMLFlowAuthRequestProvider"
73 |
74 | # Configuring Ruff (https://docs.astral.sh/ruff/configuration/)
75 | [tool.ruff]
76 | fix = true
77 |
78 | [tool.ruff.lint]
79 | exclude = ["*.yaml", "*jinja2"]
80 | # rules - https://docs.astral.sh/ruff/rules/
81 | extend-ignore = ["E402", "N806", "N803"]
82 | ignore = [
83 | "S101", # use of assert
84 | "B008", # function call in argument defaults
85 | "B017", # pytest.raises considered evil
86 | "B023", # function definition in loop (TODO: un-ignore this)
87 | "B028", # explicit stacklevel for warnings
88 | "C901", # function is too complex (TODO: un-ignore this)
89 | "E501", # from scripts/lint_backend.sh
90 | "PLR091", # complexity rules
91 | "PLR2004", # magic numbers
92 | "PLW2901", # `for` loop variable overwritten by assignment target
93 | "SIM105", # contextlib.suppress (has a performance cost)
94 | "SIM117", # multiple nested with blocks (doesn't look good with gr.Row etc)
95 | "UP006", # use `list` instead of `List` for type annotations (fails for 3.8)
96 | "UP007", # use X | Y for type annotations (TODO: can be enabled once Pydantic plays nice with them)
97 | ]
98 | extend-select = [
99 | "ARG",
100 | "B",
101 | "C",
102 | "E",
103 | "F",
104 | "I",
105 | "N",
106 | "PL",
107 | "S101",
108 | "SIM",
109 | "UP",
110 | "W",
111 | ]
112 |
113 | [tool.ruff.lint.per-file-ignores]
114 | "__init__.py" = ["F401"]
--------------------------------------------------------------------------------
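The entry points declared above are how MLflow discovers the plugin at runtime. A minimal sketch of the discovery mechanism using only the standard library (the `.select()` API requires Python 3.10+; on 3.8/3.9 index the result with entry_points()["mlflow.artifact_repository"]):

    from importlib.metadata import entry_points

    for ep in entry_points().select(group="mlflow.artifact_repository"):
        # e.g. oci -> oci_mlflow.oci_object_storage:OCIObjectStorageArtifactRepository
        print(ep.name, "->", ep.value)
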
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | addopts = -v -p no:warnings --durations=5
3 | testpaths =
4 | tests
5 | authorizer/tests
6 | pythonpath = . oci_mlflow
7 | env =
8 | OCIFS_IAM_TYPE="api_key"
9 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [flake8]
2 | exclude = __init__.py
3 | max-line-length = 95
4 | ignore =
5 | E20, # Extra space in brackets
6 | E231,E241, # Multiple spaces around ","
7 | E26, # Comments
8 | E4, # Import formatting
9 | E721, # Comparing types instead of isinstance
10 | E731, # Assigning lambda expression
11 | E121, # continuation line under-indented for hanging indent
12 | E126, # continuation line over-indented for hanging indent
13 | E127, # continuation line over-indented for visual indent
14 | E128, # E128 continuation line under-indented for visual indent
15 | E702, # multiple statements on one line (semicolon)
16 | W503, # line break before binary operator
17 | E129, # visually indented line with same indent as next logical line
18 | E116, # unexpected indentation
19 | F811, # redefinition of unused 'loop' from line 10
20 | F841, # local variable is assigned to but never used
21 |     E741, # Ambiguous variable names
22 | W504, # line break after binary operator
23 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 Oracle and/or its affiliates.
2 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
3 |
4 | ### File setup.py is obsolete and must not be used. Please update pyproject.toml instead.
5 | ### See detailed explanation why here:
6 | ### https://blog.ganssle.io/articles/2021/10/setup-py-deprecated.html.
7 | # PEP 621 – Storing project metadata in pyproject.toml - https://peps.python.org/pep-0621/
8 | # PEP 518 – Specifying Minimum Build System Requirements for Python Projects https://peps.python.org/pep-0518/
9 | # PEP 508 – Dependency specification for Python Software Packages - https://peps.python.org/pep-0508/
10 | # PEP 517 – A build-system independent format for source trees - https://peps.python.org/pep-0517/
11 |
--------------------------------------------------------------------------------
/test-requirements.txt:
--------------------------------------------------------------------------------
1 | -e .
2 | faker
3 | mock
4 | pip
5 | pytest
6 | pytest-codecov
7 | ruff
8 |
--------------------------------------------------------------------------------
/tests/plugins/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*--
3 |
4 | # Copyright (c) 2023 Oracle and/or its affiliates.
5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6 |
--------------------------------------------------------------------------------
/tests/plugins/unitary/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8; -*-
3 |
4 | # Copyright (c) 2023 Oracle and/or its affiliates.
5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6 |
--------------------------------------------------------------------------------
/tests/plugins/unitary/artifacts/1.txt:
--------------------------------------------------------------------------------
1 | Test
2 |
--------------------------------------------------------------------------------
/tests/plugins/unitary/artifacts/2.txt:
--------------------------------------------------------------------------------
1 | Test
2 |
--------------------------------------------------------------------------------
/tests/plugins/unitary/artifacts/sub_folder/3.txt:
--------------------------------------------------------------------------------
1 | Test
2 |
--------------------------------------------------------------------------------
/tests/plugins/unitary/artifacts/sub_folder/4.txt:
--------------------------------------------------------------------------------
1 | Test
2 |
--------------------------------------------------------------------------------
/tests/plugins/unitary/test_auth.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8; -*-
3 |
4 | # Copyright (c) 2023 Oracle and/or its affiliates.
5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6 | from unittest.mock import patch
7 | from oci_mlflow.auth_plugin import OCIMLFlowAuthRequestProvider
8 |
9 |
10 | class TestOCIMLFlowAuth:
11 | def test_get_name(self):
12 | provider = OCIMLFlowAuthRequestProvider()
13 | assert provider.get_name() == "OCI_REQUEST_AUTH"
14 |
15 | @patch("oci_mlflow.auth_plugin.default_signer")
16 | def test_get_auth(self, mock_default_signer):
17 | mock_default_signer.return_value = {
18 | "config": {},
19 | "signer": "test_default_signer",
20 | "client_kwargs": {},
21 | }
22 | provider = OCIMLFlowAuthRequestProvider()
23 | auth = provider.get_auth()
24 | assert auth == "test_default_signer"
25 |
--------------------------------------------------------------------------------
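The provider name asserted above is what MLflow matches when choosing a request auth provider; in MLflow versions that support this plugin point, selection is typically driven by the MLFLOW_TRACKING_AUTH environment variable (verify against your MLflow version):

    import os

    # Hedged sketch: ask MLflow to sign tracking requests with this provider.
    os.environ["MLFLOW_TRACKING_AUTH"] = "OCI_REQUEST_AUTH"
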
/tests/plugins/unitary/test_files/conda.yaml:
--------------------------------------------------------------------------------
1 | kind: job
2 | name: "{DataFlow application name. For MLflow, it will be replaced with the Project name}"
3 | dependencies:
4 | - python=3.8
5 | spec:
6 | infrastructure:
7 | kind: infrastructure
8 | spec:
9 | compartmentId: ocid1.testCompartmentId
10 | driverShape: VM.Standard.E4.Flex
11 | driverShapeConfig:
12 | memory_in_gbs: 32
13 | ocpus: 2
14 | executorShape: VM.Standard.E4.Flex
15 | executorShapeConfig:
16 | memory_in_gbs: 32
17 | ocpus: 2
18 | language: PYTHON
19 | logsBucketUri: oci://test-log-bucket
20 | numExecutors: 1
21 | sparkVersion: 3.2.1
22 | privateEndpointId: ocid1.dataflowprivateendpoint
23 | type: dataFlow
24 | runtime:
25 | kind: runtime
26 | spec:
27 | configuration:
28 | spark.driverEnv.MLFLOW_TRACKING_URI: http://test-tracking-uri
29 | conda:
30 | type: published
31 | uri: oci://custom-conda-packs@ociodscdev/conda_environments/cpu/PySpark 3.2
32 | and Data Flow/2.0/pyspark32_p38_cpu_v2
33 | condaAuthType: resource_principal
34 | scriptBucket: oci://test-bucket
35 | scriptPathURI: "{Path to the executable script. For MLflow, it will be replaced with the CMD}"
36 | type: dataFlow
37 |
--------------------------------------------------------------------------------
/tests/plugins/unitary/test_files/invalid-file-type.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oracle/oci-mlflow/1e7b9f36e5afe70d00dfc56faca6b0a8e311366d/tests/plugins/unitary/test_files/invalid-file-type.txt
--------------------------------------------------------------------------------
/tests/plugins/unitary/test_files/oci-datascience-template_test.yaml:
--------------------------------------------------------------------------------
1 | kind: job
2 | name: "{DataFlow application name. For MLflow, it will be replaced with the Project name}"
3 | dependencies:
4 | - python=3.8
5 | spec:
6 | infrastructure:
7 | kind: infrastructure
8 | spec:
9 | compartmentId: ocid1.testCompartmentId
10 | driverShape: VM.Standard.E4.Flex
11 | driverShapeConfig:
12 | memory_in_gbs: 32
13 | ocpus: 2
14 | executorShape: VM.Standard.E4.Flex
15 | executorShapeConfig:
16 | memory_in_gbs: 32
17 | ocpus: 2
18 | language: PYTHON
19 | logsBucketUri: oci://test-log-bucket
20 | numExecutors: 1
21 | sparkVersion: 3.2.1
22 | privateEndpointId: ocid1.dataflowprivateendpoint
23 | type: dataFlow
24 | runtime:
25 | kind: runtime
26 | spec:
27 | configuration:
28 | spark.driverEnv.MLFLOW_TRACKING_URI: http://test-tracking-uri
29 | conda:
30 | type: published
31 | uri: oci://custom-conda-packs@ociodscdev/conda_environments/cpu/PySpark 3.2
32 | and Data Flow/2.0/pyspark32_p38_cpu_v2
33 | model_uri: oci://custom-conda-packs@ociodscdev/conda_environments/cpu/PySpark 3.2
34 | and Data Flow/2.0/pyspark32_p38_cpu_v2
35 | condaAuthType: resource_principal
36 | scriptBucket: oci://test-bucket
37 | scriptPathURI: "{Path to the executable script. For MLflow, it will be replaced with the CMD}"
38 | type: dataFlow
39 |
--------------------------------------------------------------------------------
/tests/plugins/unitary/test_files/test-empty-project:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oracle/oci-mlflow/1e7b9f36e5afe70d00dfc56faca6b0a8e311366d/tests/plugins/unitary/test_files/test-empty-project
--------------------------------------------------------------------------------
/tests/plugins/unitary/test_files/test-model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oracle/oci-mlflow/1e7b9f36e5afe70d00dfc56faca6b0a8e311366d/tests/plugins/unitary/test_files/test-model
--------------------------------------------------------------------------------
/tests/plugins/unitary/test_files/test.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oracle/oci-mlflow/1e7b9f36e5afe70d00dfc56faca6b0a8e311366d/tests/plugins/unitary/test_files/test.txt
--------------------------------------------------------------------------------
/tests/plugins/unitary/test_oci_object_storage.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8; -*-
3 | 
4 | # Copyright (c) 2023 Oracle and/or its affiliates.
5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6 | 
7 | import os
8 | import tempfile
9 | from unittest.mock import MagicMock, Mock, patch
10 | import pytest
11 | from mlflow.entities import FileInfo
12 |
13 | from oci_mlflow import oci_object_storage
14 | from oci_mlflow.oci_object_storage import (
15 | ArtifactUploader,
16 | OCIObjectStorageArtifactRepository,
17 | get_token_path,
18 | get_signer,
19 | DEFAULT_DELEGATION_TOKEN_PATH,
20 | )
21 | from oci import object_storage
22 |
23 |
24 | class DataObject:
25 | def __init__(self, name, size):
26 | self.name = name
27 | self.size = size
28 |
29 |
30 | class TestOCIObjectStorageArtifactRepository:
31 | def setup_class(cls):
32 | cls.curr_dir = os.path.dirname(os.path.abspath(__file__))
33 | oci_object_storage.OCI_PREFIX = ""
34 |
35 | @classmethod
36 | def teardown_class(cls):
37 | oci_object_storage.OCI_PREFIX = "oci://"
38 |
39 | @pytest.fixture()
40 | def oci_artifact_repo(self):
41 | return OCIObjectStorageArtifactRepository(
42 | artifact_uri="oci://my-bucket@my-namespace/my-artifact-path"
43 | )
44 |
45 | @pytest.fixture
46 | def mock_fsspec_open(self):
47 | with patch("fsspec.open") as mock_open:
48 | yield mock_open
49 |
50 | def test_parse_os_uri(self, oci_artifact_repo):
51 | bucket, namespace, path = oci_object_storage.parse_os_uri(
52 | "oci://my-bucket@my-namespace/my-artifact-path"
53 | )
54 | assert bucket == "my-bucket"
55 | assert namespace == "my-namespace"
56 | assert path == "my-artifact-path"
57 |
58 | def test_parse_os_uri_with_invalid_scheme(self, oci_artifact_repo):
59 | with pytest.raises(Exception):
60 | oci_object_storage.parse_os_uri("s3://my-bucket/my-artifact-path")
61 |
62 | def test_download_file(self, oci_artifact_repo):
63 | mock_fs = MagicMock()
64 | mock_fs.download.return_value = None
65 | oci_artifact_repo.get_fs = MagicMock(return_value=mock_fs)
66 | with tempfile.TemporaryDirectory() as tmp_dir:
67 | local_path = os.path.join(tmp_dir, "my_file.txt")
68 | remote_path = "my/remote/path/my_file.txt"
69 |
70 | oci_artifact_repo._download_file(
71 | remote_file_path=remote_path, local_path=local_path
72 | )
73 |
74 | mock_fs.download.assert_called_once_with(
75 | "oci://my-bucket@my-namespace/my-artifact-path/my/remote/path/my_file.txt",
76 | local_path,
77 | )
78 |
79 | @patch.object(ArtifactUploader, "upload")
80 | def test_log_artifact(self, mock_upload_file, oci_artifact_repo):
81 | local_file = "test_files/test.txt"
82 | artifact_path = "logs"
83 | oci_artifact_repo.log_artifact(local_file, artifact_path)
84 | expected_dest_path = (
85 | "oci://my-bucket@my-namespace/my-artifact-path/logs/test.txt"
86 | )
87 | mock_upload_file.assert_called_once_with(local_file, expected_dest_path)
88 |
89 | @patch.object(ArtifactUploader, "upload")
90 | def test_log_artifact_with_empty_path(self, mock_upload_file, oci_artifact_repo):
91 | local_file = "test_files/test.txt"
92 | artifact_path = ""
93 | oci_artifact_repo.log_artifact(local_file, artifact_path)
94 | expected_dest_path = (
95 | "oci://my-bucket@my-namespace/my-artifact-path/test.txt"
96 | )
97 | mock_upload_file.assert_called_once_with(local_file, expected_dest_path)
98 |
99 | def test_log_artifact_with_whitespace(self, oci_artifact_repo):
100 | local_file = "test_files/test.txt"
101 | artifact_path = " "
102 | with pytest.raises(
103 | ValueError,
104 | match="`artifact_path` must not be whitespace string."
105 | ):
106 | oci_artifact_repo.log_artifact(local_file, artifact_path)
107 |
108 | @patch.object(ArtifactUploader, "upload")
109 | def test_log_artifact_with_none(self, mock_upload_file, oci_artifact_repo):
110 | local_file = "test_files/test.txt"
111 | artifact_path = None
112 | oci_artifact_repo.log_artifact(local_file, artifact_path)
113 | expected_dest_path = (
114 | "oci://my-bucket@my-namespace/my-artifact-path/test.txt"
115 | )
116 | mock_upload_file.assert_called_once_with(local_file, expected_dest_path)
117 |
118 | @patch.object(ArtifactUploader, "upload")
119 | def test_log_artifact_with_slash_ending_path(self, mock_upload_file, oci_artifact_repo):
120 | local_file = "test_files/test.txt"
121 | artifact_path = "logs/"
122 | oci_artifact_repo.log_artifact(local_file, artifact_path)
123 | expected_dest_path = (
124 | "oci://my-bucket@my-namespace/my-artifact-path/logs/test.txt"
125 | )
126 | mock_upload_file.assert_called_once_with(local_file, expected_dest_path)
127 |
128 | @patch.object(ArtifactUploader, "upload")
129 | def test_log_artifacts(self, mock_upload_file, oci_artifact_repo):
130 | local_dir = os.path.join(self.curr_dir, "test_files")
131 | dest_path = "path/to/dest"
132 | oci_artifact_repo.log_artifacts(local_dir, dest_path)
133 | mock_upload_file.assert_called()
134 |
135 | @patch.object(OCIObjectStorageArtifactRepository, "get_fs")
136 | def test_delete_artifacts(self, mock_get_fs, oci_artifact_repo):
137 | mock_fs = Mock()
138 | mock_get_fs.return_value = mock_fs
139 | mock_fs.ls.return_value = ["test/file1", "test/file2", "test/folder/"]
140 | oci_artifact_repo.delete_artifacts("test")
141 | mock_fs.ls.assert_called_once_with(
142 | "oci://my-bucket@my-namespace/my-artifact-path/test", refresh=True
143 | )
144 | assert mock_fs.delete.call_count == 3
145 | mock_fs.delete.assert_any_call("test/file1")
146 | mock_fs.delete.assert_any_call("test/file2")
147 | mock_fs.delete.assert_any_call("test/folder/")
148 |
149 | def test_list_artifacts(self):
150 | print(os.path.join(self.curr_dir, "artifacts"))
151 |
152 | oci_artifact_repo = OCIObjectStorageArtifactRepository(
153 | artifact_uri=os.path.join(self.curr_dir, "artifacts")
154 | )
155 |
156 | artifacts = oci_artifact_repo.list_artifacts()
157 |
158 | expected_artifacts = [
159 | FileInfo("1.txt", False, 5),
160 | FileInfo("2.txt", False, 5),
161 | FileInfo("sub_folder", True, 0),
162 | ]
163 | assert artifacts == expected_artifacts
164 |
165 |
166 | class TestArtifactUploader:
167 | def test_init(self):
168 | """Ensures the ArtifactUploader instance can be initialized."""
169 | artifact_uploader = ArtifactUploader()
170 | assert isinstance(
171 | artifact_uploader.upload_manager, object_storage.UploadManager
172 | )
173 |
174 | @patch.object(object_storage.UploadManager, "upload_file")
175 | def test_upload(self, mock_upload_file):
176 | """Tests uploading model artifacts."""
177 | artifact_uploader = ArtifactUploader()
178 |
179 | local_file = "test_files/test.txt"
180 | dest_path = "oci://my-bucket@my-namespace/my-artifact-path/logs/test.txt"
181 | artifact_uploader.upload(local_file, dest_path)
182 |
183 | mock_upload_file.assert_called_with(
184 | namespace_name="my-namespace",
185 | bucket_name="my-bucket",
186 | object_name="my-artifact-path/logs/test.txt",
187 | file_path=local_file,
188 | )
189 |
190 |
191 | class TestUtils:
192 | """Test static methods in oci_object_storage.py."""
193 |
194 | @patch("os.path.exists")
195 | def test_get_token_path_in_df(self, mock_path):
196 | """Tests getting the token path in DF session."""
197 | mock_path.return_value = True
198 | assert get_token_path() == DEFAULT_DELEGATION_TOKEN_PATH
199 |
200 | @patch("os.path.exists")
201 | def test_get_token_path_locally(self, mock_path):
202 | """Tests getting the token path locally."""
203 | mock_path.return_value = False
204 |         assert get_token_path() is None
205 |
206 | @patch("oci_mlflow.oci_object_storage.get_delegation_token_signer")
207 | @patch("ads.common.auth.set_auth")
208 | def test_get_signer_in_df(self, mock_set_auth, mock_get_signer):
209 | """Tests getting the storage options in DF session."""
210 | get_signer(token_path=DEFAULT_DELEGATION_TOKEN_PATH)
211 | mock_set_auth.assert_called_once_with(
212 | signer_callable=mock_get_signer,
213 | signer_kwargs={"token_path": DEFAULT_DELEGATION_TOKEN_PATH},
214 | )
215 |
216 | @patch("ads.common.auth.default_signer")
217 | @patch("ads.common.auth.set_auth")
218 | def test_get_signer_locally(self, mock_set_auth, mock_default_signer):
219 | """Tests getting the storage options locally."""
220 | expected_config = {"config": "value", "signer": "value2"}
221 | mock_default_signer.return_value = expected_config
222 |
223 | signer = get_signer(token_path=None)
224 | mock_set_auth.assert_not_called()
225 | assert signer == expected_config
226 |
--------------------------------------------------------------------------------
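The URI convention exercised throughout these tests is oci://<bucket>@<namespace>/<object-path>; `parse_os_uri`, used above via the module, splits it into its three parts:

    from oci_mlflow.oci_object_storage import parse_os_uri

    bucket, namespace, path = parse_os_uri("oci://my-bucket@my-namespace/my-artifact-path")
    # bucket == "my-bucket", namespace == "my-namespace", path == "my-artifact-path"
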
/tests/plugins/unitary/test_telemetry.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8; -*-
3 |
4 | # Copyright (c) 2023 Oracle and/or its affiliates.
5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6 |
7 | import os
8 | from unittest.mock import patch
9 |
10 | import pytest
11 |
12 | from oci_mlflow.telemetry_logging import Telemetry
13 |
14 |
15 | class TestTelemetry:
16 |     """Tests the Telemetry class,
17 |     which captures the telemetry sequence into an environment variable.
18 | """
19 |
20 | def setup_method(self):
21 | self.telemetry = Telemetry("test.api")
22 |
23 | @patch.dict(os.environ, {}, clear=True)
24 | def test_init(self):
25 | """Ensures initializing Telemetry passes."""
26 | self.telemetry = Telemetry("test.api")
27 | assert self.telemetry.environ_variable in os.environ
28 | assert os.environ[self.telemetry.environ_variable] == ""
29 |
30 | @patch.dict(os.environ, {}, clear=True)
31 | def test_add(self):
32 | """Tests adding the new value to the telemetry."""
33 | self.telemetry.begin()
34 | self.telemetry.add("key=value").add("new_key=new_value")
35 | assert (
36 | os.environ[self.telemetry.environ_variable]
37 | == "test.api&key=value&new_key=new_value"
38 | )
39 |
40 | @patch.dict(os.environ, {}, clear=True)
41 | def test_begin(self):
42 | """Tests cleaning the value of the associated environment variable."""
43 | self.telemetry.begin("key=value")
44 | assert os.environ[self.telemetry.environ_variable] == "test.api&key=value"
45 |
46 | @patch.dict(os.environ, {}, clear=True)
47 | def test_clean(self):
48 | """Ensures that telemetry associated environment variable can be cleaned."""
49 | self.telemetry.begin()
50 | self.telemetry.add("key=value").add("new_key=new_value")
51 | assert (
52 | os.environ[self.telemetry.environ_variable]
53 | == "test.api&key=value&new_key=new_value"
54 | )
55 | self.telemetry.clean()
56 | assert os.environ[self.telemetry.environ_variable] == ""
57 |
58 | @pytest.mark.parametrize(
59 | "INPUT_DATA, EXPECTED_RESULT",
60 | [
61 | ("key=va~!@#$%^*()_+lue", "key=va____________lue"),
62 | ("key=va lue", "key=va_lue"),
63 | ("key=va123***lue", "key=va123___lue"),
64 | ],
65 | )
66 | @patch.dict(os.environ, {}, clear=True)
67 | def test__prepare(self, INPUT_DATA, EXPECTED_RESULT):
68 | """Tests replacing special characters in the telemetry input value."""
69 | self.telemetry.begin(INPUT_DATA)
70 | assert (
71 | os.environ[self.telemetry.environ_variable] == f"test.api&{EXPECTED_RESULT}"
72 | )
73 |
--------------------------------------------------------------------------------
/tests/plugins/unitary/test_utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8; -*-
3 |
4 | # Copyright (c) 2023 Oracle and/or its affiliates.
5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6 |
7 | import os
8 | from unittest.mock import patch, ANY
9 |
10 | import pytest
11 |
12 | from oci_mlflow import utils
13 | from oci_mlflow.utils import (
14 | OCIBackendConfig,
15 | OCIProjectBackendConfig,
16 | UnsupportedAuthTypeError,
17 | generate_conda_pack_uri,
18 | generate_slug,
19 | create_conda,
20 | resolve_python_version,
21 | publish,
22 | )
23 |
24 |
25 | class TestOCIBackendConfig:
26 | """Tests the OCIBackendConfig."""
27 |
28 | def test_from_dict_with_empty_dict(self):
29 | config = {}
30 |
31 | result = OCIBackendConfig.from_dict(config)
32 |
33 | assert isinstance(result, OCIBackendConfig)
34 |
35 | def test_from_dict_with_valid_dict(self):
36 | config = {"oci_auth": "resource_principal", "oci_profile": "testVal2"}
37 |
38 | result = OCIBackendConfig.from_dict(config)
39 | assert isinstance(result, OCIBackendConfig)
40 | assert result.oci_auth == "resource_principal"
41 | assert result.oci_profile == "testVal2"
42 |
43 | def test_from_dict_with_invalid_dict(self):
44 | config = {"oci_auth": "test_ErrorVal", "oci_profile": "testVal2"}
45 |
46 | with pytest.raises(UnsupportedAuthTypeError):
47 | OCIProjectBackendConfig.from_dict(config)
48 |
49 |
50 | class TestOCIProjectBackendConfig:
51 | """Tests the OCIProjectBackendConfig."""
52 |
53 | def setup_class(cls):
54 | cls.curr_dir = os.path.dirname(os.path.abspath(__file__))
55 |
56 | def test_post_init_with_valid_values(self):
57 | config = {
58 | "oci_auth": "resource_principal",
59 | "oci_profile": "testVal2",
60 | "oci_job_template_path": "{work_dir}/test_files/oci-datascience-template_test.yaml",
61 | "project_uri": "/path/to/project",
62 | "work_dir": self.curr_dir,
63 | }
64 |
65 | project_backend_config = OCIProjectBackendConfig.from_dict(config)
66 |
67 | assert project_backend_config.oci_job_template_path == os.path.join(
68 | self.curr_dir, "test_files/oci-datascience-template_test.yaml"
69 | )
70 | assert project_backend_config.project_uri == "/path/to/project"
71 | assert project_backend_config.work_dir == self.curr_dir
72 |
73 | def test_post_init_with_missing_project_uri(self):
74 | config_dict = {
75 | "oci_auth": "resource_principal",
76 | "oci_job_template_path": "test_files/oci-datascience-template_test.yaml",
77 | "work_dir": "/path/to/work_dir",
78 | }
79 |
80 | with pytest.raises(ValueError):
81 | OCIProjectBackendConfig.from_dict(config_dict)
82 |
83 | def test_post_init_with_missing_work_dir(self):
84 | config_dict = {
85 | "oci_auth": "resource_principal",
86 | "oci_job_template_path": "test_files/oci-datascience-template_test.yaml",
87 | "project_uri": "/path/to/project",
88 | }
89 |
90 | with pytest.raises(ValueError):
91 | OCIProjectBackendConfig.from_dict(config_dict)
92 |
93 | def test_post_init_with_missing_oci_job_template_path(self):
94 | config_dict = {
95 | "oci_auth": "resource_principal",
96 | "project_uri": "/path/to/project",
97 | "work_dir": "/path/to/work_dir",
98 | }
99 |
100 | with pytest.raises(ValueError):
101 | OCIProjectBackendConfig.from_dict(config_dict)
102 |
103 | def test_post_init_with_invalid_oci_job_template_path(self):
104 | config_dict = {
105 | "oci_job_template_path": "test_files/invalid-file-type.html",
106 | "project_uri": "/path/to/project",
107 | "work_dir": "/path/to/work_dir",
108 | }
109 |
110 | with pytest.raises(ValueError):
111 | OCIProjectBackendConfig.from_dict(config_dict)
112 |
113 | def test_post_init_with_invalid_oci_job_template_extension(self):
114 | config_dict = {
115 | "oci_job_template_path": "{work_dir}/test_files/invalid-file-type.txt",
116 | "project_uri": "/path/to/project",
117 | "work_dir": self.curr_dir,
118 | }
119 |
120 | with pytest.raises(ValueError):
121 | OCIProjectBackendConfig.from_dict(config_dict)
122 |
123 |
124 | class TestUtils:
125 | """Tests the common methods in the utils module."""
126 |
127 | def setup_class(cls):
128 | cls.curr_dir = os.path.dirname(os.path.abspath(__file__))
129 |
130 | def test_generate_slug(self):
131 | assert generate_slug("Test Sth", "1.0.1") == "teststh_v1_0_1"
132 |
133 | def test_generate_conda_pack_uri(self):
134 | assert generate_conda_pack_uri(
135 | "test_package", "1.0", "/path/to/prefix", "test_package_v1", False
136 | ) == os.path.join(
137 | "/path/to/prefix", "cpu", "test_package", "1.0", "test_package_v1"
138 | )
139 |
140 | assert generate_conda_pack_uri(
141 | "test_package", "1.0", "/path/to/prefix", "test_package_v1_0", True
142 | ) == os.path.join(
143 | "/path/to/prefix", "gpu", "test_package", "1.0", "test_package_v1_0"
144 | )
145 |
146 | @patch("oci_mlflow.utils._create")
147 | def test_create_conda(self, mock_create):
148 | mock_create.return_value = "test_return_val"
149 |         assert (
150 |             create_conda(
151 |                 "dummy_name",
152 |                 "1",
153 |                 os.path.join(self.curr_dir, "test_files/oci-datascience-template_test.yaml"),
154 |             )
155 |             == "test_return_val"
156 |         )
157 |
158 | def test_resolve_python_version(self):
159 | assert (
160 | resolve_python_version(
161 | os.path.join(
162 | self.curr_dir, "test_files/oci-datascience-template_test.yaml"
163 | )
164 | )
165 | == "3.8"
166 | )
167 |
168 | @patch("oci_mlflow.utils.ConfigProcessor")
169 | @patch.object(utils, "generate_conda_pack_uri")
170 | @patch("oci_mlflow.utils._publish")
171 | def test_publish(self, mock_publish, mock_generate_conda_pack, mock_config):
172 | slug = "test_slug"
173 | conda_pack_os_prefix = "test_prefix"
174 | conda_pack_folder = "test_folder"
175 | overwrite = False
176 | ads_config = "test_config"
177 | name = "test_name"
178 | version = "test_version"
179 | gpu = False
180 |
181 | publish(
182 | slug,
183 | conda_pack_os_prefix,
184 | conda_pack_folder,
185 | overwrite,
186 | ads_config,
187 | name,
188 | version,
189 | gpu,
190 | )
191 |
192 | mock_config.assert_called_once()
193 | mock_generate_conda_pack.assert_called_once_with(
194 | name, version, conda_pack_os_prefix, slug, gpu
195 | )
196 | mock_publish.assert_called_once_with(
197 | conda_slug=slug,
198 | conda_uri_prefix=conda_pack_os_prefix,
199 | conda_pack_folder=conda_pack_folder,
200 | overwrite=overwrite,
201 | oci_config=ANY,
202 | oci_profile=ANY,
203 | auth_type=ANY,
204 | )
205 |
--------------------------------------------------------------------------------