├── .coveragerc ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ ├── config.yml │ └── feature_request.yml └── workflows │ ├── publish-to-pypi.yml │ ├── publish-to-readthedocs.yml │ └── run-tests.yml ├── .gitignore ├── .gitleaks.toml ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── CONTRIBUTING.md ├── ISSUE_POLICY.md ├── LICENSE.txt ├── MANIFEST.in ├── Makefile ├── README-development.md ├── README.md ├── SECURITY.md ├── THIRD_PARTY_LICENSES.txt ├── authorizer ├── Dockerfile ├── func.yaml ├── requirements.txt ├── src │ ├── authorizer.py │ └── utils │ │ ├── auth_utils.py │ │ ├── header_utils.py │ │ └── identity_utils.py └── tests │ ├── test_auth_utils.py │ └── test_header_utils.py ├── container-image ├── Dockerfile ├── environment.yaml └── run │ ├── launch_mlflow.sh │ └── run.py ├── demos ├── deploy_mlflow_model │ ├── conda-runtime │ │ ├── README.md │ │ ├── elastic-net-deployment_build_conda.yaml │ │ └── elastic-net-deployment_prebuilt_conda.yaml │ └── container-runtime │ │ ├── README.md │ │ ├── container │ │ ├── Dockerfile.pyfunc │ │ └── nginx.conf │ │ ├── elastic-net-deployment-container.yaml │ │ └── input.json ├── pyspark_logistic_regression_dataflow_job │ ├── MLproject │ ├── README.md │ ├── logistic_regression.py │ ├── oci-datascience-config.json │ ├── oci-datascience-template.yaml │ ├── run.ipynb │ └── run.sh ├── sklearn_elasticnet_wine_notebook_job │ ├── MLproject │ ├── README.md │ ├── oci-datascience-config.json │ ├── oci-datascience-template.yaml │ ├── run.ipynb │ ├── run.sh │ └── train.ipynb └── sklearn_elasticnet_wine_script_job │ ├── README.md │ ├── oci-datascience-config.json │ ├── oci-datascience-template.yaml │ ├── run.ipynb │ └── run.sh ├── dev-requirements.txt ├── docs ├── Makefile ├── requirements.txt └── source │ ├── _static │ ├── logo-dark-mode.png │ └── logo-light-mode.png │ ├── concepts.rst │ ├── conf.py │ ├── demos_examples.rst │ ├── index.rst │ ├── model_deployment.rst │ ├── project.rst │ ├── quickstart.rst │ ├── release_notes.rst │ 
└── tracking_server.rst ├── oci_mlflow ├── __init__.py ├── auth_plugin.py ├── deployment.py ├── oci_object_storage.py ├── project.py ├── telemetry_logging.py ├── templates │ ├── __init__.py │ ├── project_description.jinja2 │ ├── runtime.yaml.jinja2 │ └── score.py.jinja2 └── utils.py ├── pyproject.toml ├── pytest.ini ├── setup.cfg ├── setup.py ├── test-requirements.txt └── tests └── plugins ├── __init__.py └── unitary ├── __init__.py ├── artifacts ├── 1.txt ├── 2.txt └── sub_folder │ ├── 3.txt │ └── 4.txt ├── test_auth.py ├── test_deployment.py ├── test_files ├── conda.yaml ├── invalid-file-type.txt ├── oci-datascience-template_test.yaml ├── test-empty-project ├── test-model └── test.txt ├── test_oci_object_storage.py ├── test_project.py ├── test_telemetry.py └── test_utils.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | **/*.jinja2 4 | **/__init__.py 5 | container-image/* 6 | docs/* 7 | tests/* 8 | 9 | [report] 10 | exclude_lines = 11 | pragma: no cover 12 | def __repr__ 13 | if __name__ == .__main__.: 14 | @(abc\.)?abstractmethod 15 | raise AssertionError 16 | raise NotImplementedError 17 | omit = 18 | **/*.jinja2 19 | **/__init__.py 20 | container-image/* 21 | docs/* 22 | tests/* 23 | show_missing = true 24 | skip_empty = true 25 | precision = 2 26 | 27 | [html] 28 | directory = htmlcov -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: Bug observed in oci-mlflow library 3 | title: "[Bug]: " 4 | labels: [Bug, Backlog] 5 | 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: | 10 | If you think you've found a security vulnerability, don't raise a GitHub issue and follow the instructions 11 | in our [security policy](https://github.com/oracle/oci-mlflow/security/policy). 
12 | 13 | --- 14 | 15 | Thank you for taking the time to file a bug report. 16 | - type: checkboxes 17 | id: checks 18 | attributes: 19 | label: oci-mlflow version used 20 | options: 21 | - label: > 22 | I have checked that this issue has not already been reported. 23 | required: true 24 | - label: > 25 | I have confirmed this bug exists on the 26 | [latest version](https://github.com/oracle/oci-mlflow/releases) of oci-mlflow. 27 | - label: > 28 | I have confirmed this bug exists on the main branch of oci-mlflow. 29 | - label: > 30 | I agree to follow the [Code of Conduct](https://github.com/oracle/.github/blob/main/CODE_OF_CONDUCT.md). 31 | required: true 32 | - type: textarea 33 | id: description 34 | attributes: 35 | label: Description 36 | description: > 37 | Please provide a brief description of the problem, describe setup used as that may be the key to the issue. 38 | validations: 39 | required: true 40 | - type: textarea 41 | id: how-to-reproduce 42 | attributes: 43 | label: How to Reproduce 44 | description: > 45 | Please provide a copy-pastable short code example. 46 | If possible provide an ordered list of steps on how to reproduce the problem. 47 | placeholder: > 48 | mlflow deployments help -t oci-datascience 49 | 50 | ... 51 | render: python 52 | validations: 53 | required: true 54 | - type: textarea 55 | id: what-was-observed 56 | attributes: 57 | label: What was Observed 58 | description: > 59 | Please provide snippets of output or describe wrong behavior. 60 | validations: 61 | required: true 62 | - type: textarea 63 | id: what-was-expected 64 | attributes: 65 | label: What was Expected 66 | description: > 67 | Please describe what should have happened and how it is different from what was observed. 68 | validations: 69 | required: true 70 | - type: textarea 71 | id: version 72 | attributes: 73 | label: Version 74 | description: > 75 | Please paste the output of ``pip freeze | grep oci-mlflow`` 76 | value: > 77 |
<details> 78 | 79 | Paste here the output of ``pip freeze | grep oci-mlflow`` 80 | 81 | </details>
82 | validations: 83 | required: true -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Check the docs 4 | url: https://oci-mlflow.readthedocs.io 5 | about: If you need help with your first steps with oci-mlflow please check the docs. -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: Feature Request 2 | description: Feature and enhancement proposals in oci-mlflow library 3 | title: "[FR]: " 4 | labels: [Task, Backlog] 5 | 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: | 10 | Before proceeding, please review the [Contributing to this repository](https://github.com/oracle/oci-mlflow/blob/main/CONTRIBUTING.md) and the [Code of Conduct](https://github.com/oracle/.github/blob/main/CODE_OF_CONDUCT.md). 11 | 12 | --- 13 | 14 | Thank you for submitting a feature request. 15 | - type: dropdown 16 | id: contribution 17 | attributes: 18 | label: Willingness to contribute 19 | description: Would you or another member of your organization be willing to contribute an implementation of this feature? 20 | options: 21 | - Yes. I can contribute this feature independently. 22 | - Yes. I would be willing to contribute this feature with guidance from the oci-mlflow team. 23 | - No. I cannot contribute this feature at this time. 24 | validations: 25 | required: true 26 | - type: textarea 27 | attributes: 28 | label: Proposal Summary 29 | description: | 30 | In a few sentences, provide a clear, high-level description of the feature request 31 | validations: 32 | required: true 33 | - type: textarea 34 | attributes: 35 | label: Motivation 36 | description: | 37 | - What is the use case for this feature? 
38 | - Why is this use case valuable to support for OCI DataScience users in general? 39 | - Why is this use case valuable to support for your project(s) or organization? 40 | - Why is it currently difficult to achieve this use case? 41 | value: | 42 | > #### What is the use case for this feature? 43 | 44 | > #### Why is this use case valuable to support for OCI DataScience users in general? 45 | 46 | > #### Why is this use case valuable to support for your project(s) or organization? 47 | 48 | > #### Why is it currently difficult to achieve this use case? 49 | validations: 50 | required: true 51 | - type: textarea 52 | attributes: 53 | label: Details 54 | description: | 55 | Use this section to include any additional information about the feature. If you have a proposal for how to implement this feature, please include it here. For implementation guidelines, please refer to the [Contributing to this repository](https://github.com/oracle/oci-mlflow/blob/main/CONTRIBUTING.md). 56 | validations: 57 | required: false -------------------------------------------------------------------------------- /.github/workflows/publish-to-pypi.yml: -------------------------------------------------------------------------------- 1 | name: "[DO NOT TRIGGER] Publish to PyPI" 2 | 3 | # To run this workflow manually from the Actions tab 4 | on: workflow_dispatch 5 | 6 | jobs: 7 | build-n-publish: 8 | name: Build and publish Python 🐍 distribution 📦 to PyPI 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - uses: actions/checkout@v4 13 | - name: Set up Python 14 | uses: actions/setup-python@v5 15 | with: 16 | python-version: "3.x" 17 | - name: Build distribution 📦 18 | run: | 19 | pip install build 20 | make dist 21 | - name: Validate 22 | run: | 23 | pip install dist/*.whl 24 | python -c "import oci_mlflow;" 25 | # # To run publish to test PyPI, secret with token needs to be added to oracle/oci-mlflow project. 
26 | # # This one - GH_OCI_MLFLOW_TESTPYPI_TOKEN - removed from project secrets after initial test. 27 | # - name: Publish distribution 📦 to Test PyPI 28 | # env: 29 | # TWINE_USERNAME: __token__ 30 | # TWINE_PASSWORD: ${{ secrets.GH_OCI_MLFLOW_TESTPYPI_TOKEN }} 31 | # run: | 32 | # pip install twine 33 | # twine upload -r testpypi dist/* -u $TWINE_USERNAME -p $TWINE_PASSWORD 34 | - name: Publish distribution 📦 to PyPI 35 | env: 36 | TWINE_USERNAME: __token__ 37 | TWINE_PASSWORD: ${{ secrets.GH_OCI_MLFLOW_PYPI_TOKEN }} 38 | run: | 39 | pip install twine 40 | twine upload dist/* -u $TWINE_USERNAME -p $TWINE_PASSWORD 41 | -------------------------------------------------------------------------------- /.github/workflows/publish-to-readthedocs.yml: -------------------------------------------------------------------------------- 1 | name: "Publish Docs" 2 | 3 | on: 4 | # Auto-trigger this workflow on tag creation 5 | push: 6 | tags: 7 | - 'v*.*.*' 8 | 9 | env: 10 | RTDS_MLFLOW_PROJECT: https://readthedocs.org/api/v3/projects/accelerated-data-science 11 | RTDS_MLFLOW_TOKEN: ${{ secrets.RTDS_MLFLOW_TOKEN }} 12 | 13 | jobs: 14 | build-n-publish: 15 | name: Build and publish Docs 📖 to Readthedocs 16 | runs-on: ubuntu-latest 17 | 18 | steps: 19 | - name: When tag 🏷️ pushed - Trigger Readthedocs build 20 | if: github.event_name == 'push' && startsWith(github.ref_name, 'v') 21 | run: | 22 | # trigger build/publish of latest version 23 | curl \ 24 | -X POST \ 25 | -H "Authorization: Token $RTDS_MLFLOW_TOKEN" $RTDS_MLFLOW_PROJECT/versions/latest/builds/ 26 | # add 15 minutes wait time for readthedocs see freshly created tag 27 | sleep 15m 28 | # trigger build/publish of v*.*.* version 29 | curl \ 30 | -X POST \ 31 | -H "Authorization: Token $RTDS_MLFLOW_TOKEN" $RTDS_MLFLOW_PROJECT/versions/${{ github.ref_name }}/builds/ -------------------------------------------------------------------------------- /.github/workflows/run-tests.yml: 
-------------------------------------------------------------------------------- 1 | name: Run Tests 2 | 3 | on: 4 | pull_request: 5 | paths: 6 | - "container-image/**" 7 | - "oci_mlflow/**" 8 | - "tests/**" 9 | - "**requirements.txt" 10 | - pyproject.toml 11 | # To run this workflow manually from the Actions tab 12 | workflow_dispatch: 13 | 14 | # Cancel in progress workflows on pull_requests. 15 | # https://docs.github.com/en/actions/using-jobs/using-concurrency#example-using-a-fallback-value 16 | concurrency: 17 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 18 | cancel-in-progress: true 19 | 20 | permissions: 21 | contents: read 22 | pull-requests: write 23 | 24 | # hack for https://github.com/actions/cache/issues/810#issuecomment-1222550359 25 | env: 26 | SEGMENT_DOWNLOAD_TIMEOUT_MINS: 5 27 | 28 | jobs: 29 | test: 30 | name: python ${{ matrix.python-version }} 31 | runs-on: ubuntu-latest 32 | timeout-minutes: 20 33 | 34 | strategy: 35 | fail-fast: false 36 | matrix: 37 | python-version: ["3.8", "3.9", "3.10"] 38 | include: 39 | - python-version: "3.9" 40 | cov-reports: --cov=oci_mlflow --cov-report=xml --cov-report=html 41 | 42 | steps: 43 | - uses: actions/checkout@v4 44 | 45 | # Caching python libraries installed with pip 46 | # https://github.com/actions/cache/blob/main/examples.md#python---pip 47 | - uses: actions/cache@v4 48 | with: 49 | path: ~/.cache/pip 50 | key: ${{ runner.os }}-pip-${{ hashFiles('**/test-requirements.txt') }} 51 | restore-keys: | 52 | ${{ runner.os }}-pip- 53 | - uses: actions/setup-python@v5 54 | with: 55 | python-version: ${{ matrix.python-version }} 56 | 57 | - name: "Test config setup" 58 | shell: bash 59 | env: 60 | HOME_RUNNER_DIR: /home/runner 61 | run: | 62 | set -x # print commands that are executed 63 | mkdir -p "$HOME_RUNNER_DIR"/.oci 64 | openssl genrsa -out $HOME_RUNNER_DIR/.oci/oci_mlflow_user.pem 2048 65 | cat <> "$HOME_RUNNER_DIR/.oci/config" 66 | [DEFAULT] 67 | user=ocid1.user.oc1..xxx 68 | 
fingerprint=00:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00 69 | tenancy=ocid1.tenancy.oc1..xxx 70 | region=test_region 71 | key_file=$HOME_RUNNER_DIR/.oci/oci_mlflow_user.pem 72 | EOT 73 | ls -lha "$HOME_RUNNER_DIR"/.oci 74 | echo "Test config file:" 75 | cat $HOME_RUNNER_DIR/.oci/config 76 | 77 | - name: "Run tests" 78 | timeout-minutes: 5 79 | shell: bash 80 | run: | 81 | set -x # print commands that are executed 82 | $CONDA/bin/conda init 83 | source /home/runner/.bashrc 84 | pip install -r test-requirements.txt 85 | python -m pytest ${{ matrix.cov-reports }} tests 86 | 87 | - name: "Calculate coverage" 88 | if: ${{ success() }} && ${{ github.event.issue.pull_request }} 89 | run: | 90 | set -x # print commands that are executed 91 | 92 | # Prepare default cov body text 93 | COV_BODY_INTRO="📌 Overall coverage:\n\n" 94 | echo COV_BODY="$COV_BODY_INTRO No success to gather report. 😿" >> $GITHUB_ENV 95 | 96 | # Calculate overall coverage and update body message 97 | COV=$(grep -E 'pc_cov' htmlcov/index.html | cut -d'>' -f 2 | cut -d'%' -f 1) 98 | if [[ ! -z $COV ]]; then 99 | ROUNDED_COV=$(echo $COV | cut -d'.' 
-f 1) 100 | if [[ $ROUNDED_COV -lt 50 ]]; then COLOR=red; elif [[ $ROUNDED_COV -lt 80 ]]; then COLOR=yellow; else COLOR=green; fi 101 | echo COV_BODY="$COV_BODY_INTRO ![Coverage-$COV%](https://img.shields.io/badge/coverage-$COV%25-$COLOR)" >> $GITHUB_ENV 102 | fi 103 | 104 | - name: "Add comment with coverage info to PR" 105 | uses: actions/github-script@v7 106 | if: ${{ success() }} && ${{ github.event.issue.pull_request }} 107 | with: 108 | github-token: ${{ github.token }} 109 | script: | 110 | github.rest.issues.createComment({ 111 | issue_number: context.issue.number, 112 | owner: context.repo.owner, 113 | repo: context.repo.repo, 114 | body: '${{ env.COV_BODY }}' 115 | }) 116 | 117 | - name: "Save coverage files" 118 | uses: actions/upload-artifact@v4 119 | if: ${{ matrix.cov-reports }} 120 | with: 121 | name: cov-reports 122 | path: | 123 | htmlcov/ 124 | .coverage 125 | coverage.xml 126 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | docs/build/ 14 | docs/docs_html/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | ads_latest.zip 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | docs/build/ 71 | docs/dask-worker-space/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # celery beat schedule file 83 | celerybeat-schedule 84 | 85 | # SageMath parsed files 86 | *.sage.py 87 | 88 | # dotenv 89 | .env 90 | 91 | # virtualenv 92 | .venv 93 | venv/ 94 | ENV/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ 108 | 109 | ### Linux ### 110 | *~ 111 | 112 | # IntelliJ/Pycharm settings 113 | *.iml 114 | .idea 115 | .vscode/ 116 | 117 | # Mac OS metadata 118 | .DS_Store 119 | .AppleDouble 120 | .LSOverride 121 | 122 | # dask related 123 | dask-worker-space 124 | 125 | #sqllite databases 126 | *.db 127 | 128 | Untitled*.ipynb 129 | *.lock 130 | *.dirlock 131 | 132 | 133 | # test results 134 | testresults.xml 135 | 136 | 137 | 138 | # html reports of covered code by pytests and 139 | # execution data collected by coverage.py 140 | *coverage_html_report* 141 | .coverage 142 | .coverage* 143 | **.sqlite 144 | 145 | extensions/**/node_modules/ 146 | extensions/**/*.tgz 147 | logs/ 148 | **/node_modules 149 | 150 | # side-effects of running notebooks 151 | **/.mlx_static/** 152 | 153 | # vim 154 | *.swp 155 | 156 | *.whl 157 | .env 158 | local_workarea 159 | 
mlruns 160 | tmp 161 | conda 162 | 163 | docs/build 164 | -------------------------------------------------------------------------------- /.gitleaks.toml: -------------------------------------------------------------------------------- 1 | title = "Gitleaks Config" 2 | 3 | # Gitleaks feature, extending the existing base config from: 4 | # https://github.com/zricethezav/gitleaks/blob/master/config/gitleaks.toml 5 | [extend] 6 | useDefault = true 7 | 8 | # Allowlist's 'stopwords' and 'regexes' excludes any secrets or mathching patterns from the current repository. 9 | # Paths listed in allowlist will not be scanned. 10 | [allowlist] 11 | description = "Global allow list" 12 | stopwords = ["test_password", "sample_key"] 13 | regexes = [ 14 | '''example-password''', 15 | '''this-is-not-the-secret''', 16 | '''''' 17 | ] 18 | 19 | # Describe rule to search real ocids 20 | [[rules]] 21 | description = "Real ocids" 22 | id = "ocid" 23 | regex = '''ocid[123]\.[a-z1-9A-Z]*\.oc\d\.[a-z1-9A-Z]*\.[a-z1-9A-Z]+''' 24 | keywords = [ 25 | "ocid" 26 | ] 27 | 28 | # Describe rule to search generic secrets 29 | [[rules]] 30 | description = "Generic secret" 31 | id = "generic-secret" 32 | regex = '''(?i)((key|api|token|secret|passwd|password|psw|pass|pswd)[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9a-zA-Z!@#$%^&*<>\\\-_.=]{3,100})['\"]''' 33 | entropy = 0 34 | secretGroup = 4 35 | keywords = [ 36 | "key","api","token","secret","passwd","password", "psw", "pass", "pswd" 37 | ] 38 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | # ruff 3 | - repo: https://github.com/astral-sh/ruff-pre-commit 4 | rev: v0.4.9 5 | hooks: 6 | - id: ruff 7 | types_or: [ python, pyi, jupyter ] 8 | args: [ --fix ] 9 | files: ^ads 10 | exclude: ^docs/ 11 | - id: ruff-format 12 | types_or: [ python, pyi, jupyter ] 13 | exclude: ^docs/ 14 | # 
Standard hooks 15 | - repo: https://github.com/pre-commit/pre-commit-hooks 16 | rev: v4.4.0 17 | hooks: 18 | - id: check-ast 19 | exclude: ^docs/ 20 | - id: check-docstring-first 21 | exclude: ^(docs/|tests/) 22 | - id: check-json 23 | - id: check-merge-conflict 24 | - id: check-yaml 25 | args: ['--allow-multiple-documents'] 26 | - id: detect-private-key 27 | - id: end-of-file-fixer 28 | - id: pretty-format-json 29 | args: ['--autofix'] 30 | - id: trailing-whitespace 31 | args: [ --markdown-linebreak-ext=md ] 32 | exclude: ^docs/ 33 | # Regex based rst files common mistakes detector 34 | - repo: https://github.com/pre-commit/pygrep-hooks 35 | rev: v1.10.0 36 | hooks: 37 | - id: rst-backticks 38 | files: ^docs/ 39 | - id: rst-inline-touching-normal 40 | files: ^docs/ 41 | # Hardcoded secrets and ocids detector 42 | - repo: https://github.com/gitleaks/gitleaks 43 | rev: v8.17.0 44 | hooks: 45 | - id: gitleaks 46 | # Oracle copyright checker 47 | - repo: https://github.com/oracle-samples/oci-data-science-ai-samples/ 48 | rev: cbe0136 49 | hooks: 50 | - id: check-copyright 51 | name: check-copyright 52 | entry: .pre-commit-scripts/check-copyright.py 53 | language: script 54 | types_or: ['python', 'shell', 'bash'] 55 | exclude: ^docs/ 56 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | 3 | # Required 4 | version: 2 5 | 6 | # Set the version of Python and other tools you might need 7 | build: 8 | os: ubuntu-22.04 9 | tools: 10 | python: "3.9" 11 | 12 | # Build documentation in the docs/ directory with Sphinx 13 | sphinx: 14 | configuration: docs/source/conf.py 15 | 16 | # Optionally declare the Python requirements required to build your docs 17 | python: 18 | install: 19 | - requirements: docs/requirements.txt -------------------------------------------------------------------------------- 
/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to this repository 2 | 3 | We welcome your contributions! There are multiple ways to contribute. 4 | 5 | ## Opening issues 6 | 7 | For bugs or enhancement requests, file a GitHub issue unless it's 8 | security related. When filing a bug, remember that the better written the bug is, 9 | the more likely it is to be fixed. If you think you've found a security 10 | vulnerability, don't raise a GitHub issue and follow the instructions in our 11 | [security policy](./SECURITY.md). 12 | 13 | ## Contributing code 14 | 15 | We welcome your code contributions. Before submitting code using a pull request, 16 | you must sign the [Oracle Contributor Agreement](https://oca.opensource.oracle.com) (OCA) and 17 | your commits need to include the following line using the name and e-mail 18 | address you used to sign the OCA: 19 | 20 | ```text 21 | Signed-off-by: Your Name 22 | ``` 23 | 24 | This can be automatically added to pull requests by committing with `--sign-off` 25 | or `-s`, for example: 26 | 27 | ```text 28 | git commit --signoff 29 | ``` 30 | 31 | Only pull requests from committers that can be verified as having signed the OCA 32 | are accepted. 33 | 34 | ## Pull request process 35 | 36 | 1. Ensure there is an issue created to track and discuss the fix or enhancement 37 | you intend to submit. 38 | 2. Fork this repository. 39 | 3. Create a branch in your fork to implement the changes. We recommend using 40 | the issue number as part of your branch name, for example `1234-fixes`. 41 | 4. Ensure that any documentation is updated with the changes that are required 42 | by your change. 43 | 5. Ensure that any samples are updated if the base image has been changed. 44 | 6. Submit the pull request. *Don't leave the pull request blank*. Explain exactly 45 | what your changes are meant to do and provide simple steps about how to validate 46 | your changes. 
Ensure that you reference the issue you created as well. 47 | 7. We assign the pull request to 2-3 people for review before it is merged. 48 | 49 | ## Code of conduct 50 | 51 | Follow the [Golden Rule](https://en.wikipedia.org/wiki/Golden_Rule). If you'd 52 | like more specific guidelines, see the 53 | [Contributor Covenant Code of Conduct](https://www.contributor-covenant.org/version/1/4/code-of-conduct/). 54 | -------------------------------------------------------------------------------- /ISSUE_POLICY.md: -------------------------------------------------------------------------------- 1 | # Issue Policy 2 | 3 | The OCI MLflow Issue Policy outlines the categories of OCI MLflow GitHub issues and discusses the guidelines and processes associated with each type of issue. 4 | 5 | Before filing an issue, make sure to [search for related issues](https://github.com/oracle/oci-mlflow/issues) and check if they address the same problem you're encountering. 6 | 7 | ## Issue Categories 8 | 9 | Our policy states that GitHub issues fall into the following categories: 10 | 11 | 1. Feature Requests 12 | 2. Bug Reports 13 | 3. Documentation Fixes 14 | 4. Installation Issues 15 | 16 | Each category has its own GitHub issue template. Please refrain from deleting the issue template unless you are certain that your issue does not fit within its scope. 17 | 18 | ### Feature Requests 19 | 20 | #### Guidelines 21 | 22 | To increase the likelihood of having a feature request accepted, please ensure that: 23 | 24 | - The request has a minimal scope (note that it's easier to add additional functionality later than to remove functionality). 25 | - The request has a significant impact on users and provides value that justifies the maintenance efforts required to support the feature in the future. 26 | 27 | #### Lifecycle 28 | 29 | Feature requests typically go through the following stages: 30 | 31 | 1. 
Submit a feature request GitHub Issue, providing a brief overview of the proposal and its motivation. If possible, include an implementation overview as well. 32 | 2. The issue will be triaged to determine if more information is needed from the author, assign a priority, and route the request to the appropriate committers. 33 | 3. Discuss the feature request with a committer who will provide input on the implementation overview or request a more detailed design if necessary. 34 | 4. Once there is agreement on the feature request and its implementation, an implementation owner will be assigned. 35 | 5. The implementation owner will start developing the feature and ultimately submit associated pull requests to the OCI MLflow Repository. 36 | 37 | ### Bug Reports 38 | 39 | #### Guidelines 40 | 41 | To ensure that maintainers can effectively assist with any reported bugs, please follow these guidelines: 42 | 43 | - Fill out the bug report template completely, providing appropriate levels of detail, especially in the "Code to reproduce issue" section. 44 | - Verify that the bug you are reporting meets one of the following criteria: 45 | - It is a regression where a recent release of OCI MLflow no longer supports an operation that was supported in an earlier release. 46 | - A documented feature or functionality does not work as intended when executing a provided example from the documentation. 47 | - Any raised exception is directly from OCI MLflow and not the result of an underlying package's exception. 48 | - Make an effort to diagnose and troubleshoot the issue before filing the report. 49 | - Ensure that the environment in which you encountered the bug is supported as defined in the documentation. 50 | - Validate that OCI MLflow supports the functionality you are experiencing issues with. Remember that the absence of a feature does not constitute a bug. 51 | - Read the documentation for the feature related to the issue you are reporting. 
If you are certain that you are following the documented guidelines, please file a bug report. 52 | 53 | #### Lifecycle 54 | 55 | Bug reports typically go through the following stages: 56 | 57 | 1. Submit a bug report GitHub Issue, providing a high-level description of the bug and all the necessary information to reproduce it. 58 | 2. The bug report will be triaged to determine if more information is required from the author, assign a priority, and route the issue to the appropriate committers. 59 | 3. An OCI MLflow committer will reproduce the bug and provide feedback on how to implement a fix. 60 | 4. Once an approach has been agreed upon, an owner for the fix will be assigned. For severe bugs, OCI MLflow committers may choose to take ownership to ensure a timely resolution. 61 | 5. The fix owner will start implementing the solution and ultimately submit associated pull requests. 62 | 63 | ### Documentation Fixes 64 | 65 | #### Lifecycle 66 | 67 | Documentation issues typically go through the following stages: 68 | 69 | 1. Submit a documentation GitHub Issue, describing the issue and indicating its location(s) in the OCI MLflow documentation. 70 | 2. The issue will be triaged to determine if more information is needed from the author, assign a priority, and route the request to the appropriate committers. 71 | 3. An OCI MLflow committer will confirm the documentation issue and provide feedback on how to implement a fix. 72 | 4. Once an approach has been agreed upon, an owner for the fix will be assigned. For severe documentation issues, OCI MLflow committers may choose to take ownership to ensure a timely resolution. 73 | 5. The fix owner will start implementing the solution and ultimately submit associated pull requests. 74 | 75 | ### Installation Issues 76 | 77 | #### Lifecycle 78 | 79 | Installation issues typically go through the following stages: 80 | 81 | 1. Submit an installation GitHub Issue, describing the issue and indicating the platforms it affects. 
82 | 2. The issue will be triaged to determine if more information is needed from the author, assign a priority, and route the issue to the appropriate committers. 83 | 3. An OCI MLflow committer will confirm the installation issue and provide feedback on how to implement a fix. 84 | 4. Once an approach has been agreed upon, an owner for the fix will be assigned. For severe installation issues, OCI MLflow committers may choose to take ownership to ensure a timely resolution. 85 | 5. The fix owner will start implementing the solution and ultimately submit associated pull requests. 86 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2023 Oracle and/or its affiliates. All rights reserved. 2 | 3 | The Universal Permissive License (UPL), Version 1.0 4 | 5 | Subject to the condition set forth below, permission is hereby granted to any 6 | person obtaining a copy of this software, associated documentation and/or data 7 | (collectively the "Software"), free of charge and under any and all copyright 8 | rights in the Software, and any and all patent rights owned or freely 9 | licensable by each licensor hereunder covering either (i) the unmodified 10 | Software as contributed to or provided by such licensor, or (ii) the Larger 11 | Works (as defined below), to deal in both 12 | 13 | (a) the Software, and 14 | (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if 15 | one is included with the Software (each a "Larger Work" to which the Software 16 | is contributed by such licensors), 17 | 18 | without restriction, including without limitation the rights to copy, create 19 | derivative works of, display, perform, and distribute the Software and make, 20 | use, sell, offer for sale, import, export, have made, and have sold the 21 | Software and the Larger Work(s), and to sublicense the foregoing rights on 22 | 
either these or other terms. 23 | 24 | This license is subject to the following condition: 25 | The above copyright notice and either this complete permission notice or at 26 | a minimum a reference to the UPL must be included in all copies or 27 | substantial portions of the Software. 28 | 29 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 30 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 31 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 32 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 33 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 34 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 35 | SOFTWARE. 36 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE.txt 2 | include THIRD_PARTY_LICENSES.txt 3 | include oci_mlflow/version.json 4 | include oci_mlflow/templates/*.jinja2 5 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | -include .env 2 | 3 | .PHONY: clean dist wheel 4 | 5 | TAG:=latest 6 | IMAGE_NAME:=oci-mlflow 7 | CONTAINERDIR:=container-image 8 | RND:=1 9 | 10 | clean: 11 | @rm -rf dist build oci_mlflow.egg-info $(CONTAINERDIR)/run/*.whl 12 | @find ./ -name '*.pyc' -exec rm -f {} \; 13 | @find ./ -name 'Thumbs.db' -exec rm -f {} \; 14 | @find ./ -name '*~' -exec rm -f {} \; 15 | 16 | dist: clean 17 | @python -m build 18 | 19 | build-image: 20 | docker build --network host --build-arg RND=$(RND) -t $(IMAGE_NAME):$(TAG) -f container-image/Dockerfile . 
21 | $(MAKE) clean 22 | 23 | launch: build-image 24 | @docker run --rm -it --net host -v ~/.oci:/root/.oci --env-file .env --name oci-mlflow $(IMAGE_NAME):$(TAG) 25 | 26 | launch-shell: build-image 27 | @docker run --rm -it --net host -v ~/.oci:/root/.oci --env-file .env --entrypoint bash --name oci-mlflow-shell $(IMAGE_NAME):$(TAG) 28 | 29 | wheel: dist 30 | @cp dist/*.whl container-image/run/ 31 | -------------------------------------------------------------------------------- /README-development.md: -------------------------------------------------------------------------------- 1 | # Development 2 | The target audience for this README is developers wanting to contribute to `OCI MLflow` plugins. If you want to use the OCI MLflow plugins with your own programs, see `README.md`. 3 | 4 | ## Setting Up Dependencies 5 | 6 | ``` 7 | python3 -m pip install -r dev-requirements.txt 8 | ``` 9 | 10 | # Generating the wheel 11 | The OCI MLflow plugins are packaged as a wheel. To generate the wheel, you can run: 12 | 13 | ``` 14 | make dist 15 | ``` 16 | 17 | Alternatively you can run - 18 | 19 | ``` 20 | python -m build 21 | ``` 22 | 23 | This wheel can then be installed using pip. 24 | 25 | 26 | ## Setting Up Tracking Server 27 | 28 | Create a file called `.env` in the root folder of the project with following contents - 29 | 30 | ``` 31 | # Defaults to resource_principal if not provided 32 | OCIFS_IAM_TYPE=api_key 33 | 34 | # Artifacts location. Can be local folder or OCI Object Storage bucket 35 | MLFLOW_ARTIFACTS_DESTINATION=oci://bucket@namespace/ 36 | MLFLOW_DEFAULT_ARTIFACT_ROOT=oci://bucket@namespace/ 37 | 38 | # Backend provider. Default is sqllite 39 | BACKEND_PROVIDER=sqllite 40 | 41 | # ------MySQL----------------------- 42 | # BACKEND_PROVIDER=mysql 43 | 44 | # The database credentials can be stored in the Vault service, or they can be provided in the config. 
82 | docker run --rm -it --net host -v ~/.oci:/root/.oci --env-file .env --name oci-mlflow oci-mlflow:latest
97 | docker run --rm -it --net host -v ~/.oci:/root/.oci --env-file .env --name oci-mlflow oci-mlflow:latest 98 | ``` 99 | 100 | To build and start a shell prompt within `oci-mlflow` container image, run - 101 | 102 | ``` 103 | make clean wheel launch-shell 104 | ``` 105 | 106 | Alternatively you can run - 107 | 108 | ``` 109 | python -m build 110 | cp dist/*.whl container-image/run/ 111 | docker build --network host --build-arg RND=1 -t oci-mlflow:latest -f container-image/Dockerfile . 112 | docker run --rm -it --net host -v ~/.oci:/root/.oci --env-file .env --entrypoint bash --name oci-mlflow-shell oci-mlflow:latest 113 | ``` 114 | 115 | # Running Tests 116 | The SDK uses pytest as its test framework. To run tests use: 117 | 118 | ``` 119 | python3 -m pytest tests/* 120 | ``` 121 | 122 | # Generating Documentation 123 | Sphinx is used for documentation. You can generate HTML locally with the following: 124 | 125 | ``` 126 | python3 -m pip install -r dev-requirements.txt 127 | cd docs 128 | make html 129 | ``` 130 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OCI Mlflow Plugin 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/oci-mlflow.svg?style=for-the-badge&logo=pypi&logoColor=white)](https://pypi.org/project/oci-mlflow/) [![Python](https://img.shields.io/pypi/pyversions/oci-mlflow.svg?style=for-the-badge&logo=pypi&logoColor=white)](https://pypi.org/project/oci-mlflow/) 4 | 5 | The OCI MLflow plugin enables OCI users to use OCI resources to manage their machine learning use case life cycle. This table below provides the mapping between the MLflow features and the OCI resources that are used. 
6 | 7 | | MLflow Use Case | OCI Resource | 8 | | -------- | ------- | 9 | | User running machine learning experiments on notebook, logs model artifacts, model performance etc | Data Science Jobs, Object Storage, MySQL | 10 | | Batch workloads using spark | Data Flow, Object Storage, MySQL | 11 | | Model Catalog | Data Science Model Catalog | 12 | | Model Deployment | Data Science Model Deployment | 13 | | User running machine learning experiments on notebook, logs model artifacts, model performance etc | Object Storage, MySQL | 14 | 15 | 16 | ## Installation 17 | 18 | To install the `oci-mlflow` plugin call - 19 | 20 | ```bash 21 | python3 -m pip install oci-mlflow 22 | ``` 23 | 24 | To test the `oci-mlflow` plugin call - 25 | 26 | ```bash 27 | mlflow deployments help -t oci-datascience 28 | ``` 29 | 30 | ## Documentation 31 | - [OCI MLflow Documentation](https://oci-mlflow.readthedocs.io/en/latest/index.html) 32 | - [Getting started with Oracle Accelerated Data Science SDK](https://accelerated-data-science.readthedocs.io/en/latest/index.html) 33 | - [Getting started with OCI Data Science Jobs](https://docs.oracle.com/en-us/iaas/data-science/using/jobs-about.htm) 34 | - [Getting started with Data Science Environments](https://docs.oracle.com/en-us/iaas/data-science/using/conda_environ_list.htm) 35 | - [Getting started with Custom Conda Environments](https://docs.oracle.com/en-us/iaas/data-science/using/conda_create_conda_env.htm) 36 | - [Getting started with Model Catalog](https://docs.oracle.com/en-us/iaas/data-science/using/models-about.htm) 37 | - [Getting started with Model Deployment](https://docs.oracle.com/en-us/iaas/data-science/using/model-dep-about.htm) 38 | - [Oracle AI & Data Science Blog](https://blogs.oracle.com/ai-and-datascience/) 39 | - [OCI Documentation](https://docs.oracle.com/en-us/iaas/data-science/using/data-science.htm) 40 | 41 | ## Examples 42 | ### Running MLflow projects on the OCI `Data Science jobs` and `Data Flow applications` - 43 | 
This project welcomes contributions from the community. Before submitting a pull request, please [review our contribution guide](./CONTRIBUTING.md)
11 | We encourage people who contact Oracle Security to use email encryption using 12 | [our encryption key][3]. 13 | 14 | We ask that you do not use other channels or contact the project maintainers 15 | directly. 16 | 17 | Non-vulnerability related security issues including ideas for new or improved 18 | security features are welcome on GitHub Issues. 19 | 20 | ## Security updates, alerts and bulletins 21 | 22 | Security updates will be released on a regular cadence. Many of our projects 23 | will typically release security fixes in conjunction with the 24 | Oracle Critical Patch Update program. Additional 25 | information, including past advisories, is available on our [security alerts][4] 26 | page. 27 | 28 | ## Security-related information 29 | 30 | We will provide security related information such as a threat model, considerations 31 | for secure use, or any known security issues in our documentation. Please note 32 | that labs and sample code are intended to demonstrate a concept and may not be 33 | sufficiently hardened for production use. 34 | 35 | [1]: mailto:secalert_us@oracle.com 36 | [2]: https://www.oracle.com/corporate/security-practices/assurance/vulnerability/reporting.html 37 | [3]: https://www.oracle.com/security-alerts/encryptionkey.html 38 | [4]: https://www.oracle.com/security-alerts/ 39 | -------------------------------------------------------------------------------- /authorizer/Dockerfile: -------------------------------------------------------------------------------- 1 | # Provide the path of base OS image here. 2 | FROM ghcr.io/oracle/oraclelinux:8-slim 3 | 4 | # Install python and pip. 
14 | # Rest of this section contains commands to install python dependencies specified in requirements.txt and then setting up python path, docker CMD, Entrypoint etc. These commands should remain same except the pip install which might change depending upon command syntax and the PyPi repository.
def authorizer(ctx: context.InvokeContext, data: io.BytesIO = None) -> fdk.response.Response:
    """Performs authn and authz for given data.

    Parameters
    ----------
    ctx: InvokeContext
        An instance of InvokeContext.
    data: BytesIO
        Data in BytesIO format.

    Returns
    -------
    Response
        An instance of Response.
    """
    try:
        headers = extract_and_validate_headers(data.getvalue())
    except (
        AuthorizationHeaderMissingException,
        MissingRequiredHeadersException
    ):
        # The request was not signed (or is missing signed headers) - reject
        # with a 401 challenge.
        return response.Response(
            ctx, status_code=401, response_data=json.dumps(
                {
                    "active": False,
                    "wwwAuthenticate": "Signature"
                }
            )
        )
    # The signature covers "(request-target)" = "<method> <path>", but the
    # actual HTTP method is not forwarded here, so try each method mlflow
    # uses until one verifies.
    path_segment = copy.deepcopy(headers["(request-target)"])
    principal = None
    for method in MLFLOW_REST_API_METHODS:
        headers["(request-target)"] = [method + " " + path_segment[0]]
        try:
            principal = do_authn(identity_client, headers)
            # Fix: stop at the first successful authentication; the original
            # kept probing the remaining methods, issuing up to four extra
            # identity-service calls per request.
            break
        except AuthenticationException:
            # Expected for every method other than the one actually signed.
            pass

    if principal:
        try:
            # Authorization: the principal must belong to one of the group
            # OCIDs configured on the function.
            do_authz(
                identity_client,
                principal,
                get_group_ids_from_config(ctx.Config())
            )

            return response.Response(
                ctx, status_code=200, response_data=json.dumps(
                    {
                        "active": True,
                        "context": {
                            "subjectId": principal.subject_id
                        }
                    }
                )
            )
        except AuthorizationException as ex:
            logger.error('Error occurred while performing authZ: %s', str(ex))

    # Fall through: authentication failed for all methods, or authz failed.
    return response.Response(
        ctx, status_code=401, response_data=json.dumps(
            {
                "active": False,
                "wwwAuthenticate": "Signature"
            }
        )
    )
def get_signer(signer_type: SignerType = SignerType.AUTO):
    """Resolves a signer type to a concrete OCI signer instance.

    Parameters
    ----------
    signer_type: SignerType
        The requested signer type; AUTO resolves via the RP_AUTH
        environment flag.

    Returns
    -------
    Signer:
        A resource-principal or instance-principal signer.
    """
    resolved = signer_type
    if resolved == SignerType.AUTO:
        # AUTO picks resource principal only when RP_AUTH is truthy.
        resolved = (
            SignerType.RESOURCE_PRINCIPAL
            if _get_env_bool("RP_AUTH", False)
            else SignerType.INSTANCE_PRINCIPAL
        )
    if resolved != SignerType.RESOURCE_PRINCIPAL:
        return _get_internal_instance_principal_signer()
    return get_resource_principals_signer()
def do_authn(
    identity_client: ExtendedIdentityDataPlaneClient,
    headers: Dict[str, List[str]]
) -> Principal:
    """Authenticates signed request headers against the identity service.

    Parameters
    ----------
    identity_client: ExtendedIdentityDataPlaneClient
        Client used to reach the identity data plane.
    headers: dict
        The signed headers to authenticate.

    Returns
    -------
    Principal:
        The authenticated principal.

    Raises
    ------
    AuthenticationException
        When the identity service returns no principal.
    """
    details = AuthenticateClientDetails()
    details.request_headers = headers
    auth_response = identity_client.authenticate_client(
        authenticate_client_details=details
    )
    result: AuthenticateClientResult = auth_response.data
    if result.principal is not None:
        return result.principal
    raise AuthenticationException(auth_response.status, result.error_message)
def get_group_ids_from_config(config: Dict) -> List[str]:
    """Gets group ids from config.

    Parameters
    ----------
    config: dict
        A dict of configurations. Group OCIDs are expected under the
        GROUP_IDS key as a comma-separated string.

    Returns
    -------
    List
        A list of group ids seperated in the original string by ','
    """
    raw = config.get(ALLOWED_GROUP_IDS, "")
    # Fix: drop empty entries so a missing/blank config yields [] rather
    # than [""] (the original sent a meaningless empty-string "group id"
    # to the identity filter). Also tolerates trailing commas.
    return [group_id for group_id in raw.replace(" ", "").split(",") if group_id]
InstancePrincipalsSecurityTokenSigner() 151 | 152 | 153 | def _get_env_bool(env_var: str, default: bool = False) -> bool: 154 | env_val = os.getenv(env_var) 155 | 156 | if env_val is None: 157 | return default 158 | 159 | return env_val.lower() in ("true","t","1") 160 | -------------------------------------------------------------------------------- /authorizer/src/utils/header_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8; -*- 3 | 4 | # Copyright (c) 2023 Oracle and/or its affiliates. 5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ 6 | import json 7 | from typing import Dict, List 8 | from urllib import request 9 | 10 | HEADERS_JSON_KEY = "data" 11 | AUTHORIZATION_KEY = "authorization" 12 | SIGNATURE_HEADERS_KEY = "headers" 13 | 14 | 15 | class MissingRequiredHeadersException(Exception): 16 | def __init__( 17 | self, 18 | required_headers: List[str], 19 | provided_headers: List[str] 20 | ): 21 | self.required_headers = required_headers 22 | self.provided_headers = provided_headers 23 | 24 | def __str__(self): 25 | return "Headers required for authentication were not provided.\nProvided headers: {0}\nRequired headers: {1} \ 26 | \nMissing headers: {2}".format(self.provided_headers, self.required_headers, set(self.required_headers) 27 | .difference(set(self.provided_headers))) 28 | 29 | 30 | class AuthorizationHeaderMissingException(Exception): 31 | def __str__(self): 32 | return "Expected the authorization header to be present, but it was not found" 33 | 34 | 35 | class AuthorizationSigningHeadersMissingException(Exception): 36 | def __str__(self): 37 | return "Headers used to sign request was not present in authorization header" 38 | 39 | 40 | def extract_and_validate_headers(data: bytes) -> Dict[str, List[str]]: 41 | """ Extracts headers from json document passed by APIGW and outputs in format required 42 | by 
def extract_and_validate_headers(data: bytes) -> Dict[str, List[str]]:
    """Extracts the signed headers from the APIGW JSON payload.

    The payload carries headers under the "data" key; the output maps each
    lower-cased header name required by the signature to a one-element
    value list, as expected by the authenticate-client API.

    Parameters
    ----------
    data: bytes
        Input data in bytes.

    Returns
    -------
    Dict:
        A string-list dict.

    Raises
    ------
    AuthorizationHeaderMissingException
        When no authorization header is present.
    MissingRequiredHeadersException
        When a header named in the signature was not provided.
    """
    raw_headers = json.loads(data).get(HEADERS_JSON_KEY)
    headers = {name.lower(): [value] for name, value in raw_headers.items()}
    # Clients may sign x-date instead of date; normalize to date.
    if not headers.get("date") and headers.get("x-date"):
        headers["date"] = headers["x-date"]
    auth_values = headers.get(AUTHORIZATION_KEY)
    if auth_values is None:
        raise AuthorizationHeaderMissingException()
    required_headers = _get_required_headers_from_signature(auth_values[0])
    if any(name not in headers for name in required_headers):
        raise MissingRequiredHeadersException(
            required_headers, provided_headers=list(headers.keys())
        )
    return {name: headers[name] for name in required_headers}
def _get_required_headers_from_signature(auth_header: str) -> List[str]:
    """Extracts the header names the signature was computed over.

    input: 'Signature algorithm="rsa-sha256", headers="date (request-target) host",keyId="",
    signature=""'
    output: ['date', '(request-target)', 'host', 'authorization']

    Parameters
    ----------
    auth_header: str
        The auth header string.

    Returns
    -------
    List:
        A list of headers from auth string.

    Raises
    ------
    AuthorizationSigningHeadersMissingException
        When the "headers" parameter is absent from the signature.
    """
    params = request.parse_keqv_list(request.parse_http_list(auth_header))
    signing_headers = params.get(SIGNATURE_HEADERS_KEY)
    if signing_headers is None:
        raise AuthorizationSigningHeadersMissingException()
    # The authorization header itself must also be validated.
    names = signing_headers.split(" ") + [AUTHORIZATION_KEY]
    return [name.lower() for name in names]
    def authenticate_client(
        self,
        authenticate_client_details: AuthenticateClientDetails,
        **kwargs
    ) -> Response:
        """Performs authn validation from oci backend.

        Calls the identity data plane /authentication/authenticateClient
        endpoint, which is not exposed by the stock DataplaneClient.

        Parameters
        ----------
        authenticate_client_details: AuthenticateClientDetails
            An instance of AuthenticateClientDetails to send to oci backend.
        kwargs:
            retry_strategy: obj
                A retry strategy to apply to all calls made by this service client (i.e. at the client level).
                There is no retry strategy applied by default.

        Returns
        -------
        Response:
            An instance of Response
        """
        resource_path = "/authentication/authenticateClient"
        method = "POST"
        operation_name = "authenticate_client"
        api_reference_link = ""
        # Only retry_strategy is accepted; anything else is a caller error.
        expected_kwargs = ["retry_strategy"]
        extra_kwargs = [_key for _key in six.iterkeys(kwargs) if _key not in expected_kwargs]
        if extra_kwargs:
            raise ValueError(
                "authenticate_client got unknown kwargs: {!r}".format(extra_kwargs))

        header_params = {
            "accept": "application/json",
            "content-type": "application/json"
        }

        # Per-call strategy wins over the client-level one.
        retry_strategy = self.base_client.get_preferred_retry_strategy(
            operation_retry_strategy=kwargs.get('retry_strategy'),
            client_retry_strategy=self.retry_strategy
        )

        if retry_strategy:
            if not isinstance(retry_strategy, retry.NoneRetryStrategy):
                self.base_client.add_opc_client_retries_header(header_params)
                retry_strategy.add_circuit_breaker_callback(self.circuit_breaker_callback)
            # Same call_api invocation as below, wrapped in the retry loop.
            return retry_strategy.make_retrying_call(
                self.base_client.call_api,
                resource_path=resource_path,
                method=method,
                header_params=header_params,
                body=authenticate_client_details,
                response_type="AuthenticateClientResult",
                allow_control_chars=None,
                operation_name=operation_name,
                api_reference_link=api_reference_link)
        else:
            return self.base_client.call_api(
                resource_path=resource_path,
                method=method,
                header_params=header_params,
                body=authenticate_client_details,
                response_type="AuthenticateClientResult",
                allow_control_chars=None,
                operation_name=operation_name,
                api_reference_link=api_reference_link)
    def filter_group_membership(
        self,
        filter_membership_details: FilterGroupMembershipDetails,
        **kwargs
    ) -> Response:
        """Validates if given group ids are authorized from oci backend.

        Calls the identity data plane /filterGroupMembership endpoint,
        which returns the subset of the supplied group ids the principal
        actually belongs to.

        Parameters
        ----------
        filter_membership_details: FilterGroupMembershipDetails
            An instance of FilterGroupMembershipDetails to send to oci backend.
        kwargs:
            retry_strategy: obj
                A retry strategy to apply to all calls made by this service client (i.e. at the client level).
                There is no retry strategy applied by default.

        Returns
        -------
        Response:
            An instance of Response
        """
        resource_path = "/filterGroupMembership"
        method = "POST"
        operation_name = "filter_group_membership"
        api_reference_link = ""
        # Only retry_strategy is accepted; anything else is a caller error.
        expected_kwargs = ["retry_strategy"]

        extra_kwargs = [_key for _key in six.iterkeys(kwargs) if _key not in expected_kwargs]
        if extra_kwargs:
            raise ValueError(
                "filter_group_membership got unknown kwargs: {!r}".format(extra_kwargs))

        header_params = {
            "accept": "application/json",
            "content-type": "application/json"
        }

        # Per-call strategy wins over the client-level one.
        retry_strategy = self.base_client.get_preferred_retry_strategy(
            operation_retry_strategy=kwargs.get('retry_strategy'),
            client_retry_strategy=self.retry_strategy
        )

        if retry_strategy:
            if not isinstance(retry_strategy, retry.NoneRetryStrategy):
                self.base_client.add_opc_client_retries_header(header_params)
                retry_strategy.add_circuit_breaker_callback(self.circuit_breaker_callback)
            # Same call_api invocation as below, wrapped in the retry loop.
            return retry_strategy.make_retrying_call(
                self.base_client.call_api,
                resource_path=resource_path,
                method=method,
                header_params=header_params,
                body=filter_membership_details,
                response_type="FilterGroupMembershipResult",
                allow_control_chars=None,
                operation_name=operation_name,
                api_reference_link=api_reference_link)
        else:
            return self.base_client.call_api(
                resource_path=resource_path,
                method=method,
                header_params=header_params,
                body=filter_membership_details,
                response_type="FilterGroupMembershipResult",
                allow_control_chars=None,
                operation_name=operation_name,
                api_reference_link=api_reference_link)
class AuthenticationException(Exception):
    """Raised when the identity service cannot authenticate the client."""

    def __init__(self, status_code: int, error_msg: str):
        self.status_code = status_code
        self.error_msg = error_msg

    def __str__(self):
        return (
            f"Could not authenticate client: Status code: {self.status_code}, "
            f"Error Message: {self.error_msg}"
        )


class AuthorizationException(Exception):
    """Raised when the principal is in none of the allowed groups."""

    def __init__(self, status_code: int, expected_group_ids: List[str], subject: str):
        self.expected_group_ids = expected_group_ids
        self.status_code = status_code
        self.subject = subject

    def __str__(self):
        return (
            f"Could not authorize client: Status code: {self.status_code}, "
            f"Expected subject: {self.subject} to be part any of the following "
            f"groups:{self.expected_group_ids}"
        )
@patch('authorizer.src.utils.auth_utils._get_internal_instance_principal_signer')
def test_get_ip_signer(get_ip_mock: MagicMock):
    # Explicit INSTANCE_PRINCIPAL requests must route to the internal IP signer.
    get_ip_mock.return_value = MOCK_RET_VAL
    assert (
        auth_utils.get_signer(auth_utils.SignerType.INSTANCE_PRINCIPAL) == MOCK_RET_VAL
    )
    get_ip_mock.assert_called_once()

def test_get_group_ids():
    # Spaces around/after the comma-separated ids are stripped.
    group_ids = auth_utils.get_group_ids_from_config({"GROUP_IDS": "id1, id2, id3, id4 "})
    assert group_ids == ["id1", "id2", "id3", "id4"]

@patch('authorizer.src.utils.auth_utils.get_resource_principals_signer')
def test_get_rp_signer(rp_signer_mock: MagicMock):
    # Explicit RESOURCE_PRINCIPAL requests must route to the RP signer.
    rp_signer_mock.return_value = MOCK_RET_VAL
    assert (
        auth_utils.get_signer(auth_utils.SignerType.RESOURCE_PRINCIPAL) == MOCK_RET_VAL
    )
    rp_signer_mock.assert_called_once()

@patch('authorizer.src.utils.auth_utils._get_env_bool')
def test_auto_ip_signer(get_env_mock: MagicMock):
    # With RP_AUTH false, AUTO must resolve to the instance-principal signer.
    os.environ["RP_AUTH"] = "false"
    get_env_mock.return_value = False
    with patch('authorizer.src.utils.auth_utils.InstancePrincipalsSecurityTokenSigner') as ip_signer_mock:
        auth_utils.get_signer(auth_utils.SignerType.AUTO)
        assert (
            auth_utils.get_signer(auth_utils.SignerType.AUTO) == ip_signer_mock.return_value
        )
        ip_signer_mock.assert_called()
        get_env_mock.assert_called_with("RP_AUTH", False)
def test_get_env_bool_val_none():
    """When the variable is unset, _get_env_bool returns the supplied default unchanged."""
    os.environ.pop('_test', None)
    for default in (False, True):
        assert auth_utils._get_env_bool("_test", default) is default
def test_do_authn_fail():
    """do_authn must raise AuthenticationException when the identity service
    returns a result whose principal is None."""
    authenticate_result = oci.identity_data_plane.models.AuthenticateClientResult()
    authenticate_result.principal = None
    authenticate_result.error_message = "authn failed"

    response = oci.response.Response(
        status=200,
        data=authenticate_result,
        headers=None,
        request=None
    )
    # BUGFIX: dropped the redundant `response.data = authenticate_result`;
    # the constructor above already sets `data` (matches test_do_authn_pass).

    headers = {"foo": ["test"]}

    auth_client_details = oci.identity_data_plane.models.AuthenticateClientDetails()
    auth_client_details.request_headers = headers

    # NOTE(review): `type=` merely sets a mock attribute named "type";
    # `spec=ExtendedIdentityDataPlaneClient` was probably intended — confirm.
    idc_mock = MagicMock(type=ExtendedIdentityDataPlaneClient)
    idc_mock.authenticate_client = MagicMock()
    idc_mock.authenticate_client.return_value = response
    with pytest.raises(AuthenticationException):
        auth_utils.do_authn(idc_mock, headers)

    idc_mock.authenticate_client.assert_called_once_with(
        authenticate_client_details=auth_client_details
    )
def test_authz_fail():
    """do_authz raises AuthorizationException when the principal belongs to
    none of the expected groups (identity service answers 401)."""
    principal = oci.identity_data_plane.models.Principal()
    expected_group_ids = ["g1", "g3"]

    idc = MagicMock(type=ExtendedIdentityDataPlaneClient)
    idc.filter_group_membership = MagicMock()

    details = oci.identity_data_plane.models.FilterGroupMembershipDetails()
    details.principal = principal
    details.group_ids = expected_group_ids

    membership = oci.identity_data_plane.models.FilterGroupMembershipResult()
    membership.group_ids = ["g2"]
    membership.principal = principal

    idc.filter_group_membership.return_value = oci.response.Response(
        status=401,
        data=membership,
        headers=None,
        request=None
    )

    with pytest.raises(auth_utils.AuthorizationException):
        auth_utils.do_authz(idc, principal, expected_group_ids)
    idc.filter_group_membership.assert_called_once_with(details)
def generate_apigw_json_payload(headers: dict) -> bytes:
    """Build a UTF-8 encoded API Gateway authorizer payload.

    Header names are lower-cased (HTTP headers are case-insensitive) and
    wrapped in the ``USER_DEFINED`` payload envelope the authorizer expects.
    """
    normalized = {key.lower(): value for key, value in headers.items()}
    payload = {"type": "USER_DEFINED",
               "data": normalized}
    return json.dumps(payload).encode('utf-8')
def test_get_required_headers_from_signature_pass():
    """The signing-header names are parsed (lower-cased) from the Signature
    header and 'authorization' is appended to the list."""
    header = 'Signature algorithm="rsa-sha256", headers="Date (request-target) host", signature=""'
    expected = ["date", "(request-target)", "host", "authorization"]
    assert _get_required_headers_from_signature(header) == expected
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

FROM ghcr.io/oracle/oraclelinux8-instantclient:21

RUN rm -rf /var/cache/yum/* && yum clean all && yum install -y gcc make patch vim iproute net-tools git && rm -rf /var/cache/yum/*
RUN curl -L https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh >> miniconda.sh
RUN bash ./miniconda.sh -b -p /miniconda; rm ./miniconda.sh;
ENV PATH="/miniconda/bin:$PATH"

USER root
# Create sync directory and expose as environment variable

ARG CONDA_ENV_FILE=environment.yaml
ARG CONDA_ENV_NAME=oci-mlflow
ARG MLFLOW_DIR=/etc/mlflow
ARG CONTAINER_ARTIFACT_DIR=container-image
ARG RND

COPY ${CONTAINER_ARTIFACT_DIR}/${CONDA_ENV_FILE} /opt/env.yaml
RUN conda install -y conda-forge::mamba && mamba env create -f /opt/env.yaml --name ${CONDA_ENV_NAME} && conda clean -afy
# BUGFIX: was "${CONDA_ENV_NAME}}" (stray closing brace), which put the
# non-existent "/miniconda/envs/oci-mlflow}/bin" directory on PATH.
ENV PATH="/miniconda/envs/${CONDA_ENV_NAME}/bin:$PATH"

RUN conda init bash && source ~/.bashrc && conda activate ${CONDA_ENV_NAME}

RUN mkdir ${MLFLOW_DIR}
COPY ${CONTAINER_ARTIFACT_DIR}/run/* ${MLFLOW_DIR}/
RUN chmod a+x ${MLFLOW_DIR}/launch_mlflow.sh

ENV MLFLOW_DIR=${MLFLOW_DIR}

EXPOSE 5000
HEALTHCHECK --interval=30s CMD curl -f -sI http://localhost:5000 || exit 1

# Install a locally built oci-mlflow wheel, if one was copied into MLFLOW_DIR.
RUN if [ -f ${MLFLOW_DIR}/oci_mlflow*.whl ]; then \
    local_whl=$(find ${MLFLOW_DIR} -name "*.whl" -exec basename {} \; | head -n 1 ); \
    source ~/.bashrc && conda activate ${CONDA_ENV_NAME} && pip install ${MLFLOW_DIR}/$local_whl; \
    fi

RUN echo "conda activate oci-mlflow">>/root/.bashrc
SHELL ["/bin/bash", "--login", "-c"]

ENTRYPOINT [ "bash", "--login" , "-c"]
CMD ["python $MLFLOW_DIR/run.py"]
#!/bin/bash --login
# Copyright (c) 2023 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at
# https://oss.oracle.com/licenses/upl/

# -m: enable job control; -e: abort on error; -o pipefail: a pipeline fails
# if any stage fails.
set -m -e -o pipefail

conda activate oci-mlflow

# BUGFIX: banner read "launchng".
echo "========== MLflow server is launching... =========="

# BUGFIX: quote "$@" so forwarded arguments containing spaces stay intact.
mlflow server "$@"
exit_code=$?

echo "========== MLflow server is shutting down... =========="

# BUGFIX: $LastExitCode is a PowerShell variable and is unset under bash;
# propagate the server's real exit status instead.
exit $exit_code
class BackendStore:
    """Abstract base for MLflow backend-store URI providers.

    Subclasses implement :meth:`uri` to return a SQLAlchemy-style connection
    string used for the ``--backend-store-uri`` MLflow server option.
    """

    @staticmethod
    def uri():
        """Return the backend store URI; the base implementation returns None."""
        # BUGFIX: declared @staticmethod — the original bare `def uri():` only
        # worked when invoked on the class object (as the factory does) and
        # raised TypeError if ever called on an instance.
        pass
def configure_mlflow_environment():
    """Assemble extra ``mlflow server`` CLI options as a single string.

    If MLFLOW_BACKEND_STORE_URI is already set in the environment, MLflow
    picks it up on its own, so no options are generated (returns "").
    Otherwise a ``--backend-store-uri`` option is built for the configured
    backend provider.
    """
    options = {}
    if not os.environ.get("MLFLOW_BACKEND_STORE_URI"):
        options["backend-store-uri"] = generate_backend_store_uri(BACKEND_PROVIDER)
    return " ".join(f"--{name} {value}" for name, value in options.items())
= configure_mlflow_environment() 121 | except Exception as e: 122 | logger.error(e) 123 | raise Exception("Failed to create MLFlow configuration") 124 | 125 | # shlex.split can cause issues for "--gunicorn-opts". 126 | # It is better to pass extra args through environment variables. 127 | # More info - https://mlflow.org/docs/latest/cli.html#mlflow-server 128 | cmd_split = shlex.split(mlflow_cmd_option) 129 | subprocess.run( 130 | [MLFLOW_LAUNCH_SCRIPT] + cmd_split, # capture_output=True 131 | ) 132 | 133 | 134 | if __name__ == "__main__": 135 | launch_mlflow() 136 | -------------------------------------------------------------------------------- /demos/deploy_mlflow_model/conda-runtime/README.md: -------------------------------------------------------------------------------- 1 | # Conda Environment based Deployment 2 | 3 | This example demonstrates how to use a conda pack based on the conda.yaml in the MLflow model to deploy a model. MLflow model consists of conda.yaml which captures the required dependencies for running the model. 4 | 5 | ## Create a model and register 6 | 7 | 1. Build Model 8 | 9 | Run the `sklearn_elasticnet_wine `__ in the project demos 10 | 11 | 2. 
There are two example specifications in this folder -
* ``elastic-net-deployment_build_conda.yaml``: This will build the conda environment, export it as a conda pack, upload it to object storage, and deploy
* ``elastic-net-deployment_prebuilt_conda.yaml``: This will use the conda pack that is already saved in object storage
-------------------------------------------------------------------------------- 1 | kind: deployment 2 | spec: 3 | infrastructure: 4 | kind: infrastructure 5 | type: modelDeployment 6 | spec: 7 | logGroupId: ocid1.loggroup.oc1.iad.. 8 | logId: ocid1.log.oc1.iad.. 9 | projectId: ocid1.datascienceproject.oc1.iad.. 10 | compartmentId: ocid1.compartment.oc1.. 11 | shapeName: VM.Standard.E3.Flex 12 | shapeConfigDetails: 13 | memoryInGBs: 32 14 | ocpus: 4 15 | blockStorageSize: 50 16 | replica: 1 17 | runtime: 18 | kind: runtime 19 | type: conda 20 | spec: 21 | uri: oci://bucketname@namespace/path/to/conda 22 | pythonVersion: 23 | 24 | -------------------------------------------------------------------------------- /demos/deploy_mlflow_model/conda-runtime/elastic-net-deployment_prebuilt_conda.yaml: -------------------------------------------------------------------------------- 1 | kind: deployment 2 | spec: 3 | infrastructure: 4 | kind: infrastructure 5 | type: modelDeployment 6 | spec: 7 | logGroupId: ocid1.loggroup.oc1.iad.. 8 | logId: ocid1.log.oc1.iad.. 9 | projectId: ocid1.datascienceproject.oc1.iad.. 10 | compartmentId: ocid1.compartment.oc1.. 11 | shapeName: VM.Standard.E3.Flex 12 | shapeConfigDetails: 13 | memoryInGBs: 32 14 | ocpus: 4 15 | blockStorageSize: 50 16 | replica: 1 17 | runtime: 18 | kind: runtime 19 | type: conda 20 | spec: 21 | uri: 22 | name: elasticnet_v1 23 | destination: oci://bucket@namespace/mlflow-conda-envs/ 24 | gpu: false 25 | overwrite: false 26 | keepLocal: true 27 | localCondaDir: ./conda 28 | -------------------------------------------------------------------------------- /demos/deploy_mlflow_model/container-runtime/README.md: -------------------------------------------------------------------------------- 1 | # Container based deployment 2 | 3 | ## Overview 4 | 5 | This demo shows how to use containers for deploying models stored in MLflow registry. 6 | 7 | 1. 
To install conda dependencies on the container image, copy over `conda.yaml` from the MLflow artifact and save it in the same folder as the `Dockerfile.pyfunc`. The artifacts to build a container image are available in the ``../container`` folder.
3.2 Using MLflow CLI
2 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ 3 | 4 | FROM iad.ocir.io/namespace/image:tag 5 | 6 | RUN yum install -y --setopt=skip_missing_names_on_install=False maven java-11-openjdk wget curl nginx sudo 7 | 8 | # Data Science service extracts the model to /opt/ds/model/deployed_model 9 | RUN mkdir -p /opt/ds/model/deployed_model && \ 10 | mkdir -p /opt/ml && \ 11 | ln -s /opt/ml/model /opt/ds/model/deployed_model 12 | 13 | RUN export JAVA_HOME=/usr/lib/jvm/$(ls /usr/lib/jvm/| grep java-11-openjdk*) 14 | ENV GUNICORN_CMD_ARGS="--timeout 60 -k gevent" 15 | # Set up the program in the image 16 | WORKDIR /opt/mlflow 17 | 18 | RUN mvn --batch-mode dependency:copy -Dartifact=org.mlflow:mlflow-scoring:2.1.1:pom -DoutputDirectory=/opt/java 19 | RUN mvn --batch-mode dependency:copy -Dartifact=org.mlflow:mlflow-scoring:2.1.1:jar -DoutputDirectory=/opt/java/jars 20 | RUN cp /opt/java/mlflow-scoring-2.1.1.pom /opt/java/pom.xml 21 | RUN cd /opt/java && mvn --batch-mode dependency:copy-dependencies -DoutputDirectory=/opt/java/jars 22 | 23 | ENV MLFLOW_DISABLE_ENV_CREATION="true" 24 | ENV DISABLE_NGINX=true 25 | 26 | COPY conda.yaml /opt/conda.yaml 27 | RUN mamba env update --name oci-mlflow -f /opt/conda.yaml && pip install gevent 28 | 29 | ENV NGINX_ROOT=/etc/nginx 30 | ENV NGINX_PID=/var/run/nginx.pid 31 | ENV NGINX_BIN=/usr/sbin/nginx 32 | ENV NGINX_USER=root 33 | 34 | 35 | EXPOSE 5001 36 | 37 | COPY nginx.conf /etc/nginx/nginx.conf 38 | ENTRYPOINT [ "/bin/bash", "--login", "-c" ] 39 | CMD ["nginx -p $PWD && mlflow models serve -p 8080 -h 0.0.0.0 -m /opt/ds/model/deployed_model --env-manager local"] 40 | -------------------------------------------------------------------------------- /demos/deploy_mlflow_model/container-runtime/container/nginx.conf: -------------------------------------------------------------------------------- 1 | user root; 2 | worker_processes auto; 3 | error_log /dev/stdout info; 4 | pid 
/var/run/nginx.pid; 5 | 6 | 7 | events { 8 | } 9 | 10 | http { 11 | log_format main '$remote_addr - $remote_user [$time_local] "$request" ' 12 | '$status $body_bytes_sent "$http_referer" ' 13 | '"$http_user_agent" "$http_x_forwarded_for"'; 14 | 15 | access_log /dev/stdout main; 16 | 17 | tcp_nopush on; 18 | tcp_nodelay on; 19 | keepalive_timeout 65; 20 | types_hash_max_size 2048; 21 | 22 | include /etc/nginx/mime.types; 23 | default_type application/octet-stream; 24 | 25 | 26 | server { 27 | listen 5001; 28 | client_body_temp_path /tmp/client_body_temp; 29 | proxy_temp_path /tmp/proxy_temp; 30 | 31 | 32 | location /predict { 33 | proxy_pass http://127.0.0.1:8080/invocations; 34 | } 35 | location /health { 36 | proxy_pass http://127.0.0.1:8080/health; 37 | } 38 | 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /demos/deploy_mlflow_model/container-runtime/elastic-net-deployment-container.yaml: -------------------------------------------------------------------------------- 1 | kind: deployment 2 | spec: 3 | infrastructure: 4 | kind: infrastructure 5 | type: modelDeployment 6 | spec: 7 | logGroupId: ocid1.loggroup.oc1.iad.. 8 | logId: ocid1.log.oc1.iad.. 9 | projectId: ocid1.datascienceproject.oc1.iad.. 10 | compartmentId: ocid1.compartment.oc1.. 
11 | shapeName: VM.Standard.E3.Flex 12 | shapeConfigDetails: 13 | memoryInGBs: 32 14 | ocpus: 4 15 | blockStorageSize: 50 16 | replica: 1 17 | runtime: 18 | kind: runtime 19 | type: container 20 | spec: 21 | image: iad.ocir.io//mlflow-model-runtime/sklearn:v1 22 | serverPort: 5001 23 | healthCheckPort: 5001 24 | -------------------------------------------------------------------------------- /demos/deploy_mlflow_model/container-runtime/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataframe_split": { 3 | "columns": [ 4 | "mean radius", 5 | "mean texture", 6 | "mean perimeter", 7 | "mean area", 8 | "mean smoothness", 9 | "mean compactness", 10 | "mean concavity", 11 | "mean concave points" 12 | ], 13 | "data": [ 14 | [ 15 | 17.99, 16 | 10.38, 17 | 122.8, 18 | 1001.0, 19 | 0.1184, 20 | 0.2776, 21 | 0.3001, 22 | 0.1471 23 | ] 24 | ], 25 | "index": [ 26 | 0 27 | ] 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /demos/pyspark_logistic_regression_dataflow_job/MLproject: -------------------------------------------------------------------------------- 1 | name: pyspark_logistic_regression_dataflow_job 2 | 3 | entry_points: 4 | main: 5 | parameters: 6 | seed: { type: float, default: 24 } 7 | command: "logistic_regression.py --seed {seed}" 8 | -------------------------------------------------------------------------------- /demos/pyspark_logistic_regression_dataflow_job/README.md: -------------------------------------------------------------------------------- 1 | ## Run MLflow project on the Data Flow cluster 2 | --- 3 | 4 | This demo shows how to run an MLflow project on the Data Flow cluster. This directory contains an MLflow project file that trains a logistic regression model on the Iris dataset. 
In the config file we also specify a Private Endpoint, which allows the cluster to reach the tracking server URI when the tracking server is deployed in a private network. However, the private endpoint is not necessary when the tracking server has a public IP address.
@click.command()
@click.option("--seed", "-s", help="The seed for sampling.", default=20, required=False)
def main(seed):
    """Train a PySpark logistic-regression model on the Iris dataset with
    MLflow autologging enabled, then show predictions on the held-out split."""
    session = SparkSession.builder.getOrCreate()

    # Load Iris as a pandas frame, rename the target column to the Spark ML
    # convention ("label"), and convert it into a Spark DataFrame.
    pandas_frame = load_iris(as_frame=True).frame.rename(columns={"target": "label"})
    dataset = session.createDataFrame(pandas_frame)
    dataset = VectorAssembler(inputCols=dataset.columns[:-1], outputCol="features").transform(dataset)
    train_df, test_df = dataset.randomSplit([0.8, 0.2], seed)

    mlflow.pyspark.ml.autolog()

    with mlflow.start_run():
        estimator = LogisticRegression(maxIter=5)
        fitted_model = estimator.fit(train_df)
        mlflow.log_param("randomSplit", [0.8, 0.2])
        mlflow.log_param("Seed", seed)

        predictions = fitted_model.transform(test_df)
        predictions.select(fitted_model.getPredictionCol()).show(10)
    session.stop()
For MLflow, it will be replaced with the CMD}" 33 | overwrite: True 34 | type: dataFlow 35 | -------------------------------------------------------------------------------- /demos/pyspark_logistic_regression_dataflow_job/run.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Copyright (c) 2023 Oracle and/or its affiliates.\n", 10 | "# Licensed under the Universal Permissive License v 1.0 as shown at\n", 11 | "# https://oss.oracle.com/licenses/upl/\n", 12 | "\n", 13 | "import mlflow\n", 14 | "\n", 15 | "mlflow.set_tracking_uri(\"/\")\n", 16 | "\n", 17 | "mlflow.run(\".\",\n", 18 | " experiment_name=\"spark/logistic_regression\",\n", 19 | " backend=\"oci-datascience\",\n", 20 | " backend_config=\"oci-datascience-config.json\"\n", 21 | ")" 22 | ] 23 | } 24 | ], 25 | "metadata": { 26 | "kernelspec": { 27 | "display_name": "Python 3", 28 | "language": "python", 29 | "name": "python3" 30 | }, 31 | "language_info": { 32 | "codemirror_mode": { 33 | "name": "ipython", 34 | "version": 3 35 | }, 36 | "file_extension": ".py", 37 | "mimetype": "text/x-python", 38 | "name": "python", 39 | "nbconvert_exporter": "python", 40 | "pygments_lexer": "ipython3", 41 | "version": "3.9.15 (main, Nov 24 2022, 08:29:02) \n[Clang 14.0.6 ]" 42 | }, 43 | "vscode": { 44 | "interpreter": { 45 | "hash": "befd0945c435790bd7f22b73b168cdc559c23014061f63180a80847874ce09cd" 46 | } 47 | } 48 | }, 49 | "nbformat": 4, 50 | "nbformat_minor": 4 51 | } 52 | -------------------------------------------------------------------------------- /demos/pyspark_logistic_regression_dataflow_job/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2023 Oracle and/or its affiliates. 
3 | # Licensed under the Universal Permissive License v 1.0 as shown at 4 | # https://oss.oracle.com/licenses/upl/ 5 | 6 | export MLFLOW_TRACKING_URI= 7 | mlflow run . --param-list seed=24 --experiment-name spark/logistic_regression --backend oci-datascience --backend-config ./oci-datascience-config.json 8 | -------------------------------------------------------------------------------- /demos/sklearn_elasticnet_wine_notebook_job/MLproject: -------------------------------------------------------------------------------- 1 | # The source of this workload is a Jupyter Notebook which will be run on the DataScience job using MLflow CLI/SDK. 2 | 3 | name: sklearn_elasticnet_wine_notebook_job 4 | 5 | entry_points: 6 | main: 7 | command: "train.ipynb" 8 | -------------------------------------------------------------------------------- /demos/sklearn_elasticnet_wine_notebook_job/README.md: -------------------------------------------------------------------------------- 1 | ## Run MLflow project on the Data Science job 2 | --- 3 | 4 | This demo shows how to run an MLflow project on the Data Science job within a Notebook runtime. This directory contains an MLflow project that trains a linear regression model on the UC Irvine Wine Quality Dataset. 5 | 6 | ## Prerequisites 7 | - First, install MLflow library 8 | ``` 9 | pip install mlflow 10 | ``` 11 | - Set the tracking server endpoint. 12 | ``` 13 | export MLFLOW_TRACKING_URI= 14 | ``` 15 | - Install the oci-mlflow package 16 | ``` 17 | pip install oci-mlflow 18 | ``` 19 | 20 | ## Running this example on the Data Science job 21 | To run this example on the Data Science job, the custom conda environment was prepared and published to the Object Storage bucket. The `generalml_p38_cpu_v1` service conda environment was used as a base environment for the custom one. 
22 | 23 | - Install the OCI MLflow package 24 | ``` 25 | pip install oci-mlflow 26 | ``` 27 | - Prepare the OCI config, which is a JSON file containing the authentication information and also path to the job configuration YAML file 28 | ``` 29 | { 30 | "oci_auth": "api_key", 31 | "oci_job_template_path": "./oci-datascience-template.yaml" 32 | } 33 | ``` 34 | - Prepare the job configuration file 35 | ``` 36 | kind: job 37 | name: "{Job name. For MLflow, it will be replaced with the Project name}" 38 | spec: 39 | infrastructure: 40 | kind: infrastructure 41 | spec: 42 | blockStorageSize: 50 43 | subnetId: ocid1.subnet.oc1.iad.. 44 | compartmentId: ocid1.compartment.oc1.. 45 | projectId: ocid1.datascienceproject.oc1.iad.. 46 | logGroupId: ocid1.loggroup.oc1.iad.. 47 | logId: ocid1.log.oc1.iad.. 48 | shapeConfigDetails: 49 | memoryInGBs: 20 50 | ocpus: 2 51 | shapeName: VM.Standard.E3.Flex 52 | type: dataScienceJob 53 | runtime: 54 | kind: runtime 55 | spec: 56 | args: [] 57 | conda: 58 | type: published 59 | uri: 60 | env: 61 | - name: TEST 62 | value: TEST_VALUE 63 | entrypoint: "{Entry point notebook. For MLflow, it will be replaced with the CMD}" 64 | source: "{Path to the source code directory. For MLflow, it will be replaced with path to the project}" 65 | notebookEncoding: utf-8 66 | type: notebook 67 | ``` 68 | 69 | - Run the example project using CLI 70 | 71 | ``` 72 | mlflow run . 
--experiment-name My_Experiment --backend oci-datascience --backend-config ./oci-datascience-config.json 73 | ``` 74 | 75 | - Run the example project using SDK 76 | ``` 77 | import mlflow 78 | 79 | mlflow.set_tracking_uri("") 80 | 81 | mlflow.run(".", 82 | experiment_name="MyExperiment", 83 | backend="oci-datascience", 84 | backend_config="oci-datascience-config.json" 85 | ) 86 | ``` 87 | -------------------------------------------------------------------------------- /demos/sklearn_elasticnet_wine_notebook_job/oci-datascience-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "oci_auth": "api_key", 3 | "oci_job_template_path": "{work_dir}/oci-datascience-template.yaml" 4 | } 5 | -------------------------------------------------------------------------------- /demos/sklearn_elasticnet_wine_notebook_job/oci-datascience-template.yaml: -------------------------------------------------------------------------------- 1 | kind: job 2 | name: "{Job name. For MLflow, it will be replaced with the Project name}" 3 | spec: 4 | infrastructure: 5 | kind: infrastructure 6 | spec: 7 | blockStorageSize: 50 8 | compartmentId: ocid1.compartment.oc1.. 9 | jobInfrastructureType: ME_STANDALONE 10 | jobType: DEFAULT 11 | logGroupId: ocid1.loggroup.oc1.iad.. 12 | logId: ocid1.log.oc1.iad.. 13 | projectId: ocid1.datascienceproject.oc1.iad.. 14 | shapeConfigDetails: 15 | memoryInGBs: 20 16 | ocpus: 2 17 | shapeName: VM.Standard.E3.Flex 18 | subnetId: ocid1.subnet.oc1.iad.. 19 | type: dataScienceJob 20 | runtime: 21 | kind: runtime 22 | spec: 23 | args: [] 24 | conda: 25 | type: published 26 | uri: oci://bucket@namespace/prefix 27 | env: 28 | - name: TEST 29 | value: TEST_VALUE 30 | entrypoint: "{Entry point notebook. For MLflow, it will be replaced with the CMD}" 31 | source: "{Path to the source code directory. 
For MLflow, it will be replaced with path to the project}" 32 | notebookEncoding: utf-8 33 | type: notebook 34 | -------------------------------------------------------------------------------- /demos/sklearn_elasticnet_wine_notebook_job/run.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Copyright (c) 2023 Oracle and/or its affiliates.\n", 10 | "# Licensed under the Universal Permissive License v 1.0 as shown at\n", 11 | "# https://oss.oracle.com/licenses/upl/\n", 12 | "\n", 13 | "import mlflow\n", 14 | "\n", 15 | "mlflow.set_tracking_uri(\"/\")\n", 16 | "\n", 17 | "mlflow.run(\".\",\n", 18 | " experiment_name=\"sklearn/elastic_net\",\n", 19 | " backend=\"oci-datascience\",\n", 20 | " backend_config=\"oci-datascience-config.json\"\n", 21 | ")" 22 | ] 23 | } 24 | ], 25 | "metadata": { 26 | "kernelspec": { 27 | "display_name": "Python 3", 28 | "language": "python", 29 | "name": "python3" 30 | }, 31 | "language_info": { 32 | "codemirror_mode": { 33 | "name": "ipython", 34 | "version": 3 35 | }, 36 | "file_extension": ".py", 37 | "mimetype": "text/x-python", 38 | "name": "python", 39 | "nbconvert_exporter": "python", 40 | "pygments_lexer": "ipython3", 41 | "version": "3.9.15 (main, Nov 24 2022, 08:29:02) \n[Clang 14.0.6 ]" 42 | }, 43 | "vscode": { 44 | "interpreter": { 45 | "hash": "befd0945c435790bd7f22b73b168cdc559c23014061f63180a80847874ce09cd" 46 | } 47 | } 48 | }, 49 | "nbformat": 4, 50 | "nbformat_minor": 4 51 | } 52 | -------------------------------------------------------------------------------- /demos/sklearn_elasticnet_wine_notebook_job/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2023 Oracle and/or its affiliates. 
3 | # Licensed under the Universal Permissive License v 1.0 as shown at 4 | # https://oss.oracle.com/licenses/upl/ 5 | 6 | export MLFLOW_TRACKING_URI= 7 | mlflow run . --experiment-name sklearn/elastic_net --backend oci-datascience --backend-config ./oci-datascience-config.json 8 | -------------------------------------------------------------------------------- /demos/sklearn_elasticnet_wine_notebook_job/train.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Copyright (c) 2023 Oracle and/or its affiliates.\n", 10 | "# Licensed under the Universal Permissive License v 1.0 as shown at\n", 11 | "# https://oss.oracle.com/licenses/upl/\n", 12 | "\n", 13 | "import pandas as pd\n", 14 | "import numpy as np\n", 15 | "from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score\n", 16 | "from sklearn.model_selection import train_test_split\n", 17 | "from sklearn.linear_model import ElasticNet\n", 18 | "\n", 19 | "import mlflow\n", 20 | "import mlflow.sklearn\n", 21 | "\n", 22 | "np.random.seed(40)\n", 23 | "\n", 24 | "alpha = 0.7\n", 25 | "l1_ratio = 0.06\n", 26 | "\n", 27 | "print(\"#\" * 20)\n", 28 | "print(\"ARGS:\")\n", 29 | "print(f\"args.alpha: {alpha}\")\n", 30 | "print(f\"args.l1_ratio: {l1_ratio}\")\n", 31 | "print(\"#\" * 20)\n", 32 | "\n", 33 | "data = pd.read_csv(\n", 34 | " \"https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv\",\n", 35 | " delimiter=\";\",\n", 36 | ")\n", 37 | "\n", 38 | "\n", 39 | "# Split the data into training and test sets. 
(0.75, 0.25) split.\n", 40 | "train, test = train_test_split(data)\n", 41 | "\n", 42 | "# The predicted column is \"quality\" which is a scalar from [3, 9]\n", 43 | "train_x = train.drop([\"quality\"], axis=1)\n", 44 | "test_x = test.drop([\"quality\"], axis=1)\n", 45 | "train_y = train[[\"quality\"]]\n", 46 | "test_y = test[[\"quality\"]]\n", 47 | "\n", 48 | "\n", 49 | "with mlflow.start_run():\n", 50 | " lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)\n", 51 | " lr.fit(train_x, train_y)\n", 52 | "\n", 53 | " predicted_qualities = lr.predict(test_x)\n", 54 | " \n", 55 | " rmse = np.sqrt(mean_squared_error(test_y, predicted_qualities))\n", 56 | " mae = mean_absolute_error(test_y, predicted_qualities)\n", 57 | " r2 = r2_score(test_y, predicted_qualities)\n", 58 | "\n", 59 | " print(\"Elasticnet model (alpha={:f}, l1_ratio={:f}):\".format(alpha, l1_ratio))\n", 60 | " \n", 61 | " print(\"#\" * 50)\n", 62 | " print(f\"RMSE: {rmse}\")\n", 63 | " print(f\"MAE: {mae}\")\n", 64 | " print(f\"R2: {r2}\")\n", 65 | " print(\"#\" * 50)\n", 66 | "\n", 67 | " mlflow.log_param(\"alpha\", alpha)\n", 68 | " mlflow.log_param(\"l1_ratio\", l1_ratio)\n", 69 | " mlflow.log_metric(\"rmse\", rmse)\n", 70 | " mlflow.log_metric(\"r2\", r2)\n", 71 | " mlflow.log_metric(\"mae\", mae)\n", 72 | "\n", 73 | " mlflow.sklearn.log_model(lr, \"model\", registered_model_name=\"ElasticnetWineModel\")" 74 | ] 75 | } 76 | ], 77 | "metadata": { 78 | "kernelspec": { 79 | "display_name": "Python 3.9.15 ('mlflow-oci')", 80 | "language": "python", 81 | "name": "python3" 82 | }, 83 | "language_info": { 84 | "codemirror_mode": { 85 | "name": "ipython", 86 | "version": 3 87 | }, 88 | "file_extension": ".py", 89 | "mimetype": "text/x-python", 90 | "name": "python", 91 | "nbconvert_exporter": "python", 92 | "pygments_lexer": "ipython3", 93 | "version": "3.9.15 (main, Nov 24 2022, 08:29:02) \n[Clang 14.0.6 ]" 94 | }, 95 | "orig_nbformat": 4, 96 | "vscode": { 97 | "interpreter": { 98 | "hash": 
"befd0945c435790bd7f22b73b168cdc559c23014061f63180a80847874ce09cd" 99 | } 100 | } 101 | }, 102 | "nbformat": 4, 103 | "nbformat_minor": 2 104 | } 105 | -------------------------------------------------------------------------------- /demos/sklearn_elasticnet_wine_script_job/README.md: -------------------------------------------------------------------------------- 1 | ## Run MLflow project on the Data Science job 2 | --- 3 | 4 | This demo shows how to run an MLflow project locally as well as on the Data Science job within a Python runtime. This directory contains only the configuration files which are necessary to run the project on the Data Science job. The project by itself will be downloaded form the [GIT](https://github.com/mlflow/mlflow-example) repository. The project trains a linear regression model on the UC Irvine Wine Quality Dataset. 5 | 6 | ## Prerequisites 7 | - First, install MLflow library 8 | ``` 9 | pip install mlflow 10 | ``` 11 | - Set the tracking server endpoint. 12 | ``` 13 | export MLFLOW_TRACKING_URI= 14 | ``` 15 | - Install the oci-mlflow package 16 | ``` 17 | pip install oci-mlflow 18 | ``` 19 | 20 | ## Running this example locally 21 | The project will be executed on the local instance and the result will be added to the tracking URI specified above. 22 | - Run the example project using CLI 23 | 24 | ``` 25 | mlflow run https://github.com/mlflow/mlflow#examples/sklearn_elasticnet_wine --experiment-name My_Experiment 26 | ``` 27 | - Run the example project using SDK 28 | ``` 29 | import mlflow 30 | 31 | mlflow.set_tracking_uri("") 32 | 33 | mlflow.run("https://github.com/mlflow/mlflow#examples/sklearn_elasticnet_wine", 34 | experiment_name="My_Experiment", 35 | ) 36 | ``` 37 | 38 | ## Running this example on the Data Science job 39 | To run this example on the Data Science job, the custom conda environment was prepared and published to the Object Storage bucket. 
The custom conda environment contains all the required packages provided in the [conda.yaml](https://github.com/mlflow/mlflow-example/blob/master/conda.yaml) as well as the `oci-mlflow` library. The `generalml_p38_cpu_v1` service conda environment was used as a base environment for the custom one. 40 | - Install the OCI MLflow package 41 | ``` 42 | pip install oci-mlflow 43 | ``` 44 | - Prepare and publish a custom conda environment which should contain `mlflow`, `oci-mlflow` and all libraries from the [conda.yaml](https://github.com/mlflow/mlflow-example/blob/master/conda.yaml) 45 | 46 | - Prepare the OCI config, which is a JSON file containing the authentication information and also the path to the job configuration YAML file. Note that the project folder already contains all the necessary files to run this example. 47 | ``` 48 | { 49 | "oci_auth": "api_key", 50 | "oci_job_template_path": "./oci-datascience-template.yaml" 51 | } 52 | ``` 53 | - Prepare the job configuration file. 54 | ``` 55 | kind: job 56 | name: "{Job name. For MLflow, it will be replaced with the Project name}" 57 | spec: 58 | infrastructure: 59 | kind: infrastructure 60 | spec: 61 | blockStorageSize: 50 62 | subnetId: ocid1.subnet.oc1.iad.. 63 | compartmentId: ocid1.compartment.oc1.. 64 | projectId: ocid1.datascienceproject.oc1.iad.. 65 | logGroupId: ocid1.loggroup.oc1.iad.. 66 | logId: ocid1.log.oc1.iad.. 67 | shapeName: VM.Standard.E3.Flex 68 | shapeConfigDetails: 69 | memoryInGBs: 20 70 | ocpus: 2 71 | type: dataScienceJob 72 | runtime: 73 | kind: runtime 74 | spec: 75 | args: [] 76 | conda: 77 | type: published 78 | uri: 79 | env: 80 | - name: TEST 81 | value: TEST_VALUE 82 | entrypoint: "{Entry point script. For the MLFlow will be replaced with the CMD}" 83 | scriptPathURI: "{Path to the script.
For the MLFlow will be replaced with path to the project}" 84 | type: python 85 | 86 | ``` 87 | - Run the example project using CLI 88 | 89 | ``` 90 | mlflow run https://github.com/mlflow/mlflow#examples/sklearn_elasticnet_wine --experiment-name My_Experiment --backend oci-datascience --backend-config ./oci-datascience-config.json 91 | ``` 92 | - Run the example project using SDK 93 | ``` 94 | import mlflow 95 | 96 | mlflow.set_tracking_uri("/") 97 | 98 | mlflow.run("https://github.com/mlflow/mlflow#examples/sklearn_elasticnet_wine", 99 | experiment_name="My_Experiment", 100 | backend="oci-datascience", 101 | backend_config="oci-datascience-config.json" 102 | ) 103 | ``` 104 | -------------------------------------------------------------------------------- /demos/sklearn_elasticnet_wine_script_job/oci-datascience-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "oci_auth": "api_key", 3 | "oci_job_template_path": "./oci-datascience-template.yaml" 4 | } 5 | -------------------------------------------------------------------------------- /demos/sklearn_elasticnet_wine_script_job/oci-datascience-template.yaml: -------------------------------------------------------------------------------- 1 | kind: job 2 | name: "{Job name. For MLflow, it will be replaced with the Project name}" 3 | spec: 4 | infrastructure: 5 | kind: infrastructure 6 | spec: 7 | blockStorageSize: 50 8 | subnetId: ocid1.subnet.oc1.iad.. 9 | compartmentId: ocid1.compartment.oc1.. 10 | projectId: ocid1.datascienceproject.oc1.iad.. 11 | logGroupId: ocid1.loggroup.oc1.iad.. 12 | logId: ocid1.log.oc1.iad.. 
13 | shapeName: VM.Standard.E3.Flex 14 | shapeConfigDetails: 15 | memoryInGBs: 20 16 | ocpus: 2 17 | jobInfrastructureType: ME_STANDALONE 18 | jobType: DEFAULT 19 | type: dataScienceJob 20 | runtime: 21 | kind: runtime 22 | spec: 23 | args: [] 24 | conda: 25 | type: published 26 | uri: oci://bucket@namespace/prefix 27 | env: 28 | - name: TEST 29 | value: TEST_VALUE 30 | entrypoint: "{Entry point script. For the MLFlow will be replaced with the CMD}" 31 | scriptPathURI: "{Path to the script. For the MLFlow will be replaced with path to the project}" 32 | type: python 33 | -------------------------------------------------------------------------------- /demos/sklearn_elasticnet_wine_script_job/run.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Copyright (c) 2023 Oracle and/or its affiliates.\n", 10 | "# Licensed under the Universal Permissive License v 1.0 as shown at\n", 11 | "# https://oss.oracle.com/licenses/upl/\n", 12 | "\n", 13 | "import mlflow\n", 14 | "\n", 15 | "mlflow.set_tracking_uri(\"/\")\n", 16 | "\n", 17 | "mlflow.run(\"https://github.com/mlflow/mlflow#examples/sklearn_elasticnet_wine\",\n", 18 | " experiment_name=\"/sklearn/elastic_net\",\n", 19 | " backend=\"oci-datascience\",\n", 20 | " backend_config=\"oci-datascience-config.json\"\n", 21 | ")" 22 | ] 23 | } 24 | ], 25 | "metadata": { 26 | "kernelspec": { 27 | "display_name": "Python 3", 28 | "language": "python", 29 | "name": "python3" 30 | }, 31 | "language_info": { 32 | "codemirror_mode": { 33 | "name": "ipython", 34 | "version": 3 35 | }, 36 | "file_extension": ".py", 37 | "mimetype": "text/x-python", 38 | "name": "python", 39 | "nbconvert_exporter": "python", 40 | "pygments_lexer": "ipython3", 41 | "version": "3.9.15 (main, Nov 24 2022, 08:29:02) \n[Clang 14.0.6 ]" 42 | }, 43 | "vscode": { 44 | "interpreter": { 
45 | "hash": "befd0945c435790bd7f22b73b168cdc559c23014061f63180a80847874ce09cd" 46 | } 47 | } 48 | }, 49 | "nbformat": 4, 50 | "nbformat_minor": 4 51 | } 52 | -------------------------------------------------------------------------------- /demos/sklearn_elasticnet_wine_script_job/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2023 Oracle and/or its affiliates. 3 | # Licensed under the Universal Permissive License v 1.0 as shown at 4 | # https://oss.oracle.com/licenses/upl/ 5 | 6 | export MLFLOW_TRACKING_URI= 7 | mlflow run https://github.com/mlflow/mlflow#examples/sklearn_elasticnet_wine --experiment-name /sklearn/elastic_net --backend oci-datascience --backend-config ./oci-datascience-config.json 8 | -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | -r test-requirements.txt 2 | -r docs/requirements.txt 3 | build 4 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | 22 | livehtml: 23 | sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 24 | 25 | clean: 26 | rm -rf $(BUILDDIR)/* 27 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | autodoc 2 | furo 3 | nbsphinx 4 | oci-mlflow 5 | sphinx 6 | sphinx_copybutton 7 | sphinx_code_tabs 8 | sphinx-autobuild 9 | sphinx-autorun 10 | sphinx-design 11 | -------------------------------------------------------------------------------- /docs/source/_static/logo-dark-mode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oracle/oci-mlflow/1e7b9f36e5afe70d00dfc56faca6b0a8e311366d/docs/source/_static/logo-dark-mode.png -------------------------------------------------------------------------------- /docs/source/_static/logo-light-mode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oracle/oci-mlflow/1e7b9f36e5afe70d00dfc56faca6b0a8e311366d/docs/source/_static/logo-light-mode.png -------------------------------------------------------------------------------- /docs/source/concepts.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | Concepts 3 | ======== 4 | 5 | MLflow is a framework that enables engineering teams to easily move workflows from R&D to staging to 6 | production, overcoming one of the common data science problems of model reproducibility and productionalization. 
7 | 8 | For a detailed view of the key concepts of MLflow please refer to their 9 | documentation: `https://mlflow.org/docs/latest/concepts.html `_ 10 | 11 | **Benefits Of Using MLflow** 12 | 13 | 14 | - Open Source tool for MLops, removing vendor lock-in, works from laptop to cloud all with the same CLI/SDK 15 | - Supports many Tools and Frameworks, for example Spark, Keras, Pytorch, Tensorflow, XGBoost, etc. When using one of these 16 | frameworks, you can use MLflow to track your experiments, store your models, and deploy them to a variety of platforms where much of this happens 17 | automatically for you. Using `mlflow..autolog `_ 18 | the framework will automatically log parameters & metrics. 19 | - Highly Customizable, thanks to Conda and Containers models and training workloads are extremely flexible. 20 | - It is ideal for data science projects, because the workflow enabled by MLflow scales from a data scientist tinkering 21 | on the weekend with some new ideas, to running a reproducible training experiment on large scale data in the cloud. 22 | - Focuses on the entire Machine learning lifecycle, by providing tools for data preparation, model training, 23 | model evaluation, model serving, and model deployment MLflow is a complete solution for the entire ML lifecycle, 24 | working together with Oracle OCI Data Science to scale and deploy highly available models in the cloud. 25 | - Custom Visualization, the MLflow interface allows you to create custom visualizations for an experiment to compare 26 | different runs and models. 27 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022, 2023 Oracle and/or its affiliates. 
2 | # Licensed under the Universal Permissive License v 1.0 as shown at 3 | # https://oss.oracle.com/licenses/upl/ 4 | 5 | # -- Path setup -------------------------------------------------------------- 6 | 7 | import datetime 8 | import os 9 | import sys 10 | 11 | autoclass_content = "both" 12 | 13 | sys.path.insert(0, os.path.abspath("../../")) 14 | 15 | import oci_mlflow 16 | 17 | version = oci_mlflow.__version__ 18 | release = version 19 | 20 | 21 | # -- Project information ----------------------------------------------------- 22 | 23 | project = "OCI MLflow" 24 | copyright = ( 25 | f"2022, {datetime.datetime.now().year} Oracle and/or its affiliates. " 26 | f"Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/" 27 | ) 28 | author = "Oracle Data Science" 29 | 30 | # -- General configuration --------------------------------------------------- 31 | 32 | # Add any Sphinx extension module names here, as strings. They can be 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 34 | # ones. 35 | extensions = [ 36 | "sphinx.ext.napoleon", 37 | "sphinx.ext.autodoc", 38 | "sphinx.ext.doctest", 39 | "sphinx.ext.ifconfig", 40 | "sphinx.ext.todo", 41 | "sphinx.ext.extlinks", 42 | "sphinx.ext.intersphinx", 43 | "nbsphinx", 44 | "sphinx_code_tabs", 45 | "sphinx_copybutton", 46 | "sphinx.ext.duration", 47 | "sphinx.ext.autosummary", 48 | "sphinx.ext.intersphinx", 49 | "sphinx.ext.viewcode", 50 | "sphinx_autorun", 51 | ] 52 | 53 | intersphinx_mapping = { 54 | "python": ("https://docs.python.org/3/", None), 55 | "sphinx": ("https://www.sphinx-doc.org/en/master/", None), 56 | } 57 | intersphinx_disabled_domains = ["std"] 58 | 59 | 60 | # Add any paths that contain templates here, relative to this directory. 
61 | templates_path = ["_templates"] 62 | 63 | # Get version 64 | import oci_mlflow 65 | 66 | version = oci_mlflow.__version__ 67 | release = version 68 | 69 | # Unless we want to expose real buckets and namespaces 70 | nbsphinx_allow_errors = True 71 | 72 | # List of patterns, relative to source directory, that match files and 73 | # directories to ignore when looking for source files. 74 | # This pattern also affects html_static_path and html_extra_path. 75 | exclude_patterns = ["build", "**.ipynb_checkpoints", "Thumbs.db", ".DS_Store"] 76 | 77 | 78 | # -- Options for HTML output ------------------------------------------------- 79 | 80 | # The theme to use for HTML and HTML Help pages. See the documentation for 81 | # a list of builtin themes. 82 | # 83 | html_theme = "furo" 84 | language = "en" 85 | 86 | # Disable the generation of the various indexes 87 | html_use_modindex = False 88 | html_use_index = False 89 | 90 | html_theme_options = { 91 | "light_logo": "logo-light-mode.png", 92 | "dark_logo": "logo-dark-mode.png", 93 | } 94 | 95 | # Add any paths that contain custom static files (such as style sheets) here, 96 | # relative to this directory. They are copied after the builtin static files, 97 | # so a file named "default.css" will overwrite the builtin "default.css". 98 | html_static_path = ["_static"] 99 | -------------------------------------------------------------------------------- /docs/source/demos_examples.rst: -------------------------------------------------------------------------------- 1 | ================ 2 | Demos & Examples 3 | ================ 4 | 5 | Please note that the demo videos shared below may not represent Oracle's official views. They have been posted by individual users on their personal channels. 6 | 7 | .. admonition:: Examples 8 | :class: note 9 | 10 | .. list-table:: 11 | :widths: 50 50 12 | :header-rows: 1 13 | 14 | * - Demo 15 | - Description 16 | 17 | * - `Run MLflow project on the local environment `__ 18 | - | 1. 
`Installing `__ `GML for CPUs` conda environment 19 | | 2. Introduction to the `MLflow projects `__ 20 | | 3. Running `sklearn_elasticnet_wine `__ example 21 | | 4. Checking the experiment's result 22 | 23 | * - `Run MLflow project on the OCI DSC Jobs `__ 24 | - | 1. `OCI Data Science Jobs `__ 25 | | 2. `Publishing `__ `GML for CPUs` conda environment 26 | | 3. Preparing :ref:`configuration files ` 27 | | 4. Running `sklearn_elasticnet_wine `__ example 28 | | 5. Checking the experiment's result 29 | 30 | * - `Run MLflow project on the DSC Jobs with BYOC `__ 31 | - | 1. `OCI Data Science Jobs `__ 32 | | 2. `Bring Your Own Container `__ 33 | | 3. `Publishing a container image `__ to the OCR 34 | | 4. Preparing :ref:`configuration files ` 35 | | 5. Running `docker `__ project example 36 | | 6. Checking the experiment's result 37 | 38 | * - `Run MLflow project on the Data Flow service `__ 39 | - | 1. `Running Spark Application on OCI Data Flow `__ 40 | | 2. `Publishing `__ `PySpark 3.2 and Data Flow` conda environment 41 | | 3. Preparing :ref:`configuration files ` 42 | | 4. `Allowing Data Flow to Access a Private Network `__ 43 | | 5. Running `pyspark_ml_autologging `__ project example 44 | | 6. Checking the experiment's result 45 | 46 | * - `Deploy MLflow model on OCI Data Science `__ 47 | - | 1. `Deploy Model on OCI Data Science `__ 48 | | 2. `Publishing `__ `GML for CPUs` conda environment 49 | | 3. Preparing :ref:`configuration files ` 50 | | 4. Running `sklearn_elasticnet_wine `__ project example 51 | | 5. Checking the experiment's result 52 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | ================== 2 | OCI MLflow Plugins 3 | ================== 4 | 5 | The OCI MLflow plugins allow to utilize Oracle Cloud Infrastructure (OCI) resources to run MLflow experiments. 6 | 7 | |PyPI|_ |Python|_ 8 | 9 | .. 
|PyPI| image:: https://img.shields.io/pypi/v/oci-mlflow.svg?style=for-the-badge&logo=pypi&logoColor=white
.. _PyPI: https://pypi.org/project/oci-mlflow/
.. |Python| image:: https://img.shields.io/pypi/pyversions/oci-mlflow.svg?style=for-the-badge&logo=pypi&logoColor=white
.. _Python: https://pypi.org/project/oci-mlflow/


.. toctree::
   :maxdepth: 2
   :caption: Contents:

   release_notes
   quickstart
   concepts
   tracking_server
   project
   model_deployment
   demos_examples
--------------------------------------------------------------------------------
/docs/source/model_deployment.rst:
--------------------------------------------------------------------------------
====================
Deploy MLflow models
====================

OCI Data Science supports two forms of runtime environment for running
inference -

- Conda Environment packaged using `conda
  pack `__
- Container image

MLflow CLI and SDK can be used for deploying models on OCI Data Science.
The CLI and the SDK API accept the
`target `__
parameter to specify the deployment target. To deploy on OCI,
specify ``oci-datascience`` as the target.

.. admonition:: Prerequisites
   :class: note

   - pip install oci-mlflow
   - pip install oracle-ads[opctl]
   - A model is registered in MLflow Tracking server
   - The conda pack used for model deployment must have ``mlflow``

CLI help
--------

Check CLI options for ``mlflow deployments`` supported with the
``oci-datascience`` target by running -

::

   mlflow deployments help -t oci-datascience

Create Inference Endpoint Using Conda Environments
--------------------------------------------------

In conda based deployment, the dependencies required to run the model are
packaged using `conda pack `__.
OCI Data Science provides curated conda environments to support wide variety of popular machine learning frameworks. To use conda runtime, you would 41 | choose from one of the following options: - 42 | 43 | 1. The service provided conda pack 44 | 2. Build your own conda environment and package it using `conda pack `__ and upload 45 | to object storage bucket . More details on how to manage your own conda packs can be 46 | found `here `__ 47 | 3. MLflow CLI with help of ``oci-mlflow`` plugin, can build conda environment from ``conda.yaml`` available in the model 48 | artifact and push it to the object storage. The ``conda.yaml`` file is auto generated by ``mlflow`` 49 | when you log/register a model. Before using the autogenerated ``conday.yaml``, verify that it has all the required dependencies. You could create the right ``conda.yaml`` while logging the model by providing the conda dependencies as dictionary input. More information is available in the `API docs `__ 50 | 51 | 52 | 53 | Deployment Specification 54 | ~~~~~~~~~~~~~~~~~~~~~~~~ 55 | 56 | Create a model deployment specification in YAML file. Refer [schema] for 57 | YAML specification. Copy one of the templates below and customize it as 58 | per your requirement - 59 | 60 | 1. Template to build conda pack on the fly using ``conda.yaml`` stored 61 | in the model artifact. 62 | 63 | .. admonition:: Prerequisites 64 | :class: note 65 | 66 | - Build a local ``OCI Data Science Job`` `compatible docker image `__ 67 | - Connect to Object Storage through the Internet 68 | 69 | .. tabs:: 70 | 71 | .. code-tab:: yaml 72 | 73 | kind: deployment 74 | spec: 75 | infrastructure: 76 | kind: infrastructure 77 | type: modelDeployment 78 | spec: 79 | logGroupId: ocid1.loggroup.oc1.iad.. 80 | logId: ocid1.log.oc1.iad.. 81 | projectId: ocid1.datascienceproject.oc1.iad.. 82 | compartmentId: ocid1.compartment.oc1.. 
83 | shapeName: VM.Standard.E3.Flex 84 | shapeConfigDetails: 85 | memoryInGBs: 32 86 | ocpus: 4 87 | blockStorageSize: 50 88 | runtime: 89 | kind: runtime 90 | type: conda 91 | spec: 92 | uri: 93 | name: bc_sklearn_conda 94 | destination: oci://mayoor-dev@ociodscdev/mlflow-conda-envs/ 95 | gpu: false 96 | overwrite: false 97 | keepLocal: true 98 | localCondaDir: ./conda 99 | #scoreCode: path/to/score.py [optional: This is required if you want to customize score.py] 100 | 101 | 2. Template to deploy using previously published conda pack. 102 | 103 | .. tabs:: 104 | 105 | .. code-tab:: yaml 106 | 107 | kind: deployment 108 | spec: 109 | infrastructure: 110 | kind: infrastructure 111 | type: modelDeployment 112 | spec: 113 | logGroupId: ocid1.loggroup.oc1.iad.. 114 | logId: ocid1.log.oc1.iad.. 115 | projectId: ocid1.datascienceproject.oc1.iad.. 116 | compartmentId: ocid1.compartment.oc1.. 117 | shapeName: VM.Standard.E3.Flex 118 | shapeConfigDetails: 119 | memoryInGBs: 32 120 | ocpus: 4 121 | blockStorageSize: 50 122 | runtime: 123 | kind: runtime 124 | type: conda 125 | spec: 126 | uri: oci://bucket@namespace/path/to/conda-env 127 | pythonVersion: 3.9.15 128 | #scoreCode: path/to/score.py [optional: This is required if you want to customize score.py] 129 | 130 | Create Deployment 131 | ~~~~~~~~~~~~~~~~~ 132 | 133 | Use MLflow CLI/SDK to create a deployment. Once the deployment 134 | specification is created, pass it as input to the ``mlflow deployments`` 135 | command using 136 | ``--config deploy-config-file=`` and 137 | ``oci-datascience`` as the target. 138 | 139 | 140 | .. code-block:: bash 141 | 142 | export MLFLOW_TRACKING_URI= 143 | 144 | mlflow deployments create --name -m models:// -t oci-datascience --config deploy-config-file=deployment_specification.yaml 145 | 146 | Invoke Inference Endpoint 147 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 148 | 149 | Invoke the endpoint through code or CLI. 
Here is an example of invoking 150 | an endpoint using ``oci raw-request`` CLI command - 151 | 152 | :: 153 | 154 | data='{"columns":["mean radius","mean texture","mean perimeter","mean area","mean smoothness","mean compactness","mean concavity","mean concave points"],"index":[0],"data":[[17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471]]}' 155 | 156 | oci raw-request --http-method POST --target-uri https://modeldeployment.us-ashburn-1.oci.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.iad../predict --request-body "$data" 157 | 158 | { 159 | "data": { 160 | "predictions": [ 161 | 0 162 | ] 163 | }, 164 | "headers": { 165 | "Connection": "keep-alive", 166 | "Content-Length": "20", 167 | "Content-Type": "application/json", 168 | "Date": "Wed, 15 Feb 2023 04:26:18 GMT", 169 | "Server": "nginx/1.14.1", 170 | "X-Content-Type-Options": "nosniff", 171 | "opc-request-id": "72BD2656826241C586FD29D9F03EA2E1/D95ADB6267CD5390F9E6D26108E60AF9/907E1377442682A9A72AB1D797056240" 172 | }, 173 | "status": "200 OK" 174 | } 175 | 176 | Create Inference endpoint Using Container Images 177 | ------------------------------------------------ 178 | 179 | Container image allows you to not just bundle the runtime dependencies, 180 | but also allows you to use the inference serving framework of your 181 | choice. The container has to adhere to following requirements - 182 | 183 | 1. Provides ``/predict`` endpoint for prediction 184 | 2. Provides ``/health`` endpoint for health check 185 | 3. Is published to ``ocir`` registry in your tenancy and the policies 186 | are setup such that OCI Data Science service can pull the image from 187 | your registry. 
More infomration 188 | `here `__ 189 | 190 | In order to adhere to these requirements, you will have to add a reverse 191 | proxy on your container which will map the default endpoint offered by 192 | your model serving framework to ``/predict`` and health endpoint to 193 | ``/health`` 194 | 195 | Refer `how to produce a container 196 | image `__ 197 | that uses ``mlflow models serve`` framework for model 198 | 199 | .. _prerequisites-1: 200 | 201 | .. admonition:: Prerequisites 202 | :class: note 203 | 204 | - pip install oci-mlflow 205 | - pip install oracle-ads[opctl] 206 | - A model is registered in MLflow Tracking server 207 | - Container image is published to ``ocir`` 208 | 209 | .. _deployment-specification-1: 210 | 211 | Deployment Specification 212 | ~~~~~~~~~~~~~~~~~~~~~~~~ 213 | 214 | Create a model deployment specification in YAML file. Refer [schema] for 215 | YAML specification. Copy the template below and customize it as per your 216 | requirement - 217 | 218 | 219 | .. tabs:: 220 | 221 | .. code-tab:: yaml 222 | 223 | kind: deployment 224 | spec: 225 | infrastructure: 226 | kind: infrastructure 227 | type: modelDeployment 228 | spec: 229 | logGroupId: ocid1.loggroup.oc1.iad.. 230 | logId: ocid1.log.oc1.iad.. 231 | projectId: ocid1.datascienceproject.oc1.iad.. 232 | compartmentId: ocid1.compartment.oc1.. 233 | shapeName: VM.Standard.E3.Flex 234 | shapeConfigDetails: 235 | memoryInGBs: 32 236 | ocpus: 4 237 | blockStorageSize: 50 238 | replica: 1 239 | runtime: 240 | kind: runtime 241 | type: container 242 | spec: 243 | image: {region}.ocir.io// 244 | serverPort: 5001 245 | healthCheckPort: 5001 246 | 247 | .. _create-deployment-1: 248 | 249 | Create Deployment 250 | ~~~~~~~~~~~~~~~~~ 251 | 252 | Use MLflow CLI/SDK to create a deployment. Once the deployment 253 | specification is created, pass it as input to the ``mlflow deployments`` 254 | command using 255 | ``--config deploy-config-file=`` and 256 | ``oci-datascience`` as the target. 
257 | 258 | .. tabs:: 259 | 260 | .. code-tab:: shell 261 | 262 | export MLFLOW_TRACKING_URI= 263 | 264 | mlflow deployments create --name -m models:// -t oci-datascience --config deploy-config-file=deployment_specification.yaml 265 | 266 | .. _invoke-inference-endpoint-1: 267 | 268 | Invoke Inference Endpoint 269 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 270 | 271 | Invoke the endpoint through code or CLI. Here is an example of invoking 272 | an endpoint using ``oci raw-request`` CLI command - 273 | 274 | :: 275 | 276 | data='{"dataframe_split": {"columns":["mean radius","mean texture","mean perimeter","mean area","mean smoothness","mean compactness","mean concavity","mean concave points"],"index":[0],"data":[[17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471]]}}' 277 | 278 | oci raw-request --http-method POST --target-uri https://modeldeployment.us-ashburn-1.oci.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.iad../predict --request-body "$data" 279 | 280 | { 281 | "data": { 282 | "predictions": [ 283 | 0 284 | ] 285 | }, 286 | "headers": { 287 | "Connection": "keep-alive", 288 | "Content-Length": "20", 289 | "Content-Type": "application/json", 290 | "Date": "Wed, 15 Feb 2023 04:26:18 GMT", 291 | "Server": "nginx/1.14.1", 292 | "X-Content-Type-Options": "nosniff", 293 | "opc-request-id": "72BD2656826241C586FD29D9F03EA2E1/D95ADB6267CD5390F9E6D26108E60AF9/907E1377442682A9A72AB1D797056240" 294 | }, 295 | "status": "200 OK" 296 | } 297 | 298 | Update Model Deployment Details 299 | ------------------------------- 300 | 301 | To update model deployment configuration use the YAML specification file 302 | that was created for deployment and then make changes to the attributes 303 | that you want to change. Use mlflow CLI/SDK and provide the OCID of the 304 | model deployment for name parameter and use 305 | ``--config deploy-config-file=`` option. 306 | 307 | :: 308 | 309 | mlflow deployments update --name ocid1.datasciencemodeldeployment.oc1.. 
-t oci-datascience --config deploy-config-file=./deployment_update_config.yaml 310 | 311 | **Note:** You may not be able to change all the configuration in one 312 | pass. Check `Editing Model 313 | Deployments `__ 314 | for more details. 315 | 316 | Get Model Deployment Information 317 | -------------------------------- 318 | 319 | Fetch Model deployment information for any OCI by providing OCID of the 320 | model deployment for name parameter. 321 | 322 | :: 323 | 324 | mlflow deployments get -t oci-datascience --name ocid1.datasciencemodeldeployment.oc1.. 325 | -------------------------------------------------------------------------------- /docs/source/quickstart.rst: -------------------------------------------------------------------------------- 1 | ########## 2 | Quickstart 3 | ########## 4 | 5 | `MLflow `_ is a popular open source platform to manage the ML lifecycle, including 6 | experimentation, reproducibility, deployment, and a central model registry. MLflow currently offers four components: 7 | 8 | - MLflow Tracking *(experiment tracking)* 9 | - MLflow Projects *(code packaging format for reproducible runs using Conda on Data Science Jobs and Data Flow)* 10 | - MLflow Models *(package models for deployment in real time scoring, and batch scoring)* 11 | - Model Registry *(manage models)* 12 | 13 | Using MLflow with `Oracle Cloud Infrastructure (OCI) Data Science `_ you will first need to install the Oracle OCI MLflow plugin: 14 | 15 | .. note:: 16 | 17 | The OCI MLflow plugin will also install (if necessary) the ``mlflow`` and ``oracle-ads`` packages 18 | 19 | .. list-table:: 20 | :widths: 25 75 21 | :header-rows: 1 22 | :align: left 23 | 24 | * - Package Name 25 | - Latest Version 26 | * - MLflow 27 | - .. image:: https://img.shields.io/pypi/v/mlflow.svg?style=for-the-badge&logo=pypi&logoColor=white 28 | * - oracle-ads 29 | - .. 
image:: https://img.shields.io/pypi/v/oracle-ads.svg?style=for-the-badge&logo=pypi&logoColor=white 30 | 31 | 32 | - Install the ``oci-mlflow`` plugin 33 | 34 | .. code-block:: shell 35 | 36 | pip install oci-mlflow 37 | 38 | - Test ``oci-mlflow`` plugin setup 39 | 40 | .. code-block:: shell 41 | 42 | mlflow deployments help -t oci-datascience 43 | 44 | 45 | Background reading to understand the concepts of MLflow and OCI Data Science: 46 | 47 | - Getting started with `OCI Data Science Jobs `__ 48 | - Getting started with `Oracle Accelerated Data Science SDK `__ to simplify `creating `__ and `running `__ Jobs 49 | - Getting started with `Data Science Environments `__ 50 | - Getting started with `Custom Conda Environments `__ 51 | 52 | **Authentication and Policies:** 53 | 54 | - Getting started with `OCI Data Science Policies `__ 55 | - `API Key-Based Authentication `__ - ``api_key`` 56 | - `Resource Principal Authentication `__ - ``resource_principal`` 57 | - `Instance Principal Authentication `__ - ``instance_principal`` 58 | 59 | **OCI Integration Points** 60 | 61 | The ``oci_mlflow`` plugin enables OCI users to use OCI resources to manage their machine learning usecase life cycle. This 62 | table below provides the mapping between the MLflow features and the OCI resources that are used. 63 | 64 | .. note:: 65 | .. 
list-table:: 66 | :widths: 15 10 67 | :header-rows: 1 68 | :align: left 69 | 70 | * - MLflow Use Case 71 | - OCI Resource 72 | * - User running machine learning experiments on notebook, logs model artifacts, model performance etc 73 | - Data Science Jobs, Object Storage, MySQL 74 | * - Batch workloads using spark 75 | - Data Flow, Object Storage, MySQL 76 | * - Model Deployment 77 | - Data Science Model Deployment 78 | * - User running machine learning experiments on notebook, logs model artifacts, model performance etc 79 | - Object Storage, MySQL 80 | -------------------------------------------------------------------------------- /docs/source/release_notes.rst: -------------------------------------------------------------------------------- 1 | ============= 2 | Release Notes 3 | ============= 4 | 5 | 1.0.2 6 | ----- 7 | Release date: Jul 27, 2023 8 | 9 | **New Features and Enhancements:** 10 | 11 | * Changed the default authentication to the resource principal. 12 | 13 | 14 | 1.0.1 15 | ----- 16 | Release date: Jun 15, 2023 17 | 18 | **New Features and Enhancements:** 19 | 20 | * Updated the ``README-development.md`` file for better clarity and ease of use. 21 | * Improved the ``Dockerfile`` to provide the option of running the tracking server using a local ``oci-mlflow`` wheel. 22 | * Refactored Object Storage (OS) plugin to leverage Oracle Cloud Infrastructure (OCI) `OS UploadManager `__, enhancing the functionality and improving performance. 23 | 24 | **Bug Fixes:** 25 | 26 | * Fixed the issue with ``launch_mlflow.sh`` where the copyright information was added in the wrong place, resulting in an error when running ``launch_mlflow.sh``. -------------------------------------------------------------------------------- /oci_mlflow/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*-- 3 | 4 | # Copyright (c) 2023 Oracle and/or its affiliates. 
5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ 6 | 7 | import json 8 | import logging 9 | import os 10 | 11 | logger = logging.getLogger("oci.mlflow") 12 | logger.setLevel(logging.INFO) 13 | 14 | # https://packaging.python.org/en/latest/guides/single-sourcing-package-version/#single-sourcing-the-package-version 15 | from importlib import metadata 16 | 17 | __version__ = metadata.version("oci_mlflow") 18 | 19 | 20 | def setup_default_auth(): 21 | """Setup default auth.""" 22 | if os.environ.get("OCIFS_IAM_TYPE") and os.environ.get("OCI_IAM_TYPE"): 23 | return 24 | if os.environ.get("OCIFS_IAM_TYPE"): 25 | os.environ["OCI_IAM_TYPE"] = os.environ["OCIFS_IAM_TYPE"] 26 | elif os.environ.get("OCI_IAM_TYPE"): 27 | os.environ["OCIFS_IAM_TYPE"] = os.environ["OCI_IAM_TYPE"] 28 | elif os.environ.get("OCI_RESOURCE_PRINCIPAL_VERSION"): 29 | os.environ["OCIFS_IAM_TYPE"] = "resource_principal" 30 | os.environ["OCI_IAM_TYPE"] = "resource_principal" 31 | else: 32 | os.environ["OCIFS_IAM_TYPE"] = "api_key" 33 | os.environ["OCI_IAM_TYPE"] = "api_key" 34 | 35 | 36 | setup_default_auth() 37 | -------------------------------------------------------------------------------- /oci_mlflow/auth_plugin.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*-- 3 | 4 | # Copyright (c) 2023 Oracle and/or its affiliates. 5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ 6 | 7 | from ads.common.auth import default_signer 8 | from mlflow.tracking.request_auth.abstract_request_auth_provider import RequestAuthProvider 9 | 10 | OCI_REQUEST_AUTH = "OCI_REQUEST_AUTH" 11 | 12 | 13 | class OCIMLFlowAuthRequestProvider(RequestAuthProvider): 14 | 15 | def get_name(self): 16 | """ 17 | Get the name of the request auth provider. 
18 | 19 | :return: str of request auth name 20 | """ 21 | return OCI_REQUEST_AUTH 22 | 23 | def get_auth(self): 24 | """ 25 | Generate oci signer based on oci environment variable. 26 | 27 | :return: OCI MLFlow signer 28 | """ 29 | return default_signer()["signer"] 30 | -------------------------------------------------------------------------------- /oci_mlflow/oci_object_storage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*-- 3 | 4 | # Copyright (c) 2023 Oracle and/or its affiliates. 5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ 6 | 7 | import os 8 | from typing import List 9 | from urllib.parse import urlparse 10 | 11 | import fsspec 12 | from ads.common import auth 13 | from ads.common.oci_client import OCIClientFactory 14 | from mlflow.entities import FileInfo 15 | from mlflow.store.artifact.artifact_repo import ArtifactRepository 16 | from mlflow.utils.file_utils import relative_path_to_artifact_path 17 | from oci import object_storage 18 | from oci.auth.signers import InstancePrincipalsDelegationTokenSigner 19 | from ocifs import OCIFileSystem 20 | 21 | from oci_mlflow import logger 22 | 23 | OCI_SCHEME = "oci" 24 | OCI_PREFIX = f"{OCI_SCHEME}://" 25 | DEFAULT_DELEGATION_TOKEN_PATH = "/opt/spark/delegation-secrets/delegation.jwt" 26 | DELEGATION_TOKEN_PATH = "DELEGATION_TOKEN_PATH" 27 | 28 | 29 | def parse_os_uri(uri: str): 30 | """ 31 | Parse an OCI object storage URI, returning tuple (bucket, namespace, path). 32 | 33 | Parameters 34 | ---------- 35 | uri: str 36 | The OCI Object Storage URI. 37 | 38 | Returns 39 | ------- 40 | Tuple 41 | The (bucket, ns, type) 42 | 43 | Raise 44 | ----- 45 | Exception 46 | If provided URI is not an OCI OS bucket URI. 
47 | """ 48 | parsed = urlparse(uri) 49 | if parsed.scheme.lower() != OCI_SCHEME: 50 | raise Exception("Not an OCI object storage URI: %s" % uri) 51 | path = parsed.path 52 | 53 | if path.startswith("/"): 54 | path = path[1:] 55 | 56 | bucket, ns = parsed.netloc.split("@") 57 | 58 | return bucket, ns, path 59 | 60 | 61 | def get_token_path(): 62 | """ 63 | Gets delegation token path. 64 | 65 | Return 66 | ------ 67 | str 68 | The delegation token path. 69 | """ 70 | token_path = ( 71 | DEFAULT_DELEGATION_TOKEN_PATH 72 | if os.path.exists(DEFAULT_DELEGATION_TOKEN_PATH) 73 | else os.environ.get(DELEGATION_TOKEN_PATH) 74 | ) 75 | return token_path 76 | 77 | 78 | def get_delegation_token_signer(token_path: str): 79 | """ 80 | Generate delegation token signer. 81 | 82 | Parameters 83 | ---------- 84 | token_path: str 85 | The delegation token path. 86 | 87 | Return 88 | ------ 89 | oci.auth.signers.InstancePrincipalsDelegationTokenSigner 90 | The delegation token signer. 91 | 92 | """ 93 | with open(token_path) as fd: 94 | delegation_token = fd.read() 95 | signer = InstancePrincipalsDelegationTokenSigner(delegation_token=delegation_token) 96 | return signer 97 | 98 | 99 | def get_signer(token_path: str = None): 100 | """ 101 | Generate default_signer. If running in Data Flow, use InstancePrincipalsDelegationTokenSigner. 102 | If running locally, use default signer. 103 | 104 | Parameters 105 | ---------- 106 | token_path: str 107 | Defaults to None. The delegation token path. 108 | 109 | Return 110 | ------ 111 | dict 112 | Contains keys - config, signer and client_kwargs. 113 | 114 | - The config contains the config loaded from the configuration loaded from the default location if the default 115 | auth mode is API keys, otherwise it is empty dictionary. 116 | - The signer contains the signer object created from default auth mode. 117 | - client_kwargs contains the `client_kwargs` that was passed in as input parameter. 
118 | 119 | """ 120 | if token_path: 121 | auth.set_auth( 122 | signer_callable=get_delegation_token_signer, 123 | signer_kwargs={"token_path": token_path}, 124 | ) 125 | return auth.default_signer() 126 | 127 | 128 | class ArtifactUploader: 129 | """ 130 | The class helper to upload model artifacts. 131 | 132 | Attributes 133 | ---------- 134 | upload_manager: UploadManager 135 | The uploadManager simplifies interaction with the Object Storage service. 136 | """ 137 | 138 | def __init__(self): 139 | """Initializes `ArtifactUploader` instance.""" 140 | self.upload_manager = object_storage.UploadManager( 141 | OCIClientFactory(**get_signer(token_path=get_token_path())).object_storage 142 | ) 143 | 144 | def upload(self, file_path: str, dst_path: str): 145 | """Uploads model artifacts. 146 | 147 | Parameters 148 | ---------- 149 | file_path: str 150 | The source file path. 151 | dst_path: str 152 | The destination path. 153 | """ 154 | bucket_name, namespace_name, object_name = parse_os_uri(dst_path) 155 | logger.debug(f"{bucket_name=}, {namespace_name=}, {object_name=}") 156 | response = self.upload_manager.upload_file( 157 | namespace_name=namespace_name, 158 | bucket_name=bucket_name, 159 | object_name=object_name, 160 | file_path=file_path, 161 | ) 162 | logger.debug(response) 163 | 164 | 165 | class OCIObjectStorageArtifactRepository(ArtifactRepository): 166 | """MLFlow Plugin implementation for storing artifacts to OCI Object Storage.""" 167 | 168 | def _download_file(self, remote_file_path, local_path): 169 | if not remote_file_path.startswith(self.artifact_uri): 170 | full_path = os.path.join(self.artifact_uri, remote_file_path) 171 | else: 172 | full_path = remote_file_path 173 | fs: OCIFileSystem = self.get_fs() 174 | logger.info(f"{full_path}, {remote_file_path}") 175 | fs.download(full_path, str(local_path)) 176 | 177 | def log_artifact(self, local_file: str, artifact_path: str = None): 178 | """ 179 | Logs a local file as an artifact, optionally taking 
an ``artifact_path`` to place it in 180 | within the run's artifacts. Run artifacts can be organized into directories, so you can 181 | place the artifact in a directory this way. 182 | 183 | Parameters 184 | ---------- 185 | local_file:str 186 | Path to artifact to log. 187 | artifact_path:str 188 | Directory within the run's artifact directory in which to log the artifact. 189 | """ 190 | # Since the object storage path should contain "/", the code below needs to use concatenation "+" instead of 191 | # os.path.join(). The latter can introduce "\" in Windows which can't be recognized by object storage as a valid prefix. 192 | # `artifact_path` must not be space character like " " or " ". 193 | if isinstance(artifact_path, str) and artifact_path.isspace(): 194 | raise ValueError("`artifact_path` must not be whitespace string.") 195 | artifact_path = artifact_path.rstrip("/") + "/" if artifact_path else "" 196 | dest_path = self.artifact_uri.rstrip("/") + "/" + artifact_path + os.path.basename(local_file) 197 | ArtifactUploader().upload(local_file, dest_path) 198 | 199 | def log_artifacts(self, local_dir: str, artifact_path: str = None): 200 | """ 201 | Logs the files in the specified local directory as artifacts, optionally taking 202 | an ``artifact_path`` to place them in within the run's artifacts. 203 | 204 | Parameters 205 | ---------- 206 | local_dir:str 207 | Directory of local artifacts to log. 208 | artifact_path:str 209 | Directory within the run's artifact directory in which to log the artifacts. 210 | """ 211 | artifact_uploader = ArtifactUploader() 212 | # Since the object storage path should contain "/", the code below needs to use concatenation "+" instead of 213 | # os.path.join(). The latter can introduce "\" in Windows which can't be recognized by object storage as a valid prefix. 214 | # `artifact_path` must not be space character like " " or " ". 
215 | if isinstance(artifact_path, str) and artifact_path.isspace(): 216 | raise ValueError("`artifact_path` must not be whitespace string.") 217 | artifact_path = artifact_path.rstrip("/") + "/" if artifact_path else "" 218 | dest_path = self.artifact_uri.rstrip("/") + "/" + artifact_path 219 | local_dir = os.path.abspath(local_dir) 220 | 221 | for root, _, filenames in os.walk(local_dir): 222 | upload_path = dest_path 223 | if root != local_dir: 224 | rel_path = os.path.relpath(root, local_dir) 225 | rel_path = relative_path_to_artifact_path(rel_path) 226 | upload_path = dest_path + rel_path 227 | for f in filenames: 228 | artifact_uploader.upload( 229 | file_path=os.path.join(root, f), 230 | dst_path=upload_path + f 231 | ) 232 | 233 | def get_fs(self): 234 | """ 235 | Gets fssepc filesystem based on the uri scheme. 236 | """ 237 | self.fs = fsspec.filesystem( 238 | urlparse(self.artifact_uri).scheme, 239 | **get_signer(token_path=get_token_path()), 240 | ) # FileSystem class corresponding to the URI scheme. 241 | 242 | return self.fs 243 | 244 | def list_artifacts(self, path: str = "") -> List[FileInfo]: 245 | """ 246 | Return all the artifacts for this run_id directly under path. If path is a file, returns 247 | an empty list. Will error if path is neither a file nor directory. 248 | 249 | Parameters 250 | ---------- 251 | path:str 252 | Relative source path that contains desired artifacts 253 | 254 | Returns 255 | ------- 256 | List[FileInfo] 257 | List of artifacts as FileInfo listed directly under path. 
258 | """ 259 | result = [] 260 | dest_path = self.artifact_uri 261 | if path: 262 | dest_path = os.path.join(dest_path, path) 263 | 264 | logger.debug(f"{path=}, {self.artifact_uri=}, {dest_path=}") 265 | 266 | fs = self.get_fs() 267 | files = ( 268 | os.path.relpath(f"{OCI_PREFIX}{f}", self.artifact_uri) 269 | for f in fs.glob(f"{dest_path}/*") 270 | ) 271 | 272 | for file in files: 273 | file_isdir = fs.isdir(os.path.join(self.artifact_uri, file)) 274 | size = 0 275 | if not file_isdir: 276 | size = fs.info(os.path.join(self.artifact_uri, file)).get("size", 0) 277 | result.append(FileInfo(file, file_isdir, size)) 278 | 279 | logger.debug(f"{result=}") 280 | 281 | result.sort(key=lambda f: f.path) 282 | return result 283 | 284 | def delete_artifacts(self, artifact_path: str = None): 285 | """ 286 | Delete the artifacts at the specified location. 287 | Supports the deletion of a single file or of a directory. Deletion of a directory 288 | is recursive. 289 | 290 | Parameters 291 | ---------- 292 | artifact_path: str 293 | Path of the artifact to delete. 294 | """ 295 | dest_path = self.artifact_uri 296 | if artifact_path: 297 | dest_path = os.path.join(self.artifact_uri, artifact_path) 298 | fs = self.get_fs() 299 | files = fs.ls(dest_path, refresh=True) 300 | for to_delete_obj in files: 301 | fs.delete(to_delete_obj) 302 | -------------------------------------------------------------------------------- /oci_mlflow/telemetry_logging.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8; -*- 3 | 4 | # Copyright (c) 2023 Oracle and/or its affiliates. 
5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ 6 | 7 | import re 8 | import os 9 | from dataclasses import dataclass 10 | from typing import Any, Callable 11 | from functools import wraps 12 | 13 | TELEMETRY_ARGUMENT_NAME = "telemetry" 14 | 15 | 16 | def telemetry( 17 | entry_point: str = "", 18 | name: str = "oci.mlflow", 19 | environ_variable: str = "EXTRA_USER_AGENT_INFO", 20 | ) -> Callable: 21 | """The telemetry decorator. 22 | 23 | Parameters 24 | ---------- 25 | entry_point: str 26 | The entry point of the telemetry. 27 | Example: "plugin=project&action=run" 28 | name: str 29 | The name of the telemetry. 30 | environ_variable: (str, optional). Defaults to `EXTRA_USER_AGENT_INFO`. 31 | The name of the environment variable to capture the telemetry sequence. 32 | 33 | Examples 34 | -------- 35 | >>> @telemetry(entry_point="plugin=project&action=run",name="oci.mlflow") 36 | ... def test_function(**kwargs) 37 | ... print(kwargs.pop("telemetry")) 38 | """ 39 | 40 | def decorator(func: Callable) -> Callable: 41 | @wraps(func) 42 | def wrapper(*args, **kwargs) -> Any: 43 | telemetry = Telemetry(name=name, environ_variable=environ_variable).begin( 44 | entry_point 45 | ) 46 | try: 47 | # Injects the telemetry object to the kwargs arguments of the decorated function. 48 | # This is necessary to be able to add some extra information to the telemetry 49 | # from the decorated function. 50 | return func(*args, **{**kwargs, **{TELEMETRY_ARGUMENT_NAME: telemetry}}) 51 | except: 52 | raise 53 | finally: 54 | telemetry.clean() 55 | 56 | return wrapper 57 | 58 | return decorator 59 | 60 | 61 | @dataclass 62 | class Telemetry: 63 | """Class to capture telemetry sequence into the environment variable. 64 | It is doing nothing but adding the telemetry sequence in the specified environment variable. 65 | 66 | Attributes 67 | ---------- 68 | name: str 69 | The name of the telemetry. 
70 | environ_variable: (str, optional). Defaults to `EXTRA_USER_AGENT_INFO`. 71 | The name of the environment variable to capture the telemetry sequence. 72 | """ 73 | 74 | name: str 75 | environ_variable: str = "EXTRA_USER_AGENT_INFO" 76 | 77 | def __post_init__(self): 78 | self.name = self._prepare(self.name) 79 | os.environ[self.environ_variable] = "" 80 | 81 | def clean(self) -> "Telemetry": 82 | """Cleans the associated environment variable. 83 | 84 | Returns 85 | ------- 86 | self: Telemetry 87 | An instance of the Telemetry. 88 | """ 89 | os.environ[self.environ_variable] = "" 90 | return self 91 | 92 | def _begin(self): 93 | self.clean() 94 | os.environ[self.environ_variable] = self.name 95 | 96 | def begin(self, value: str = "") -> "Telemetry": 97 | """The method that needs to be invoked in the beginning of the capturing telemetry sequence. 98 | It resets the value of the associated environment variable. 99 | 100 | Parameters 101 | ---------- 102 | value: str 103 | The value that need to be added to the telemetry. 104 | 105 | Returns 106 | ------- 107 | self: Telemetry 108 | An instance of the Telemetry. 109 | """ 110 | return self.clean().add(self.name).add(value) 111 | 112 | def add(self, value: str) -> "Telemetry": 113 | """Adds the new value to the telemetry. 114 | 115 | Parameters 116 | ---------- 117 | value: str 118 | The value that need to be added to the telemetry. 119 | 120 | Returns 121 | ------- 122 | self: Telemetry 123 | An instance of the Telemetry. 
124 | """ 125 | if not os.environ.get(self.environ_variable): 126 | self._begin() 127 | 128 | if value: 129 | current_value = os.environ.get(self.environ_variable, "") 130 | new_value = self._prepare(value) 131 | if new_value not in current_value: 132 | os.environ[self.environ_variable] = f"{current_value}&{new_value}" 133 | return self 134 | 135 | def print(self) -> None: 136 | """Prints the telemetry sequence from environment variable.""" 137 | print(f"{self.environ_variable} = {os.environ.get(self.environ_variable)}") 138 | 139 | def _prepare(self, value: str): 140 | """Replaces the special characters with the `_` in the input string.""" 141 | return ( 142 | re.sub("[^a-zA-Z0-9\.\-\_\&\=]", "_", re.sub(r"\s+", " ", value)) 143 | if value 144 | else "" 145 | ) 146 | -------------------------------------------------------------------------------- /oci_mlflow/templates/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oracle/oci-mlflow/1e7b9f36e5afe70d00dfc56faca6b0a8e311366d/oci_mlflow/templates/__init__.py -------------------------------------------------------------------------------- /oci_mlflow/templates/project_description.jinja2: -------------------------------------------------------------------------------- 1 | {% if job_info["spec"]["infrastructure"]["type"] == "dataScienceJob" %} 2 | ## OCI Data Science Job Details 3 | #### **Job:** [{{job_info["spec"]["name"]}}](https://cloud.oracle.com/data-science/jobs/{{job_info["spec"]["id"]}}) 4 | #### **Job Run:** [{{job_run_info["displayName"]}}](https://cloud.oracle.com/data-science/job-runs/{{job_run_info["id"]}}) 5 | #### **Compute instance shape:** {{job_info["spec"]["infrastructure"]["spec"]["shapeName"]}} 6 | #### **Storage:** {{job_info["spec"]["infrastructure"]["spec"]["blockStorageSize"]}} GB 7 | {% else %} 8 | ## OCI Data Flow Application Details 9 | #### **Application:** 
[{{job_info["spec"]["name"]}}](https://cloud.oracle.com/data-flow/apps/details/{{job_info["spec"]["id"]}}) 10 | #### **Application Run:** [{{job_run_info["displayName"]}}](https://cloud.oracle.com/data-flow/runs/details/{{job_run_info["id"]}}) 11 | #### **Driver shape:** {{job_info["spec"]["infrastructure"]["spec"]["driverShape"]}} 12 | #### **Executor shape:** {{job_info["spec"]["infrastructure"]["spec"]["executorShape"]}} 13 | #### **Number of executors:** {{job_info["spec"]["infrastructure"]["spec"]["numExecutors"]}} 14 | #### **Spark version:** {{job_info["spec"]["infrastructure"]["spec"]["sparkVersion"]}} 15 | {% endif %} 16 | -------------------------------------------------------------------------------- /oci_mlflow/templates/runtime.yaml.jinja2: -------------------------------------------------------------------------------- 1 | MODEL_ARTIFACT_VERSION: '3.0' 2 | MODEL_DEPLOYMENT: 3 | INFERENCE_CONDA_ENV: 4 | INFERENCE_ENV_PATH: {{conda_pack_uri}} 5 | INFERENCE_PYTHON_VERSION: {{python_version}} 6 | -------------------------------------------------------------------------------- /oci_mlflow/templates/score.py.jinja2: -------------------------------------------------------------------------------- 1 | import mlflow.pyfunc 2 | import pandas as pd 3 | import os 4 | import numpy as np 5 | 6 | 7 | def load_model(): 8 | cur_dir = os.path.dirname(os.path.realpath(__file__)) 9 | return mlflow.pyfunc.load_model(cur_dir) 10 | 11 | 12 | def predict(data, model=load_model()): 13 | if isinstance(data, dict): 14 | payload = pd.DataFrame(**data) 15 | else: 16 | payload = np.array(data) 17 | pred = model.predict(payload).tolist() 18 | return {'prediction': pred} 19 | -------------------------------------------------------------------------------- /oci_mlflow/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8; -*- 3 | 4 | # Copyright (c) 2023 Oracle and/or its affiliates. 
5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ 6 | 7 | import inspect 8 | import os 9 | from dataclasses import dataclass 10 | from typing import Dict, Union 11 | 12 | import ads 13 | import ocifs 14 | import yaml 15 | from ads.common.auth import AuthType, default_signer 16 | from ads.opctl.conda.cmds import _create, _publish 17 | from ads.opctl.config.base import ConfigProcessor 18 | from ads.opctl.config.merger import ConfigMerger 19 | from ads.opctl.constants import DEFAULT_ADS_CONFIG_FOLDER 20 | from oci.config import DEFAULT_LOCATION, DEFAULT_PROFILE 21 | 22 | from oci_mlflow import __version__, logger 23 | 24 | OCIFS_IAM_TYPE = "OCIFS_IAM_TYPE" 25 | WORK_DIR = "{work_dir}" 26 | 27 | DEFAULT_TAGS = {"oracle_ads": ads.__version__, "oci_mlflow": __version__} 28 | 29 | 30 | class UnsupportedAuthTypeError(Exception): 31 | def __init__(self, auth_type: str): 32 | super().__init__( 33 | f"The provided authentication type: {auth_type} is not supported. " 34 | f"Allowed values are: {AuthType.values()}" 35 | ) 36 | 37 | 38 | @dataclass 39 | class OCIBackendConfig: 40 | """Class representing OCI config. 41 | 42 | Attributes 43 | ---------- 44 | oci_auth: str 45 | OCI auth type. 46 | oci_config_path: str 47 | Path to the OCI auth config. 48 | oci_profile: str 49 | The OCI auth profile. 
50 | """ 51 | 52 | oci_auth: str = "" 53 | oci_config_path: str = "" 54 | oci_profile: str = "" 55 | 56 | def __post_init__(self): 57 | self._validate() 58 | 59 | def _validate(self): 60 | 61 | # authentication type 62 | self.oci_auth = ( 63 | self.oci_auth 64 | or os.environ.get(OCIFS_IAM_TYPE) 65 | or AuthType.RESOURCE_PRINCIPAL 66 | ) 67 | if self.oci_auth not in AuthType: 68 | raise UnsupportedAuthTypeError(self.oci_auth) 69 | 70 | # OCI AUTH config path 71 | self.oci_config_path = self.oci_config_path or DEFAULT_LOCATION 72 | 73 | # OCI AUTH profile 74 | self.oci_profile = self.oci_profile or DEFAULT_PROFILE 75 | 76 | @classmethod 77 | def from_dict(cls, config: Dict[str, str]) -> "OCIBackendConfig": 78 | """Creates an instance of the OCIBackendConfig class from a dictionary. 79 | 80 | Parameters 81 | ---------- 82 | config: Dict[str, str] 83 | List of properties and values in dictionary format. 84 | 85 | Returns 86 | ------- 87 | OCIBackendConfig 88 | Instance of the OCIBackendConfig. 89 | """ 90 | if not config: 91 | return cls() 92 | 93 | return cls( 94 | **{ 95 | k: v 96 | for k, v in config.items() 97 | if k in inspect.signature(cls).parameters 98 | } 99 | ) 100 | 101 | 102 | @dataclass 103 | class OCIProjectBackendConfig(OCIBackendConfig): 104 | """Class representing OCI project backend config. 105 | 106 | Attributes 107 | ---------- 108 | oci_job_template_path: str 109 | Path to the Job template YAML. 110 | project_uri: str 111 | The project content location. 112 | work_dir: str 113 | The project work dir. 
114 | """ 115 | 116 | oci_job_template_path: str = "" 117 | project_uri: str = "" 118 | work_dir: str = "" 119 | 120 | def __post_init__(self): 121 | super()._validate() 122 | self._validate() 123 | 124 | def _validate(self): 125 | 126 | # project URI 127 | if not self.project_uri: 128 | raise ValueError("The `project_uri` is not provided.") 129 | 130 | # work dir 131 | if not self.work_dir: 132 | raise ValueError("The `work_dir` is not provided.") 133 | self.work_dir = os.path.abspath(os.path.expanduser(self.work_dir)) 134 | 135 | # Job template path 136 | if not self.oci_job_template_path: 137 | raise ValueError( 138 | "The `oci_job_template_path` is not provided in `oci-config.json`." 139 | ) 140 | self.oci_job_template_path = os.path.abspath( 141 | os.path.expanduser( 142 | self.oci_job_template_path.replace(WORK_DIR, self.work_dir) 143 | ) 144 | ) 145 | 146 | if not os.path.exists(self.oci_job_template_path): 147 | raise ValueError(f"The `{self.oci_job_template_path}` does not exist.") 148 | 149 | if not self.oci_job_template_path.lower().endswith((".yml", ".yaml")): 150 | raise ValueError( 151 | f"Unsupported file format for the `{self.oci_job_template_path}`. 
" 152 | "Allowed formats are: [.yaml, .yml]" 153 | ) 154 | 155 | 156 | def generate_slug(name: str, version: str) -> str: 157 | return f"{name}_v{version}".replace(" ", "").replace(".", "_").lower() 158 | 159 | 160 | def generate_conda_pack_uri( 161 | name: str, version: str, conda_pack_os_prefix: str, slug: str, gpu: bool 162 | ) -> str: 163 | return os.path.join( 164 | conda_pack_os_prefix, 165 | "gpu" if gpu else "cpu", 166 | name, 167 | version, 168 | slug, 169 | ) 170 | 171 | 172 | def create_conda( 173 | name: str, 174 | version: str = "1", 175 | environment_file: str = None, 176 | conda_pack_folder: str = None, 177 | gpu: bool = False, 178 | overwrite: bool = False, 179 | ) -> str: 180 | """ 181 | Creates conda pack and returns slug name 182 | """ 183 | logger.info("Creating conda environment with details - ") 184 | with open(environment_file) as ef: 185 | logger.info(ef.read()) 186 | return _create(name, version, environment_file, conda_pack_folder, gpu, overwrite) 187 | 188 | 189 | # TODO: Move conda create and publish to ADS - https://jira.oci.oraclecorp.com/browse/ODSC-38641 190 | def publish( 191 | slug: str, 192 | conda_pack_os_prefix: str, 193 | conda_pack_folder: str, 194 | overwrite: bool, 195 | ads_config: str = DEFAULT_ADS_CONFIG_FOLDER, 196 | name: str = " ", 197 | version: str = "1", 198 | gpu: bool = False, 199 | ): 200 | """ 201 | Publishes the conda pack to object storage 202 | 203 | TODO: Remove name and version parameter once ADS publish method is updated to return conda pack URI 204 | """ 205 | logger.info( 206 | f"Publishing conda environment to object storage: {conda_pack_os_prefix}" 207 | ) 208 | p = ConfigProcessor().step(ConfigMerger, ads_config=ads_config) 209 | exec_config = p.config["execution"] 210 | # By default the publish uses container to zip and upload the artifact. 211 | # Setting the environment variable to use host to upload the artifact. 
212 | publish_option = os.environ.get("NO_CONTAINER") 213 | os.environ["NO_CONTAINER"] = "True" 214 | _publish( 215 | conda_slug=slug, 216 | conda_uri_prefix=conda_pack_os_prefix, 217 | conda_pack_folder=conda_pack_folder, 218 | overwrite=overwrite, 219 | oci_config=exec_config.get("oci_config"), 220 | oci_profile=exec_config.get("oci_profile"), 221 | auth_type=exec_config["auth"], 222 | ) 223 | if publish_option: 224 | os.environ["NO_CONTAINER"] = publish_option 225 | else: 226 | os.environ.pop("NO_CONTAINER", None) 227 | 228 | return generate_conda_pack_uri(name, version, conda_pack_os_prefix, slug, gpu) 229 | 230 | 231 | def build_and_publish_conda_pack( 232 | name: str, 233 | version: str, 234 | environment_file: str, 235 | conda_pack_folder: str, 236 | conda_pack_os_prefix: str, 237 | gpu: bool = False, 238 | overwrite: bool = False, 239 | ads_config: str = DEFAULT_ADS_CONFIG_FOLDER, 240 | ): 241 | """ 242 | * If overwrite then create and publish always 243 | * If not overwrite and conda_os_uri exists, skip create and publish. let user know 244 | * If not overwrite and conda_os_uri does not exsits, but local conda pack exists, found local environment, publishing from local copy 245 | 246 | """ 247 | slug = generate_slug(name, version) 248 | conda_pack_uri = generate_conda_pack_uri( 249 | name=name, 250 | version=version, 251 | conda_pack_os_prefix=conda_pack_os_prefix, 252 | slug=slug, 253 | gpu=gpu, 254 | ) 255 | fs = ocifs.OCIFileSystem(**default_signer()) 256 | if fs.exists(conda_pack_uri) and not overwrite: 257 | logger.info( 258 | f"Conda pack exists at {conda_pack_uri}. Skipping build and publish. If you want to overwrite, set overwrite to true" 259 | ) 260 | else: 261 | if os.path.exists(os.path.join(conda_pack_folder, slug)) and not overwrite: 262 | logger.info( 263 | f"Found an environment at {os.path.join(conda_pack_folder, slug)} which matches the name and version. 
Change version to create a new pack or set overwrite to true" 264 | ) 265 | else: 266 | create_conda( 267 | name, version, environment_file, conda_pack_folder, gpu, overwrite 268 | ) 269 | logger.info( 270 | f"Created conda pack at {os.path.join(conda_pack_folder, slug)}" 271 | ) 272 | conda_pack_uri = publish( 273 | slug, 274 | conda_pack_os_prefix=conda_pack_os_prefix, 275 | conda_pack_folder=conda_pack_folder, 276 | overwrite=overwrite, 277 | ads_config=ads_config, 278 | gpu=gpu, 279 | name=name, 280 | version=version, 281 | ) 282 | logger.info(f"Published conda pack at {conda_pack_uri}") 283 | return conda_pack_uri 284 | 285 | 286 | def resolve_python_version(conda_yaml_file: str) -> Union[str, None]: 287 | """ 288 | Loops through the dependencies section inside the conda yaml file to search for python version. 289 | 290 | Limitation: Assumes pattern - python=version. Will fail if the yaml has python{><}=version 291 | """ 292 | version = None 293 | with open(conda_yaml_file) as cf: 294 | env = yaml.load(cf, Loader=yaml.SafeLoader) 295 | python = [ 296 | dep 297 | for dep in env["dependencies"] 298 | if isinstance(dep, str) and dep.startswith("python") 299 | ] 300 | version = python[0].split("=")[1] if len(python) > 0 else None 301 | return version 302 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | ## This file created and used instead of setup.py for building and installing ads package. This change is to 2 | ## follow best practive to "not invoke setup.py directly", see detailed explanation why here: 3 | ## https://blog.ganssle.io/articles/2021/10/setup-py-deprecated.html. 4 | ## Check README-development.md and Makefile for instruction how to install or build ADS locally. 
5 | 6 | [build-system] 7 | # These are the assumed default build requirements from pip: 8 | # https://pip.pypa.io/en/stable/reference/pip/#pep-517-and-518-support 9 | # PEP 517 – A build-system independent format for source trees - https://peps.python.org/pep-0517/ 10 | requires = ["flit-core >= 3.8"] 11 | build-backend = "flit_core.buildapi" 12 | 13 | 14 | [project] 15 | # Declaring project metadata 16 | # https://packaging.python.org/en/latest/specifications/declaring-project-metadata/ 17 | # PEP 621 – Storing project metadata in pyproject.toml - https://peps.python.org/pep-0621/ 18 | # PEP 518 – Specifying Minimum Build System Requirements for Python Projects https://peps.python.org/pep-0518/ 19 | 20 | # Required 21 | name = "oci-mlflow" # the install (PyPI) name 22 | version = "1.0.2" 23 | 24 | # Optional 25 | description = "OCI MLflow plugin to use OCI resources within MLflow" 26 | readme = {file = "README.md", content-type = "text/markdown"} 27 | requires-python = ">=3.8" 28 | license = {file = "LICENSE.txt"} 29 | authors = [ 30 | {name = "Oracle Cloud Infrastructure Data Science"} 31 | ] 32 | keywords = [ 33 | "Oracle Cloud Infrastructure", 34 | "OCI", 35 | "Object Storage", 36 | "MLflow", 37 | ] 38 | classifiers = [ 39 | "Development Status :: 5 - Production/Stable", 40 | "Intended Audience :: Developers", 41 | "License :: OSI Approved :: Universal Permissive License (UPL)", 42 | "Operating System :: OS Independent", 43 | "Programming Language :: Python :: 3.8", 44 | "Programming Language :: Python :: 3.9", 45 | "Programming Language :: Python :: 3.10", 46 | ] 47 | 48 | # PEP 508 – Dependency specification for Python Software Packages - https://peps.python.org/pep-0508/ 49 | # In dependencies se "; platform_machine == 'aarch64'" to specify ARM underlying platform 50 | # Copied from install_requires list in setup.py, setup.py got removed in favor of this config file 51 | dependencies = [ 52 | "mlflow>=2.8.0", 53 | "oracle-ads>=2.8.8", 54 | ] 55 | 56 | 
[project.urls] 57 | "Github" = "https://github.com/oracle/oci-mlflow" 58 | "Documentation" = "https://oci-mlflow.readthedocs.io/en/latest/index.html" 59 | 60 | # https://peps.python.org/pep-0621/#entry-points 61 | # Note the quotes around mlflow.artifact_repository in order to escape the dot . 62 | [project.entry-points."mlflow.artifact_repository"] 63 | oci = "oci_mlflow.oci_object_storage:OCIObjectStorageArtifactRepository" 64 | 65 | [project.entry-points."mlflow.project_backend"] 66 | oci-datascience = "oci_mlflow.project:OCIProjectBackend" 67 | 68 | [project.entry-points."mlflow.deployments"] 69 | oci-datascience = "oci_mlflow.deployment" 70 | 71 | [project.entry-points."mlflow.request_auth_provider"] 72 | oci-datascience = "oci_mlflow.auth_plugin:OCIMLFlowAuthRequestProvider" 73 | 74 | # Configuring Ruff (https://docs.astral.sh/ruff/configuration/) 75 | [tool.ruff] 76 | fix = true 77 | 78 | [tool.ruff.lint] 79 | exclude = ["*.yaml", "*jinja2"] 80 | # rules - https://docs.astral.sh/ruff/rules/ 81 | extend-ignore = ["E402", "N806", "N803"] 82 | ignore = [ 83 | "S101", # use of assert 84 | "B008", # function call in argument defaults 85 | "B017", # pytest.raises considered evil 86 | "B023", # function definition in loop (TODO: un-ignore this) 87 | "B028", # explicit stacklevel for warnings 88 | "C901", # function is too complex (TODO: un-ignore this) 89 | "E501", # from scripts/lint_backend.sh 90 | "PLR091", # complexity rules 91 | "PLR2004", # magic numbers 92 | "PLW2901", # `for` loop variable overwritten by assignment target 93 | "SIM105", # contextlib.suppress (has a performance cost) 94 | "SIM117", # multiple nested with blocks (doesn't look good with gr.Row etc) 95 | "UP006", # use `list` instead of `List` for type annotations (fails for 3.8) 96 | "UP007", # use X | Y for type annotations (TODO: can be enabled once Pydantic plays nice with them) 97 | ] 98 | extend-select = [ 99 | "ARG", 100 | "B", 101 | "C", 102 | "E", 103 | "F", 104 | "I", 105 | "N", 106 | 
"PL", 107 | "S101", 108 | "SIM", 109 | "UP", 110 | "W", 111 | ] 112 | 113 | [tool.ruff.lint.per-file-ignores] 114 | "__init__.py" = ["F401"] -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = -v -p no:warnings --durations=5 3 | testpaths = 4 | tests 5 | authorizer/tests 6 | pythonpath = . oci_mlflow 7 | env = 8 | OCIFS_IAM_TYPE="api_key" 9 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = __init__.py 3 | max-line-length = 95 4 | ignore = 5 | E20, # Extra space in brackets 6 | E231,E241, # Multiple spaces around "," 7 | E26, # Comments 8 | E4, # Import formatting 9 | E721, # Comparing types instead of isinstance 10 | E731, # Assigning lambda expression 11 | E121, # continuation line under-indented for hanging indent 12 | E126, # continuation line over-indented for hanging indent 13 | E127, # continuation line over-indented for visual indent 14 | E128, # E128 continuation line under-indented for visual indent 15 | E702, # multiple statements on one line (semicolon) 16 | W503, # line break before binary operator 17 | E129, # visually indented line with same indent as next logical line 18 | E116, # unexpected indentation 19 | F811, # redefinition of unused 'loop' from line 10 20 | F841, # local variable is assigned to but never used 21 | E741 # Ambiguous variable names 22 | W504, # line break after binary operator 23 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Oracle and/or its affiliates. 
2 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ 3 | 4 | ### File setup.py obsolete and must not be used. Please update pyproject.toml instead. 5 | ### See detailed explanation why here: 6 | ### https://blog.ganssle.io/articles/2021/10/setup-py-deprecated.html. 7 | # PEP 621 – Storing project metadata in pyproject.toml - https://peps.python.org/pep-0621/ 8 | # PEP 518 – Specifying Minimum Build System Requirements for Python Projects https://peps.python.org/pep-0518/ 9 | # PEP 508 – Dependency specification for Python Software Packages - https://peps.python.org/pep-0508/ 10 | # PEP 517 – A build-system independent format for source trees - https://peps.python.org/pep-0517/ 11 | -------------------------------------------------------------------------------- /test-requirements.txt: -------------------------------------------------------------------------------- 1 | -e . 2 | faker 3 | mock 4 | pip 5 | pytest 6 | pytest-codecov 7 | ruff 8 | -------------------------------------------------------------------------------- /tests/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*-- 3 | 4 | # Copyright (c) 2023 Oracle and/or its affiliates. 5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ 6 | -------------------------------------------------------------------------------- /tests/plugins/unitary/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8; -*- 3 | 4 | # Copyright (c) 2023 Oracle and/or its affiliates. 
5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ 6 | -------------------------------------------------------------------------------- /tests/plugins/unitary/artifacts/1.txt: -------------------------------------------------------------------------------- 1 | Test 2 | -------------------------------------------------------------------------------- /tests/plugins/unitary/artifacts/2.txt: -------------------------------------------------------------------------------- 1 | Test 2 | -------------------------------------------------------------------------------- /tests/plugins/unitary/artifacts/sub_folder/3.txt: -------------------------------------------------------------------------------- 1 | Test 2 | -------------------------------------------------------------------------------- /tests/plugins/unitary/artifacts/sub_folder/4.txt: -------------------------------------------------------------------------------- 1 | Test 2 | -------------------------------------------------------------------------------- /tests/plugins/unitary/test_auth.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8; -*- 3 | 4 | # Copyright (c) 2023 Oracle and/or its affiliates. 
5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ 6 | from unittest.mock import patch 7 | from oci_mlflow.auth_plugin import OCIMLFlowAuthRequestProvider 8 | 9 | 10 | class TestOCIMLFlowAuth: 11 | def test_get_name(self): 12 | provider = OCIMLFlowAuthRequestProvider() 13 | assert provider.get_name() == "OCI_REQUEST_AUTH" 14 | 15 | @patch("oci_mlflow.auth_plugin.default_signer") 16 | def test_get_auth(self, mock_default_signer): 17 | mock_default_signer.return_value = { 18 | "config": {}, 19 | "signer": "test_default_signer", 20 | "client_kwargs": {}, 21 | } 22 | provider = OCIMLFlowAuthRequestProvider() 23 | auth = provider.get_auth() 24 | assert auth == "test_default_signer" 25 | -------------------------------------------------------------------------------- /tests/plugins/unitary/test_files/conda.yaml: -------------------------------------------------------------------------------- 1 | kind: job 2 | name: "{DataFlow application name. 
For MLflow, it will be replaced with the Project name}" 3 | dependencies: 4 | - python=3.8 5 | spec: 6 | infrastructure: 7 | kind: infrastructure 8 | spec: 9 | compartmentId: ocid1.testCompartmentId 10 | driverShape: VM.Standard.E4.Flex 11 | driverShapeConfig: 12 | memory_in_gbs: 32 13 | ocpus: 2 14 | executorShape: VM.Standard.E4.Flex 15 | executorShapeConfig: 16 | memory_in_gbs: 32 17 | ocpus: 2 18 | language: PYTHON 19 | logsBucketUri: oci://test-log-bucket 20 | numExecutors: 1 21 | sparkVersion: 3.2.1 22 | privateEndpointId: ocid1.dataflowprivateendpoint 23 | type: dataFlow 24 | runtime: 25 | kind: runtime 26 | spec: 27 | configuration: 28 | spark.driverEnv.MLFLOW_TRACKING_URI: http://test-tracking-uri 29 | conda: 30 | type: published 31 | uri: oci://custom-conda-packs@ociodscdev/conda_environments/cpu/PySpark 3.2 32 | and Data Flow/2.0/pyspark32_p38_cpu_v2 33 | condaAuthType: resource_principal 34 | scriptBucket: oci://test-bucket 35 | scriptPathURI: "{Path to the executable script. For MLflow, it will be replaced with the CMD}" 36 | type: dataFlow 37 | -------------------------------------------------------------------------------- /tests/plugins/unitary/test_files/invalid-file-type.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oracle/oci-mlflow/1e7b9f36e5afe70d00dfc56faca6b0a8e311366d/tests/plugins/unitary/test_files/invalid-file-type.txt -------------------------------------------------------------------------------- /tests/plugins/unitary/test_files/oci-datascience-template_test.yaml: -------------------------------------------------------------------------------- 1 | kind: job 2 | name: "{DataFlow application name. 
For MLflow, it will be replaced with the Project name}" 3 | dependencies: 4 | - python=3.8 5 | spec: 6 | infrastructure: 7 | kind: infrastructure 8 | spec: 9 | compartmentId: ocid1.testCompartmentId 10 | driverShape: VM.Standard.E4.Flex 11 | driverShapeConfig: 12 | memory_in_gbs: 32 13 | ocpus: 2 14 | executorShape: VM.Standard.E4.Flex 15 | executorShapeConfig: 16 | memory_in_gbs: 32 17 | ocpus: 2 18 | language: PYTHON 19 | logsBucketUri: oci://test-log-bucket 20 | numExecutors: 1 21 | sparkVersion: 3.2.1 22 | privateEndpointId: ocid1.dataflowprivateendpoint 23 | type: dataFlow 24 | runtime: 25 | kind: runtime 26 | spec: 27 | configuration: 28 | spark.driverEnv.MLFLOW_TRACKING_URI: http://test-tracking-uri 29 | conda: 30 | type: published 31 | uri: oci://custom-conda-packs@ociodscdev/conda_environments/cpu/PySpark 3.2 32 | and Data Flow/2.0/pyspark32_p38_cpu_v2 33 | model_uri: oci://custom-conda-packs@ociodscdev/conda_environments/cpu/PySpark 3.2 34 | and Data Flow/2.0/pyspark32_p38_cpu_v2 35 | condaAuthType: resource_principal 36 | scriptBucket: oci://test-bucket 37 | scriptPathURI: "{Path to the executable script. 
For MLflow, it will be replaced with the CMD}" 38 | type: dataFlow 39 | -------------------------------------------------------------------------------- /tests/plugins/unitary/test_files/test-empty-project: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oracle/oci-mlflow/1e7b9f36e5afe70d00dfc56faca6b0a8e311366d/tests/plugins/unitary/test_files/test-empty-project -------------------------------------------------------------------------------- /tests/plugins/unitary/test_files/test-model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oracle/oci-mlflow/1e7b9f36e5afe70d00dfc56faca6b0a8e311366d/tests/plugins/unitary/test_files/test-model -------------------------------------------------------------------------------- /tests/plugins/unitary/test_files/test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oracle/oci-mlflow/1e7b9f36e5afe70d00dfc56faca6b0a8e311366d/tests/plugins/unitary/test_files/test.txt -------------------------------------------------------------------------------- /tests/plugins/unitary/test_oci_object_storage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8; -*- 3 | 4 | import os 5 | import tempfile 6 | from unittest.mock import MagicMock, Mock, patch 7 | 8 | # Copyright (c) 2023 Oracle and/or its affiliates. 
9 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ 10 | import pytest 11 | from mlflow.entities import FileInfo 12 | 13 | from oci_mlflow import oci_object_storage 14 | from oci_mlflow.oci_object_storage import ( 15 | ArtifactUploader, 16 | OCIObjectStorageArtifactRepository, 17 | get_token_path, 18 | get_signer, 19 | DEFAULT_DELEGATION_TOKEN_PATH, 20 | ) 21 | from oci import object_storage 22 | 23 | 24 | class DataObject: 25 | def __init__(self, name, size): 26 | self.name = name 27 | self.size = size 28 | 29 | 30 | class TestOCIObjectStorageArtifactRepository: 31 | def setup_class(cls): 32 | cls.curr_dir = os.path.dirname(os.path.abspath(__file__)) 33 | oci_object_storage.OCI_PREFIX = "" 34 | 35 | @classmethod 36 | def teardown_class(cls): 37 | oci_object_storage.OCI_PREFIX = "oci://" 38 | 39 | @pytest.fixture() 40 | def oci_artifact_repo(self): 41 | return OCIObjectStorageArtifactRepository( 42 | artifact_uri="oci://my-bucket@my-namespace/my-artifact-path" 43 | ) 44 | 45 | @pytest.fixture 46 | def mock_fsspec_open(self): 47 | with patch("fsspec.open") as mock_open: 48 | yield mock_open 49 | 50 | def test_parse_os_uri(self, oci_artifact_repo): 51 | bucket, namespace, path = oci_object_storage.parse_os_uri( 52 | "oci://my-bucket@my-namespace/my-artifact-path" 53 | ) 54 | assert bucket == "my-bucket" 55 | assert namespace == "my-namespace" 56 | assert path == "my-artifact-path" 57 | 58 | def test_parse_os_uri_with_invalid_scheme(self, oci_artifact_repo): 59 | with pytest.raises(Exception): 60 | oci_object_storage.parse_os_uri("s3://my-bucket/my-artifact-path") 61 | 62 | def test_download_file(self, oci_artifact_repo): 63 | mock_fs = MagicMock() 64 | mock_fs.download.return_value = None 65 | oci_artifact_repo.get_fs = MagicMock(return_value=mock_fs) 66 | with tempfile.TemporaryDirectory() as tmp_dir: 67 | local_path = os.path.join(tmp_dir, "my_file.txt") 68 | remote_path = "my/remote/path/my_file.txt" 69 | 70 
| oci_artifact_repo._download_file( 71 | remote_file_path=remote_path, local_path=local_path 72 | ) 73 | 74 | mock_fs.download.assert_called_once_with( 75 | "oci://my-bucket@my-namespace/my-artifact-path/my/remote/path/my_file.txt", 76 | local_path, 77 | ) 78 | 79 | @patch.object(ArtifactUploader, "upload") 80 | def test_log_artifact(self, mock_upload_file, oci_artifact_repo): 81 | local_file = "test_files/test.txt" 82 | artifact_path = "logs" 83 | oci_artifact_repo.log_artifact(local_file, artifact_path) 84 | expected_dest_path = ( 85 | "oci://my-bucket@my-namespace/my-artifact-path/logs/test.txt" 86 | ) 87 | mock_upload_file.assert_called_once_with(local_file, expected_dest_path) 88 | 89 | @patch.object(ArtifactUploader, "upload") 90 | def test_log_artifact_with_empty_path(self, mock_upload_file, oci_artifact_repo): 91 | local_file = "test_files/test.txt" 92 | artifact_path = "" 93 | oci_artifact_repo.log_artifact(local_file, artifact_path) 94 | expected_dest_path = ( 95 | "oci://my-bucket@my-namespace/my-artifact-path/test.txt" 96 | ) 97 | mock_upload_file.assert_called_once_with(local_file, expected_dest_path) 98 | 99 | def test_log_artifact_with_whitespace(self, oci_artifact_repo): 100 | local_file = "test_files/test.txt" 101 | artifact_path = " " 102 | with pytest.raises( 103 | ValueError, 104 | match="`artifact_path` must not be whitespace string." 
105 | ): 106 | oci_artifact_repo.log_artifact(local_file, artifact_path) 107 | 108 | @patch.object(ArtifactUploader, "upload") 109 | def test_log_artifact_with_none(self, mock_upload_file, oci_artifact_repo): 110 | local_file = "test_files/test.txt" 111 | artifact_path = None 112 | oci_artifact_repo.log_artifact(local_file, artifact_path) 113 | expected_dest_path = ( 114 | "oci://my-bucket@my-namespace/my-artifact-path/test.txt" 115 | ) 116 | mock_upload_file.assert_called_once_with(local_file, expected_dest_path) 117 | 118 | @patch.object(ArtifactUploader, "upload") 119 | def test_log_artifact_with_slash_ending_path(self, mock_upload_file, oci_artifact_repo): 120 | local_file = "test_files/test.txt" 121 | artifact_path = "logs/" 122 | oci_artifact_repo.log_artifact(local_file, artifact_path) 123 | expected_dest_path = ( 124 | "oci://my-bucket@my-namespace/my-artifact-path/logs/test.txt" 125 | ) 126 | mock_upload_file.assert_called_once_with(local_file, expected_dest_path) 127 | 128 | @patch.object(ArtifactUploader, "upload") 129 | def test_log_artifacts(self, mock_upload_file, oci_artifact_repo): 130 | local_dir = os.path.join(self.curr_dir, "test_files") 131 | dest_path = "path/to/dest" 132 | oci_artifact_repo.log_artifacts(local_dir, dest_path) 133 | mock_upload_file.assert_called() 134 | 135 | @patch.object(OCIObjectStorageArtifactRepository, "get_fs") 136 | def test_delete_artifacts(self, mock_get_fs, oci_artifact_repo): 137 | mock_fs = Mock() 138 | mock_get_fs.return_value = mock_fs 139 | mock_fs.ls.return_value = ["test/file1", "test/file2", "test/folder/"] 140 | oci_artifact_repo.delete_artifacts("test") 141 | mock_fs.ls.assert_called_once_with( 142 | "oci://my-bucket@my-namespace/my-artifact-path/test", refresh=True 143 | ) 144 | assert mock_fs.delete.call_count == 3 145 | mock_fs.delete.assert_any_call("test/file1") 146 | mock_fs.delete.assert_any_call("test/file2") 147 | mock_fs.delete.assert_any_call("test/folder/") 148 | 149 | def 
test_list_artifacts(self): 150 | print(os.path.join(self.curr_dir, "artifacts")) 151 | 152 | oci_artifact_repo = OCIObjectStorageArtifactRepository( 153 | artifact_uri=os.path.join(self.curr_dir, "artifacts") 154 | ) 155 | 156 | artifacts = oci_artifact_repo.list_artifacts() 157 | 158 | expected_artifacts = [ 159 | FileInfo("1.txt", False, 5), 160 | FileInfo("2.txt", False, 5), 161 | FileInfo("sub_folder", True, 0), 162 | ] 163 | assert artifacts == expected_artifacts 164 | 165 | 166 | class TestArtifactUploader: 167 | def test_init(self): 168 | """Ensures the ArtifactUploader instance can be initialized.""" 169 | artifact_uploader = ArtifactUploader() 170 | assert isinstance( 171 | artifact_uploader.upload_manager, object_storage.UploadManager 172 | ) 173 | 174 | @patch.object(object_storage.UploadManager, "upload_file") 175 | def test_upload(self, mock_upload_file): 176 | """Tests uploading model artifacts.""" 177 | artifact_uploader = ArtifactUploader() 178 | 179 | local_file = "test_files/test.txt" 180 | dest_path = "oci://my-bucket@my-namespace/my-artifact-path/logs/test.txt" 181 | artifact_uploader.upload(local_file, dest_path) 182 | 183 | mock_upload_file.assert_called_with( 184 | namespace_name="my-namespace", 185 | bucket_name="my-bucket", 186 | object_name="my-artifact-path/logs/test.txt", 187 | file_path=local_file, 188 | ) 189 | 190 | 191 | class TestUtils: 192 | """Test static methods in oci_object_storage.py.""" 193 | 194 | @patch("os.path.exists") 195 | def test_get_token_path_in_df(self, mock_path): 196 | """Tests getting the token path in DF session.""" 197 | mock_path.return_value = True 198 | assert get_token_path() == DEFAULT_DELEGATION_TOKEN_PATH 199 | 200 | @patch("os.path.exists") 201 | def test_get_token_path_locally(self, mock_path): 202 | """Tests getting the token path locally.""" 203 | mock_path.return_value = False 204 | assert get_token_path() == None 205 | 206 | @patch("oci_mlflow.oci_object_storage.get_delegation_token_signer") 207 | 
@patch("ads.common.auth.set_auth") 208 | def test_get_signer_in_df(self, mock_set_auth, mock_get_signer): 209 | """Tests getting the storage options in DF session.""" 210 | get_signer(token_path=DEFAULT_DELEGATION_TOKEN_PATH) 211 | mock_set_auth.assert_called_once_with( 212 | signer_callable=mock_get_signer, 213 | signer_kwargs={"token_path": DEFAULT_DELEGATION_TOKEN_PATH}, 214 | ) 215 | 216 | @patch("ads.common.auth.default_signer") 217 | @patch("ads.common.auth.set_auth") 218 | def test_get_signer_locally(self, mock_set_auth, mock_default_signer): 219 | """Tests getting the storage options locally.""" 220 | expected_config = {"config": "value", "signer": "value2"} 221 | mock_default_signer.return_value = expected_config 222 | 223 | signer = get_signer(token_path=None) 224 | mock_set_auth.assert_not_called() 225 | assert signer == expected_config 226 | -------------------------------------------------------------------------------- /tests/plugins/unitary/test_telemetry.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8; -*- 3 | 4 | # Copyright (c) 2023 Oracle and/or its affiliates. 5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ 6 | 7 | import os 8 | from unittest.mock import patch 9 | 10 | import pytest 11 | 12 | from oci_mlflow.telemetry_logging import Telemetry 13 | 14 | 15 | class TestTelemetry: 16 | """Tests the Telemetry. 17 | Class to capture telemetry sequence into the environment variable. 
18 | """ 19 | 20 | def setup_method(self): 21 | self.telemetry = Telemetry("test.api") 22 | 23 | @patch.dict(os.environ, {}, clear=True) 24 | def test_init(self): 25 | """Ensures initializing Telemetry passes.""" 26 | self.telemetry = Telemetry("test.api") 27 | assert self.telemetry.environ_variable in os.environ 28 | assert os.environ[self.telemetry.environ_variable] == "" 29 | 30 | @patch.dict(os.environ, {}, clear=True) 31 | def test_add(self): 32 | """Tests adding the new value to the telemetry.""" 33 | self.telemetry.begin() 34 | self.telemetry.add("key=value").add("new_key=new_value") 35 | assert ( 36 | os.environ[self.telemetry.environ_variable] 37 | == "test.api&key=value&new_key=new_value" 38 | ) 39 | 40 | @patch.dict(os.environ, {}, clear=True) 41 | def test_begin(self): 42 | """Tests cleaning the value of the associated environment variable.""" 43 | self.telemetry.begin("key=value") 44 | assert os.environ[self.telemetry.environ_variable] == "test.api&key=value" 45 | 46 | @patch.dict(os.environ, {}, clear=True) 47 | def test_clean(self): 48 | """Ensures that telemetry associated environment variable can be cleaned.""" 49 | self.telemetry.begin() 50 | self.telemetry.add("key=value").add("new_key=new_value") 51 | assert ( 52 | os.environ[self.telemetry.environ_variable] 53 | == "test.api&key=value&new_key=new_value" 54 | ) 55 | self.telemetry.clean() 56 | assert os.environ[self.telemetry.environ_variable] == "" 57 | 58 | @pytest.mark.parametrize( 59 | "INPUT_DATA, EXPECTED_RESULT", 60 | [ 61 | ("key=va~!@#$%^*()_+lue", "key=va____________lue"), 62 | ("key=va lue", "key=va_lue"), 63 | ("key=va123***lue", "key=va123___lue"), 64 | ], 65 | ) 66 | @patch.dict(os.environ, {}, clear=True) 67 | def test__prepare(self, INPUT_DATA, EXPECTED_RESULT): 68 | """Tests replacing special characters in the telemetry input value.""" 69 | self.telemetry.begin(INPUT_DATA) 70 | assert ( 71 | os.environ[self.telemetry.environ_variable] == f"test.api&{EXPECTED_RESULT}" 72 | ) 73 | 
-------------------------------------------------------------------------------- /tests/plugins/unitary/test_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8; -*- 3 | 4 | # Copyright (c) 2023 Oracle and/or its affiliates. 5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ 6 | 7 | import os 8 | from unittest.mock import patch, ANY 9 | 10 | import pytest 11 | 12 | from oci_mlflow import utils 13 | from oci_mlflow.utils import ( 14 | OCIBackendConfig, 15 | OCIProjectBackendConfig, 16 | UnsupportedAuthTypeError, 17 | generate_conda_pack_uri, 18 | generate_slug, 19 | create_conda, 20 | resolve_python_version, 21 | publish, 22 | ) 23 | 24 | 25 | class TestOCIBackendConfig: 26 | """Tests the OCIBackendConfig.""" 27 | 28 | def test_from_dict_with_empty_dict(self): 29 | config = {} 30 | 31 | result = OCIBackendConfig.from_dict(config) 32 | 33 | assert isinstance(result, OCIBackendConfig) 34 | 35 | def test_from_dict_with_valid_dict(self): 36 | config = {"oci_auth": "resource_principal", "oci_profile": "testVal2"} 37 | 38 | result = OCIBackendConfig.from_dict(config) 39 | assert isinstance(result, OCIBackendConfig) 40 | assert result.oci_auth == "resource_principal" 41 | assert result.oci_profile == "testVal2" 42 | 43 | def test_from_dict_with_invalid_dict(self): 44 | config = {"oci_auth": "test_ErrorVal", "oci_profile": "testVal2"} 45 | 46 | with pytest.raises(UnsupportedAuthTypeError): 47 | OCIProjectBackendConfig.from_dict(config) 48 | 49 | 50 | class TestOCIProjectBackendConfig: 51 | """Tests the OCIProjectBackendConfig.""" 52 | 53 | def setup_class(cls): 54 | cls.curr_dir = os.path.dirname(os.path.abspath(__file__)) 55 | 56 | def test_post_init_with_valid_values(self): 57 | config = { 58 | "oci_auth": "resource_principal", 59 | "oci_profile": "testVal2", 60 | "oci_job_template_path": 
"{work_dir}/test_files/oci-datascience-template_test.yaml", 61 | "project_uri": "/path/to/project", 62 | "work_dir": self.curr_dir, 63 | } 64 | 65 | project_backend_config = OCIProjectBackendConfig.from_dict(config) 66 | 67 | assert project_backend_config.oci_job_template_path == os.path.join( 68 | self.curr_dir, "test_files/oci-datascience-template_test.yaml" 69 | ) 70 | assert project_backend_config.project_uri == "/path/to/project" 71 | assert project_backend_config.work_dir == self.curr_dir 72 | 73 | def test_post_init_with_missing_project_uri(self): 74 | config_dict = { 75 | "oci_auth": "resource_principal", 76 | "oci_job_template_path": "test_files/oci-datascience-template_test.yaml", 77 | "work_dir": "/path/to/work_dir", 78 | } 79 | 80 | with pytest.raises(ValueError): 81 | OCIProjectBackendConfig.from_dict(config_dict) 82 | 83 | def test_post_init_with_missing_work_dir(self): 84 | config_dict = { 85 | "oci_auth": "resource_principal", 86 | "oci_job_template_path": "test_files/oci-datascience-template_test.yaml", 87 | "project_uri": "/path/to/project", 88 | } 89 | 90 | with pytest.raises(ValueError): 91 | OCIProjectBackendConfig.from_dict(config_dict) 92 | 93 | def test_post_init_with_missing_oci_job_template_path(self): 94 | config_dict = { 95 | "oci_auth": "resource_principal", 96 | "project_uri": "/path/to/project", 97 | "work_dir": "/path/to/work_dir", 98 | } 99 | 100 | with pytest.raises(ValueError): 101 | OCIProjectBackendConfig.from_dict(config_dict) 102 | 103 | def test_post_init_with_invalid_oci_job_template_path(self): 104 | config_dict = { 105 | "oci_job_template_path": "test_files/invalid-file-type.html", 106 | "project_uri": "/path/to/project", 107 | "work_dir": "/path/to/work_dir", 108 | } 109 | 110 | with pytest.raises(ValueError): 111 | OCIProjectBackendConfig.from_dict(config_dict) 112 | 113 | def test_post_init_with_invalid_oci_job_template_extension(self): 114 | config_dict = { 115 | "oci_job_template_path": 
"{work_dir}/test_files/invalid-file-type.txt", 116 | "project_uri": "/path/to/project", 117 | "work_dir": self.curr_dir, 118 | } 119 | 120 | with pytest.raises(ValueError): 121 | OCIProjectBackendConfig.from_dict(config_dict) 122 | 123 | 124 | class TestUtils: 125 | """Tests the common methods in the utils module.""" 126 | 127 | def setup_class(cls): 128 | cls.curr_dir = os.path.dirname(os.path.abspath(__file__)) 129 | 130 | def test_generate_slug(self): 131 | assert generate_slug("Test Sth", "1.0.1") == "teststh_v1_0_1" 132 | 133 | def test_generate_conda_pack_uri(self): 134 | assert generate_conda_pack_uri( 135 | "test_package", "1.0", "/path/to/prefix", "test_package_v1", False 136 | ) == os.path.join( 137 | "/path/to/prefix", "cpu", "test_package", "1.0", "test_package_v1" 138 | ) 139 | 140 | assert generate_conda_pack_uri( 141 | "test_package", "1.0", "/path/to/prefix", "test_package_v1_0", True 142 | ) == os.path.join( 143 | "/path/to/prefix", "gpu", "test_package", "1.0", "test_package_v1_0" 144 | ) 145 | 146 | @patch("oci_mlflow.utils._create") 147 | def test_create_conda(self, mock_create): 148 | mock_create.return_value = "test_return_val" 149 | assert create_conda( 150 | "dummy_name", 151 | "1", 152 | os.path.join( 153 | self.curr_dir, "test_files/oci-datascience-template_test" ".yaml" 154 | ) 155 | == "test_return_val", 156 | ) 157 | 158 | def test_resolve_python_version(self): 159 | assert ( 160 | resolve_python_version( 161 | os.path.join( 162 | self.curr_dir, "test_files/oci-datascience-template_test.yaml" 163 | ) 164 | ) 165 | == "3.8" 166 | ) 167 | 168 | @patch("oci_mlflow.utils.ConfigProcessor") 169 | @patch.object(utils, "generate_conda_pack_uri") 170 | @patch("oci_mlflow.utils._publish") 171 | def test_publish(self, mock_publish, mock_generate_conda_pack, mock_config): 172 | slug = "test_slug" 173 | conda_pack_os_prefix = "test_prefix" 174 | conda_pack_folder = "test_folder" 175 | overwrite = False 176 | ads_config = "test_config" 177 | name = 
"test_name" 178 | version = "test_version" 179 | gpu = False 180 | 181 | publish( 182 | slug, 183 | conda_pack_os_prefix, 184 | conda_pack_folder, 185 | overwrite, 186 | ads_config, 187 | name, 188 | version, 189 | gpu, 190 | ) 191 | 192 | mock_config.assert_called_once() 193 | mock_generate_conda_pack.assert_called_once_with( 194 | name, version, conda_pack_os_prefix, slug, gpu 195 | ) 196 | mock_publish.assert_called_once_with( 197 | conda_slug=slug, 198 | conda_uri_prefix=conda_pack_os_prefix, 199 | conda_pack_folder=conda_pack_folder, 200 | overwrite=overwrite, 201 | oci_config=ANY, 202 | oci_profile=ANY, 203 | auth_type=ANY, 204 | ) 205 | --------------------------------------------------------------------------------