├── .devcontainer ├── Dockerfile.txt └── devcontainer.json ├── .flake8 ├── .github └── workflows │ └── ci.yaml ├── .gitignore ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── compose.yaml ├── data └── html_urls.txt ├── dev-requirements.txt ├── images ├── 16-workers-training.png ├── gradio.png ├── hf-ds-mlrun.png ├── serving-graph.png ├── training-pipeline.png ├── video-thumbnail.png └── workflow-train.png ├── mlrun.env ├── project.yaml ├── project_setup.py ├── pyproject.toml ├── requirements.txt ├── setup.py ├── src ├── data_collection.py ├── data_preprocess.py ├── serving.py ├── trainer.py └── training_workflow.py └── tutorial.ipynb /.devcontainer/Dockerfile.txt: -------------------------------------------------------------------------------- 1 | # See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.236.0/containers/python-3/.devcontainer/base.Dockerfile 2 | 3 | # [Choice] Python version (use -bullseye variants on local arm64/Apple Silicon): 3, 3.10, 3.9, 3.8, 3.7, 3.6, 3-bullseye, 3.10-bullseye, 3.9-bullseye, 3.8-bullseye, 3.7-bullseye, 3.6-bullseye, 3-buster, 3.10-buster, 3.9-buster, 3.8-buster, 3.7-buster, 3.6-buster 4 | ARG VARIANT="3.8-bullseye" 5 | FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT} 6 | 7 | # [Choice] Node.js version: none, lts/*, 16, 14, 12, 10 8 | ARG NODE_VERSION="none" 9 | RUN if [ "${NODE_VERSION}" != "none" ]; then su vscode -c "umask 0002 && . /usr/local/share/nvm/nvm.sh && nvm install ${NODE_VERSION} 2>&1"; fi 10 | 11 | # [Optional] If your pip requirements rarely change, uncomment this section to add them to the image. 12 | COPY requirements.txt /tmp/pip-tmp/ 13 | RUN pip3 --disable-pip-version-check --no-cache-dir install -r /tmp/pip-tmp/requirements.txt \ 14 | && rm -rf /tmp/pip-tmp 15 | 16 | # [Optional] Uncomment this section to install additional OS packages. 17 | # RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ 18 | # && apt-get -y install --no-install-recommends 19 | 20 | # [Optional] Uncomment this line to install global node packages. 21 | # RUN su vscode -c "source /usr/local/share/nvm/nvm.sh && npm install -g " 2>&1 22 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the README at: 2 | // https://github.com/microsoft/vscode-dev-containers/tree/v0.236.0/containers/python-3 3 | { 4 | "name": "MLRun NYC Taxi Tutorial", 5 | "build": { 6 | "dockerfile": "Dockerfile", 7 | "context": "..", 8 | "args": { 9 | // Update 'VARIANT' to pick a Python version: 3, 3.10, 3.9, 3.8, 3.7, 3.6 10 | // Append -bullseye or -buster to pin to an OS version. 11 | // Use -bullseye variants on local on arm64/Apple Silicon. 12 | "VARIANT": "3.8", 13 | // Options 14 | "NODE_VERSION": "none" 15 | } 16 | }, 17 | "containerEnv": { 18 | "MLRUN_ENV_FILE": "${containerWorkspaceFolder}/mlrun.env", 19 | "SHARED_DIR": "~/mlrun-data", 20 | "MLRUN_TAG": "1.2.0-rc21" 21 | }, 22 | // Configure tool-specific properties. 23 | "customizations": { 24 | // Configure properties specific to VS Code. 25 | "vscode": { 26 | // Set *default* container specific settings.json values on container create. 
27 | "settings": { 28 | "python.defaultInterpreterPath": "/usr/local/bin/python", 29 | "python.linting.enabled": true, 30 | "python.linting.pylintEnabled": true, 31 | "python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8", 32 | "python.formatting.blackPath": "/usr/local/py-utils/bin/black", 33 | "python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf", 34 | "python.linting.banditPath": "/usr/local/py-utils/bin/bandit", 35 | "python.linting.flake8Path": "/usr/local/py-utils/bin/flake8", 36 | "python.linting.mypyPath": "/usr/local/py-utils/bin/mypy", 37 | "python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle", 38 | "python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle", 39 | "python.linting.pylintPath": "/usr/local/py-utils/bin/pylint" 40 | }, 41 | 42 | // Add the IDs of extensions you want installed when the container is created. 43 | "extensions": [ 44 | "ms-python.python", 45 | "ms-python.vscode-pylance" 46 | ] 47 | } 48 | }, 49 | 50 | // Use 'forwardPorts' to make a list of ports inside the container available locally. 51 | "forwardPorts": [8060, 8070, 8080], 52 | // "runArgs": [ "--network", "host"], 53 | "portsAttributes": {"8060": {"label": "MLRun UI"}, "8070": {"label": "Nuclio UI"}, "8080": {"label": "MLRun API"}}, 54 | 55 | // Use 'postCreateCommand' to run commands after the container is created. 56 | // "postCreateCommand": "chmod +x /workspaces/tutorials/start.sh", 57 | // "postStartCommand": "echo XXX=$(ip route get 1.2.3.4 | awk '{print $7}') > xx.env", 58 | 59 | // Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. 60 | "remoteUser": "vscode", 61 | "features": { 62 | "docker-from-docker": "latest", 63 | "git": "latest", 64 | "jupyterlab": "latest" 65 | }, 66 | "hostRequirements": { 67 | "cpus": 4, 68 | "memory": "8gb", 69 | "storage": "32gb" 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 3 | extend-ignore = E203, W503 4 | 5 | # exclude these dirs 6 | exclude = .git,venv,playground 7 | 8 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - development 7 | push: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | lint: 13 | name: Lint code (Python ${{ matrix.python-version }}) 14 | runs-on: ubuntu-latest 15 | strategy: 16 | matrix: 17 | python-version: [3.9] 18 | steps: 19 | - uses: actions/checkout@v3 20 | - name: Set up python ${{ matrix.python-version }} 21 | uses: actions/setup-python@v4 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | - uses: actions/cache@v2 25 | with: 26 | path: ~/.cache/pip 27 | key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('**/dev-requirements.txt') }} 28 | restore-keys: | 29 | ${{ runner.os }}-pip-${{ matrix.python-version }}- 30 | ${{ runner.os }}-pip- 31 | - name: Install dependencies 32 | run: | 33 | python -m pip install --upgrade pip~=22.3.0 34 | pip install -r dev-requirements.txt 35 | - name: Lint 36 | run: make lint 37 | 38 | 39 | tests: 40 | name: Run tests (Python ${{ matrix.python-version }}) 41 | runs-on: ubuntu-latest 42 | strategy: 43 | matrix: 44 | python-version: [3.9] 45 | steps: 46 | - uses: actions/checkout@v3 47 | - name: 
Set up python ${{ matrix.python-version }} 48 | uses: actions/setup-python@v4 49 | with: 50 | python-version: ${{ matrix.python-version }} 51 | - uses: actions/cache@v2 52 | with: 53 | path: ~/.cache/pip 54 | key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('requirements.txt') }} 55 | restore-keys: | 56 | ${{ runner.os }}-pip-${{ matrix.python-version }}- 57 | ${{ runner.os }}-pip- 58 | - name: Install automation scripts dependencies and add mlrun to dev packages 59 | run: pip install -r requirements.txt -r dev-requirements.txt 60 | - name: Test package 61 | run: make test 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mlrun/ml-models-gpu:1.3.0 2 | RUN pip install -U transformers[deepspeed] 3 | RUN pip install -U datasets 4 | RUN pip install -U accelerate 5 | RUN pip install -U evaluate 6 | RUN pip install -U protobuf==3.20.* 7 | RUN pip install -U mpi4py 8 | RUN conda install -c "nvidia/label/cuda-11.7.1" cuda-nvprof -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | PYTHON_INTERPRETER = python3 3 | SHARED_DIR ?= ~/mlrun-data 4 | MLRUN_TAG ?= 1.3.0 5 | HOST_IP ?=$$(ip route get 1.2.3.4 | awk '{print $$7}') 6 | CONDA_ENV ?= mlrun 7 | SHELL=/bin/bash 8 | CONDA_PY_VER ?= 3.9 9 | CONDA_ACTIVATE = source $$(conda info --base)/etc/profile.d/conda.sh ; conda activate ; conda activate 10 | 11 | ################################################################################# 12 | # COMMANDS # 13 | ################################################################################# 14 | 15 | .PHONY: help 16 | help: ## Display available commands 17 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' 18 | 19 | .PHONY: all 20 | all: 21 | $(error please pick a target) 22 | 23 | .PHONY: install-requirements 24 | install-requirements: ## Install all requirements needed for development 25 | $(PYTHON_INTERPRETER) -m pip install -r requirements.txt -r dev-requirements.txt 26 | 27 | 28 | .PHONY: package-wheel 29 | package-wheel: clean ## Build python package wheel 30 | python setup.py bdist_wheel 31 | 32 | .PHONY: clean 33 | clean: ## Clean python package build artifacts 34 | rm -rf build 35 | rm -rf dist 36 | find . -type f -name "*.py[co]" -delete 37 | find . -type d -name "__pycache__" -delete 38 | 39 | .PHONY: fmt 40 | fmt: ## Format the code (using black and isort) 41 | @echo "Running black fmt..." 42 | $(PYTHON_INTERPRETER) -m black src 43 | $(PYTHON_INTERPRETER) -m isort src 44 | 45 | .PHONY: lint 46 | lint: fmt-check flake8 ## Run lint on the code 47 | 48 | .PHONY: fmt-check 49 | fmt-check: ## Format and check the code (using black and isort) 50 | @echo "Running black+isort fmt check..." 51 | $(PYTHON_INTERPRETER) -m black --check --diff src 52 | $(PYTHON_INTERPRETER) -m isort --check --diff src 53 | 54 | .PHONY: flake8 55 | flake8: ## Run flake8 lint 56 | @echo "Running flake8 lint..." 57 | $(PYTHON_INTERPRETER) -m flake8 src 58 | 59 | .PHONY: mlrun-docker 60 | mlrun-docker: ## Start MLRun & Nuclio containers (using Docker compose) 61 | mkdir $(SHARED_DIR) -p 62 | @echo "HOST_IP=$(HOST_IP)" > .env 63 | SHARED_DIR=$(SHARED_DIR) TAG=$(MLRUN_TAG) docker-compose -f compose.yaml up -d 64 | @echo "use docker-compose stop / logs commands to stop or view logs" 65 | 66 | .PHONY: mlrun-api 67 | mlrun-api: ## Run MLRun DB locally (as process) 68 | @echo "Installing MLRun API dependencies ..." 69 | $(PYTHON_INTERPRETER) -m pip install uvicorn~=0.17.0 dask-kubernetes~=0.11.0 apscheduler~=3.6 sqlite3-to-mysql~=1.4 70 | @echo "Starting local mlrun..." 71 | MLRUN_ARTIFACT_PATH=$$(realpath ./artifacts) MLRUN_ENV_FILE= mlrun db -b 72 | 73 | .PHONY: conda-env 74 | conda-env: ## Create a conda environment 75 | @echo "Creating new conda environment $(CONDA_ENV)..." 76 | conda create -n $(CONDA_ENV) -y python=$(CONDA_PY_VER) ipykernel graphviz pip 77 | test -s ./mlrun.env && conda env config vars set -n $(CONDA_ENV) MLRUN_ENV_FILE=$$(realpath ./mlrun.env) 78 | @echo "Installing requirements.txt..." 
79 | 	$(CONDA_ACTIVATE) $(CONDA_ENV); pip install -r requirements.txt
80 | 	@echo -e "\nTo run the mlrun API as a local process, type:\n conda activate $(CONDA_ENV) && make mlrun-api"
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # **MLOpsPedia** - The MLOps Master Bot
 2 | 
 3 | huggingface-mlrun
 4 | 
 5 | This demo demonstrates how to fine-tune an LLM and build an ML application: the **MLOps master bot**! We'll train [`falcon-7b`](https://huggingface.co/tiiuae/falcon-7b) on [**Iguazio**'s MLOps blogs](https://www.iguazio.com/blog/) and cover how easy it is to take a model and code from development to production. Even if it's a big, scary LLM, MLRun will take care of the dirty work!
 6 | 
 7 | We will use:
 8 | * [**HuggingFace**](https://huggingface.co/) - as the main machine learning framework to get the model and tokenizer.
 9 | * [**DeepSpeed**](https://www.deepspeed.ai/) - as the distributed training framework.
10 | * and [**MLRun**](https://www.mlrun.org/) - as the orchestrator to operationalize it, moving it from development to production.
11 | 
12 | The demo contains a single [notebook](./tutorial.ipynb) that covers the two main stages in every MLOps project:
13 | 
14 | * **Training Pipeline Automation** - Demonstrating how to get an existing model (`falcon-7b`) from HuggingFace's Transformers package and operationalize it through all of its life cycle phases: data collection, data preparation, training and evaluation, as a fully automated pipeline.
15 | * **Application Serving Pipeline** - Showing how to productize the newly trained LLM as a serverless function.
16 | 
17 | You can find all the Python source code under [/src](./src).
18 | 
19 | [](http://www.youtube.com/watch?v=aAU54bTH6_o "MLOps for Generative AI with MLRun")
20 | 
21 | Be sure to check out Yaron Haviv's video [Deploying Hugging Face Models to Production at Scale with GPUs](http://www.youtube.com/watch?v=aAU54bTH6_o)
22 | to get a walkthrough of a similar demo.
23 | 
24 | ___
25 | 
26 | ## Installation
27 | 
28 | This project can run in different development environments:
29 | * Local computer (using PyCharm, VSCode, Jupyter, etc.)
30 | * Inside GitHub Codespaces
31 | * Other managed Jupyter environments
32 | 
33 | ### Install the code and mlrun client
34 | 
35 | To get started, fork this repo into your GitHub account and clone it into your development environment.
36 | 
37 | To install the package dependencies (not required in GitHub codespaces), use:
38 | 
39 |     make install-requirements
40 | 
41 | If you prefer to use Conda, use this instead (to create and configure a conda env):
42 | 
43 |     make conda-env
44 | 
45 | > Make sure you open the notebooks and select the `mlrun` conda environment
46 | 
47 | ### Install or connect to MLRun service/cluster
48 | 
49 | The MLRun service and computation can run locally (minimal setup) or over a remote Kubernetes environment.
50 | 
51 | If your development environment supports Docker and has enough CPU resources, run:
52 | 
53 |     make mlrun-docker
54 | 
55 | > MLRun UI can be viewed at: http://localhost:8060
56 | 
57 | If your environment is minimal, run mlrun as a process (no UI):
58 | 
59 |     [conda activate mlrun &&] make mlrun-api
60 | 
61 | For MLRun to run properly, you should set up your client environment. This is not required when using **codespaces**, the mlrun **conda** environment, or **iguazio** managed notebooks.
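
For example, a minimal sketch of configuring the client from Python (assuming you run it from the repo root so the relative path resolves):

    import mlrun

    # Load the MLRun client configuration (API address, credentials, etc.) from the repo's env file:
    mlrun.set_env_from_file("./mlrun.env")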
62 | 63 | Your environment should include `MLRUN_ENV_FILE= ` (point to the mlrun .env file 64 | in this repo), see [mlrun client setup](https://docs.mlrun.org/en/latest/install/remote.html) instructions for details. 65 | 66 | > Note: You can also use a remote MLRun service (over Kubernetes), instead of starting a local mlrun, 67 | > edit the [mlrun.env](./mlrun.env) and specify its address and credentials 68 | -------------------------------------------------------------------------------- /compose.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | init_nuclio: 3 | image: alpine:3.16 4 | command: 5 | - "/bin/sh" 6 | - "-c" 7 | - | 8 | mkdir -p /etc/nuclio/config/platform; \ 9 | cat << EOF | tee /etc/nuclio/config/platform/platform.yaml 10 | runtime: 11 | common: 12 | env: 13 | MLRUN_DBPATH: http://${HOST_IP:?err}:8080 14 | local: 15 | defaultFunctionContainerNetworkName: mlrun 16 | defaultFunctionRestartPolicy: 17 | name: always 18 | maxRetryCount: 0 19 | defaultFunctionVolumes: 20 | - volume: 21 | name: mlrun-stuff 22 | hostPath: 23 | path: ${SHARED_DIR:?err} 24 | volumeMount: 25 | name: mlrun-stuff 26 | mountPath: /home/jovyan/data/ 27 | logger: 28 | sinks: 29 | myStdoutLoggerSink: 30 | kind: stdout 31 | system: 32 | - level: debug 33 | sink: myStdoutLoggerSink 34 | functions: 35 | - level: debug 36 | sink: myStdoutLoggerSink 37 | EOF 38 | volumes: 39 | - nuclio-platform-config:/etc/nuclio/config 40 | 41 | mlrun-api: 42 | image: "mlrun/mlrun-api:${TAG:-1.1.2}" 43 | ports: 44 | - "8080:8080" 45 | environment: 46 | MLRUN_ARTIFACT_PATH: "${SHARED_DIR}/{{project}}" 47 | # using local storage, meaning files / artifacts are stored locally, so we want to allow access to them 48 | MLRUN_HTTPDB__REAL_PATH: /data 49 | MLRUN_HTTPDB__DATA_VOLUME: "${SHARED_DIR}" 50 | MLRUN_LOG_LEVEL: DEBUG 51 | MLRUN_NUCLIO_DASHBOARD_URL: http://nuclio:8070 52 | MLRUN_HTTPDB__DSN: "sqlite:////data/mlrun.db?check_same_thread=false" 53 | MLRUN_UI__URL: http://localhost:8060 54 | # not running on k8s meaning no need to store secrets 55 | MLRUN_SECRET_STORES__KUBERNETES__AUTO_ADD_PROJECT_SECRETS: "false" 56 | # let mlrun control nuclio resources 57 | MLRUN_HTTPDB__PROJECTS__FOLLOWERS: "nuclio" 58 | volumes: 59 | - "${SHARED_DIR:?err}:/data" 60 | networks: 61 | - mlrun 62 | 63 | mlrun-ui: 64 | image: "mlrun/mlrun-ui:${TAG:-1.1.2}" 65 | ports: 66 | - "8060:8090" 67 | environment: 68 | MLRUN_API_PROXY_URL: http://mlrun-api:8080 69 | MLRUN_NUCLIO_MODE: enable 70 | MLRUN_NUCLIO_API_URL: http://nuclio:8070 71 | MLRUN_NUCLIO_UI_URL: http://localhost:8070 72 | networks: 73 | - mlrun 74 | 75 | nuclio: 76 | image: "quay.io/nuclio/dashboard:${NUCLIO_TAG:-stable-amd64}" 77 | ports: 78 | - "8070:8070" 79 | environment: 80 | NUCLIO_DASHBOARD_EXTERNAL_IP_ADDRESSES: "${HOST_IP:?err}" 81 | volumes: 82 | - /var/run/docker.sock:/var/run/docker.sock 83 | - nuclio-platform-config:/etc/nuclio/config 84 | depends_on: 85 | - init_nuclio 86 | networks: 87 | - mlrun 88 | 89 | volumes: 90 | nuclio-platform-config: {} 91 | 92 | networks: 93 | mlrun: 94 | name: mlrun 95 | -------------------------------------------------------------------------------- /data/html_urls.txt: -------------------------------------------------------------------------------- 1 | https://www.iguazio.com/blog/iguazio-releases-data-science-platform-version-2-8/ 2 | https://www.iguazio.com/blog/intelligent-edge-iguazio-google/ 3 | https://www.iguazio.com/blog/top-9-odsc-europe-sessions-you-cant-miss/ 4 | 
https://www.iguazio.com/blog/cloud-native-will-shake-up-enterprise-storage/ 5 | https://www.iguazio.com/blog/building-an-automated-ml-pipeline-with-a-feature-store-using-iguazio-snowflake/ 6 | https://www.iguazio.com/blog/concept-drift-and-the-impact-of-covid-19-on-data-science/ 7 | https://www.iguazio.com/blog/odsc-east-boston-2022-top-11-sessions-for-ai-and-ml-professionals-to-attend/ 8 | https://www.iguazio.com/blog/idc-mlopmarketscape-2022/ 9 | https://www.iguazio.com/blog/iguazio-listed-in-7-gartner-hype-cycles-for-2021/ 10 | https://www.iguazio.com/blog/announcing-the-winners-mlops-for-good-hackathon/ 11 | https://www.iguazio.com/blog/the-importance-of-data-storytelling-in-shaping-a-data-science-product/ 12 | https://www.iguazio.com/blog/modernize-it-infrastructure/ 13 | https://www.iguazio.com/blog/implementing-automation-and-an-mlops-framework-for-enterprise-scale-ml/ 14 | https://www.iguazio.com/blog/automating-ml-pipelines-on-azure-and-azure-stack/ 15 | https://www.iguazio.com/blog/real-time-streaming-for-data-science/ 16 | https://www.iguazio.com/blog/dcos-apps/ 17 | https://www.iguazio.com/blog/iguazio-receives-an-honorable-mention-in-the-2021-gartner-magic-quadrant-for-data-science-and-machine-learning-platforms/ 18 | https://www.iguazio.com/blog/gartner-2022-market-guide-for-dsml-engineering-platforms/ 19 | https://www.iguazio.com/blog/can-open-source-serverless-be-simpler-than-lambda/ 20 | https://www.iguazio.com/blog/cncf-webinar-serverless-ai/ 21 | https://www.iguazio.com/blog/2018-can-cloud-big-data-ai-stand-turmoil/ 22 | https://www.iguazio.com/blog/2022-predictions/ 23 | https://www.iguazio.com/blog/mlops-for-python/ 24 | https://www.iguazio.com/blog/mlops-predictions-for-2023/ 25 | https://www.iguazio.com/blog/adopting-a-production-first-approach-to-enterprise-ai/ 26 | https://www.iguazio.com/blog/from-automl-to-automlops/ 27 | https://www.iguazio.com/blog/odscwest2021/ 28 | https://www.iguazio.com/blog/top-10-recommended-mlops-world-2021-sessions/ 29 | https://www.iguazio.com/blog/breaking-the-silos-between-data-scientists-engineers-and-devops-with-new-mlops-practices/ 30 | https://www.iguazio.com/blog/top-8-machine-learning-resources-for-data-scientists-data-engineers-and-everyone/ 31 | https://www.iguazio.com/blog/azure-synapse-analytics-and-iguazio/ 32 | https://www.iguazio.com/blog/how-to-tap-into-higher-level-abstraction-efficiency-automation-to-simplify-your-ai-ml-journey/ 33 | https://www.iguazio.com/blog/how-seagate-runs-advanced-manufacturing-at-scale-with-iguazio/ 34 | https://www.iguazio.com/blog/predictive-real-time-operational-ml-pipeline-fighting-customer-churn/ 35 | https://www.iguazio.com/blog/build-an-ai-app-in-under-20-minutes/ 36 | https://www.iguazio.com/blog/deploying-machine-learning-models-for-real-time-predictions-checklist/ 37 | https://www.iguazio.com/blog/data-science-post-hadoop/ 38 | https://www.iguazio.com/blog/wanted-a-faster-storage-stack/ 39 | https://www.iguazio.com/blog/kubernetes-the-open-scalable-approach-to-ml-pipelines/ 40 | https://www.iguazio.com/blog/vmware-on-aws-a-scorecard-for-winners-and-losers/ 41 | https://www.iguazio.com/blog/aws-reinvent-data-serverless-ai/ 42 | https://www.iguazio.com/blog/beyond-hyped-iguazio-named-in-8-gartner-hype-cycles-for-2022/ 43 | https://www.iguazio.com/blog/ai-ml-and-roi-why-your-balance-sheet-cares-about-your-technology-choices/ 44 | https://www.iguazio.com/blog/orchestrating-ml-pipelines-scale-kubeflow/ 45 | 
https://www.iguazio.com/blog/using-automated-model-management-for-cpg-trade-success/ 46 | https://www.iguazio.com/blog/spark-over-kubernetes/ 47 | https://www.iguazio.com/blog/announcing-iguazio-version-3-0-breaking-the-silos-for-faster-deployment/ 48 | https://www.iguazio.com/blog/the-complete-guide-to-using-the-iguazio-feature-store-with-azure-ml-part-4/ 49 | https://www.iguazio.com/blog/accelerating-ml-deployment-in-hybrid-environments/ 50 | https://www.iguazio.com/blog/it-worked-fine-in-jupyter-now-what/ 51 | https://www.iguazio.com/blog/kubeflow-vs-mlflow-vs-mlrun/ 52 | https://www.iguazio.com/blog/part-one-the-complete-guide-to-using-the-iguazio-feature-store-with-azure-ml/ 53 | https://www.iguazio.com/blog/handling-large-datasets-with-mlops-dask-on-kubernetes/ 54 | https://www.iguazio.com/blog/faster-ai-development-serverless/ 55 | https://www.iguazio.com/blog/nuclio-future-serverless-computing/ 56 | https://www.iguazio.com/blog/how-to-build-real-time-feature-engineering-with-a-feature-store/ 57 | https://www.iguazio.com/blog/nyc-meetup-jan2018/ 58 | https://www.iguazio.com/blog/distributed-feature-store-ingestion-with-iguazio-snowflake-and-spark/ 59 | https://www.iguazio.com/blog/iguazio-raises-33m-accelerate-digital-transformation/ 60 | https://www.iguazio.com/blog/the-complete-guide-to-using-the-iguazio-feature-store-with-azure-ml-part-2/ 61 | https://www.iguazio.com/blog/serverless-can-it-simplify-data-science-projects/ 62 | https://www.iguazio.com/blog/machine-learning-hard/ 63 | https://www.iguazio.com/blog/free-manufacturing-datasets/ 64 | https://www.iguazio.com/blog/building-real-time-ml-pipelines-with-a-feature-store/ 65 | https://www.iguazio.com/blog/paving-the-data-science-dirt-road/ 66 | https://www.iguazio.com/blog/horovod-for-deep-learning-on-a-gpu-cluster/ 67 | https://www.iguazio.com/blog/using-containers-as-mini-vms-is-not-cloud-native/ 68 | https://www.iguazio.com/blog/top-9-recommended-odsc-europe-2021-sessions/ 69 | https://www.iguazio.com/blog/realtime-bigdata/ 70 | https://www.iguazio.com/blog/python-pandas-performance/ 71 | https://www.iguazio.com/blog/iguazio-rvmworld-2017-vmware-feeds-off-openstack-decay/ 72 | https://www.iguazio.com/blog/how-gpuaas-on-kubeflow-can-boost-your-productivity/ 73 | https://www.iguazio.com/blog/mlops-nyc-sessions/ 74 | https://www.iguazio.com/blog/2017-predictions-clouds-thunder-and-fog/ 75 | https://www.iguazio.com/blog/odsc-east-2023/ 76 | https://www.iguazio.com/blog/join-us-at-nvidia-gtc-2021/ 77 | https://www.iguazio.com/blog/mckinsey-acquires-iguazio-our-startups-journey/ 78 | https://www.iguazio.com/blog/git-based-ci-cd-for-machine-learning-mlops/ 79 | https://www.iguazio.com/blog/mlops-for-good-hackathon-roundup/ 80 | https://www.iguazio.com/blog/big-data-must-begin-with-clean-slate/ 81 | https://www.iguazio.com/blog/suse-iguazio/ 82 | https://www.iguazio.com/blog/how-to-run-workloads-on-spark-operator-with-dynamic-allocation-using-mlrun/ 83 | https://www.iguazio.com/blog/will-kubernetes-sink-the-hadoop-ship/ 84 | https://www.iguazio.com/blog/5-incredible-data-science-solutions-for-real-world-problems/ 85 | https://www.iguazio.com/blog/mlops-challenges-solutions-future-trends/ 86 | https://www.iguazio.com/blog/cloud-data-services-sprawl-its-complicated/ 87 | https://www.iguazio.com/blog/predicting-1st-day-churn-in-real-time/ 88 | https://www.iguazio.com/blog/machine-learning-experiment-tracking-from-zero-to-hero-in-2-lines-of-code/ 89 | 
https://www.iguazio.com/blog/how-to-bring-breakthrough-performance-and-productivity-to-ai-ml-projects/ 90 | https://www.iguazio.com/blog/how-to-deploy-an-mlrun-project-in-a-ci-cd-process-with-jenkins-pipeline/ 91 | https://www.iguazio.com/blog/iguazio-named-in-forresters-now-tech-ai-ml-platforms-q1-2022/ 92 | https://www.iguazio.com/blog/the-complete-guide-to-using-the-iguazio-feature-store-with-azure-ml-part-3/ 93 | https://www.iguazio.com/blog/what-are-feature-stores-and-why-are-they-critical-for-scaling-data-science/ 94 | https://www.iguazio.com/blog/reinventing-data-services/ 95 | https://www.iguazio.com/blog/re-structure-in-big-data/ 96 | https://www.iguazio.com/blog/top-22-free-healthcare-datasets-for-machine-learning/ 97 | https://www.iguazio.com/blog/operationalizing-machine-learning-for-the-automotive-future/ 98 | https://www.iguazio.com/blog/automating-mlops-for-deep-learning-how-to-operationalize-dl-with-minimal-effort/ 99 | https://www.iguazio.com/blog/iguazio-named-a-fast-moving-leader-by-gigaom-in-the-radar-for-mlops-report/ 100 | https://www.iguazio.com/blog/data-science-salon-review-elevating-data-science-practices-for-media-entertainment-advertising/ 101 | https://www.iguazio.com/blog/wrapping-up-serverless-nyc-2018/ 102 | https://www.iguazio.com/blog/the-next-gen-digital-transformation-cloud-native-data-platforms/ 103 | https://www.iguazio.com/blog/best-practices-for-succeeding-with-mlops/ 104 | https://www.iguazio.com/blog/did-amazon-just-kill-open-source/ 105 | https://www.iguazio.com/blog/cloud-native-storage-primer/ 106 | https://www.iguazio.com/blog/serverless-background-challenges-and-future/ 107 | https://www.iguazio.com/blog/experiment-tracking/ 108 | https://www.iguazio.com/blog/continuous-analytics-real-time-meets-cloud-native/ 109 | https://www.iguazio.com/blog/concept-drift-deep-dive-how-to-build-a-drift-aware-ml-system/ 110 | https://www.iguazio.com/blog/building-ml-pipelines-over-federated-data-compute-environments/ 111 | https://www.iguazio.com/blog/top-8-recommended-mlops-world-2022-sessions/ 112 | https://www.iguazio.com/blog/it-vendors-dont-stand-a-chance-against-the-cloud/ 113 | https://www.iguazio.com/blog/ml-workflows-what-can-you-automate/ 114 | https://www.iguazio.com/blog/iguazio-collaborates-with-equinix-to-offer-data-centric-hybrid-cloud-solutions/ 115 | https://www.iguazio.com/blog/gigaom-names-iguazio-a-leader-and-outperformer-for-2022/ 116 | https://www.iguazio.com/blog/iguazio-nvidia-edge/ 117 | https://www.iguazio.com/blog/extending-kubeflow-into-an-end-to-end-ml-solution/ 118 | https://www.iguazio.com/blog/iguazio-listed-in-five-2020-gartner-hype-cycle-reports/ 119 | https://www.iguazio.com/blog/data-science-trends-2020/ 120 | https://www.iguazio.com/blog/operationalizing-data-science/ 121 | https://www.iguazio.com/blog/using-snowflake-and-dask-for-large-scale-ml-workloads/ 122 | https://www.iguazio.com/blog/best-13-free-financial-datasets-for-machine-learning/ 123 | https://www.iguazio.com/blog/introduction-to-tf-serving/ 124 | https://www.iguazio.com/blog/hcis-journey-to-mlops-efficiency/ 125 | https://www.iguazio.com/blog/streamlined-iot-at-scale-with-iguazio/ 126 | https://www.iguazio.com/blog/iguazio-product-update-optimize-your-ml-workload-costs-with-aws-ec2-spot-instances/ 127 | https://www.iguazio.com/blog/top-10-odsc-west-sessions-you-must-attend/ 128 | https://www.iguazio.com/blog/iguazio-named-a-leader-and-outperformer-in-gigaom-radar-for-mlops-2022/ 129 | 
https://www.iguazio.com/blog/deploying-your-hugging-face-models-to-production-at-scale-with-mlrun/ 130 | -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | pytest~=5.4 2 | black~=24.8 3 | isort~=5.7 4 | flake8~=5.0 5 | -------------------------------------------------------------------------------- /images/16-workers-training.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-llm-tuning/2b377d125ce1d2a981d3b1dbfdb9055d69b19714/images/16-workers-training.png -------------------------------------------------------------------------------- /images/gradio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-llm-tuning/2b377d125ce1d2a981d3b1dbfdb9055d69b19714/images/gradio.png -------------------------------------------------------------------------------- /images/hf-ds-mlrun.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-llm-tuning/2b377d125ce1d2a981d3b1dbfdb9055d69b19714/images/hf-ds-mlrun.png -------------------------------------------------------------------------------- /images/serving-graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-llm-tuning/2b377d125ce1d2a981d3b1dbfdb9055d69b19714/images/serving-graph.png -------------------------------------------------------------------------------- /images/training-pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-llm-tuning/2b377d125ce1d2a981d3b1dbfdb9055d69b19714/images/training-pipeline.png -------------------------------------------------------------------------------- /images/video-thumbnail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-llm-tuning/2b377d125ce1d2a981d3b1dbfdb9055d69b19714/images/video-thumbnail.png -------------------------------------------------------------------------------- /images/workflow-train.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-llm-tuning/2b377d125ce1d2a981d3b1dbfdb9055d69b19714/images/workflow-train.png -------------------------------------------------------------------------------- /mlrun.env: -------------------------------------------------------------------------------- 1 | # default env vars, will be loaded once MLRun imports/starts 2 | # write here remote cluster credentials, addresses, etc. 3 | # uncomment the relevant lines and set with proper parameters 4 | 5 | # local/remote MLRun service address 6 | MLRUN_DBPATH=http://localhost:8080 7 | 8 | # if Nuclio not detected simulate it with mock 9 | MLRUN_MOCK_NUCLIO_DEPLOYMENT=auto 10 | 11 | # Iguazio cluster and V3IO credentials (for remote cluster) 12 | # V3IO_USERNAME= 13 | # V3IO_ACCESS_KEY= 14 | 15 | # AWS S3/services credentials 16 | # AWS_ACCESS_KEY_ID= 17 | # AWS_SECRET_ACCESS_KEY= 18 | 19 | # The Azure connection string which points at a storage account. 
For example: 20 | # DefaultEndpointsProtocol=https;AccountName=myAcct;AccountKey=XXXX;EndpointSuffix=core.windows.net 21 | # AZURE_STORAGE_CONNECTION_STRING= 22 | -------------------------------------------------------------------------------- /project.yaml: -------------------------------------------------------------------------------- 1 | kind: project 2 | metadata: 3 | name: mlopspedia-bot-yonis 4 | spec: 5 | params: 6 | source: git://github.com/mlrun/demo-llm-tuning.git#main 7 | default_image: yonishelach/mlrun-llm 8 | functions: 9 | - url: src/data_collection.py 10 | name: data-collecting 11 | kind: job 12 | image: mlrun/mlrun 13 | - url: src/data_preprocess.py 14 | name: data-preparing 15 | kind: job 16 | - url: src/trainer.py 17 | name: training 18 | kind: job 19 | - name: serving 20 | spec: 21 | kind: serving 22 | metadata: 23 | name: serving 24 | project: mlopspedia-bot-yonis 25 | spec: 26 | command: '' 27 | args: [] 28 | image: yonishelach/mlrun-llm 29 | build: 30 | functionSourceCode: import json
import os
import zipfile
from typing import Any, Dict

import evaluate
import mlrun.artifacts
import numpy as np
import torch
import transformers
from mlrun.serving.v2_serving import V2ModelServer
from peft import PeftModel

SUBJECT_MARK = "### Human: "
CONTENT_MARK = "\n### Assistant: "
PROMPT_FORMAT = SUBJECT_MARK + "{}" + CONTENT_MARK


def preprocess(request: dict) -> dict:
    """
    Convert the request to the structure required by the predict function.

    :param request: An HTTP request body that contains the prompt.
    :returns: The reformatted request, in the structure expected by the model server.
    """
    # Read bytes:
    if isinstance(request, bytes):
        request = json.loads(request)

    # Get the prompt:
    prompt = request.pop("prompt")

    # Format the prompt as subject:
    prompt = PROMPT_FORMAT.format(str(prompt))

    # Update the request and return:
    request = {"inputs": [{"prompt": [prompt], **request}]}
    return request
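
# A quick illustration of the transformation above (request values are hypothetical):
#   preprocess({"prompt": "What is MLOps?", "max_new_tokens": 150})
#   -> {"inputs": [{"prompt": ["### Human: What is MLOps?\n### Assistant: "], "max_new_tokens": 150}]}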


class LLMModelServer(V2ModelServer):
    """
    This is a temporary implementation; this model server will be built into MLRun 1.5.0.
    """

    def __init__(
        self,
        context: mlrun.MLClientCtx = None,
        name: str = None,
        model_class: str = "AutoModelForCausalLM",
        tokenizer_class: str = "AutoTokenizer",
        # model args:
        model_args: dict = None,
        # Load from MLRun args:
        model_path: str = None,
        # Load from hub args:
        model_name: str = None,
        tokenizer_name: str = None,
        # Deepspeed args:
        use_deepspeed: bool = False,
        n_gpus: int = 1,
        is_fp16: bool = True,
        # peft model:
        peft_model: str = None,
        # Inference args:
        **class_args,
    ):
        # Initialize the base server:
        super(LLMModelServer, self).__init__(
            context=context,
            name=name,
            model_path=model_path,
            **class_args,
        )

        # Save class names:
        self.model_class = model_class
        self.tokenizer_class = tokenizer_class

        # Save hub loading parameters:
        self.model_name = model_name
        self.tokenizer_name = tokenizer_name or self.model_name

        # Save the model loading arguments (default to an empty dict so unpacking them is always safe):
        self.model_args = model_args or {}

        # Save deepspeed parameters:
        self.use_deepspeed = use_deepspeed
        self.n_gpus = n_gpus
        self.is_fp16 = is_fp16

        # PEFT parameters:
        self.peft_model = peft_model

        # Prepare variables for future use:
        self.model = None
        self.tokenizer = None
        self._model_class = None
        self._tokenizer_class = None

    def load(self):
        # Get classes:
        self._model_class = getattr(transformers, self.model_class)
        self._tokenizer_class = getattr(transformers, self.tokenizer_class)

        # Load the model and tokenizer:
        if self.model_path:
            self._load_from_mlrun()
        else:
            self._load_from_hub()

        # Use deepspeed if needed:
        if self.use_deepspeed:
            import deepspeed

            self.model = deepspeed.init_inference(
                model=self.model,
                mp_size=self.n_gpus,
                dtype=torch.float16 if self.is_fp16 else torch.float32,
                replace_method="auto",
                replace_with_kernel_inject=True,
            )
        if self.peft_model:
            self._load_peft_model()

    def _extract_model(self, url):
        # Get the model artifact and file:
        (
            model_file,
            model_artifact,
            extra_data,
        ) = mlrun.artifacts.get_model(url)

        # Read the name:
        model_name = model_artifact.spec.db_key

        # Extract logged model files:
        model_directory = os.path.join(os.path.dirname(model_file), model_name)
        with zipfile.ZipFile(model_file, "r") as zip_file:
            zip_file.extractall(model_directory)
        return model_directory

    def _load_peft_model(self):
        model_directory = self._extract_model(self.peft_model)
        self.model = PeftModel.from_pretrained(self.model, model_directory)
        self.model.eval()

    def _load_from_mlrun(self):
        model_directory = self._extract_model(self.model_path)

        # Loading the saved pretrained tokenizer and model:
        self.tokenizer = self._tokenizer_class.from_pretrained(model_directory)
        self.model = self._model_class.from_pretrained(
            model_directory, **self.model_args
        )

    def _load_from_hub(self):
        # Loading the pretrained tokenizer and model:
        self.tokenizer = self._tokenizer_class.from_pretrained(
            self.tokenizer_name,
            model_max_length=512,
        )
        self.model = self._model_class.from_pretrained(
            self.model_name, **self.model_args
        )

    def predict(self, request: Dict[str, Any]) -> dict:
        # Get the inputs:
        kwargs = request["inputs"][0]
        prompt = kwargs.pop("prompt")[0]

        # Tokenize:
        inputs = self.tokenizer(prompt, return_tensors="pt")["input_ids"]
        if self.model.device.type == "cuda":
            inputs = inputs.cuda()

        # Get the pad token id:
        pad_token_id = self.tokenizer.eos_token_id

        # Infer through the model:
        output = self.model.generate(
            input_ids=inputs,
            do_sample=True,
            num_return_sequences=1,
            pad_token_id=pad_token_id,
            **kwargs,
        )

        # Detokenize:
        prediction = self.tokenizer.decode(output[0], skip_special_tokens=True)

        return {"prediction": prediction, "prompt": prompt}

    def explain(self, request: Dict) -> str:
        return f"LLM model server named {self.name}"
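
    # Note: in this demo the server above is wired into the serving graph defined later in project.yaml
    # (the `mlopspedia` step), e.g. with class_args such as model_name="tiiuae/falcon-7b",
    # model_args={"load_in_8bit": True, ...} and a PEFT adapter (peft_model) taken from the MLRun artifact store.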


def postprocess(inputs: dict) -> dict:
    """
    Postprocess the generated output of the model and extract the assistant's answer.
    """
    # Read the prediction:
    prediction = inputs["outputs"]["prediction"]

    # Look for the assistant content mark (CONTENT_MARK) to confirm the model answered the subject; otherwise the output is probably garbage:
    content_index = prediction.find(CONTENT_MARK)
    if content_index == -1:
        output = f"I'm not sure about it, but I'll do my best: {prediction}"
    else:
        output = prediction[content_index + len(CONTENT_MARK) :]

    return {
        "inputs": [
            {"prediction": output.strip(), "prompt": inputs["outputs"]["prompt"]}
        ]
    }
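
# For example (hypothetical model output), given
#   {"outputs": {"prediction": "### Human: What is MLRun?\n### Assistant: MLRun is an open MLOps framework...", "prompt": "### Human: What is MLRun?\n### Assistant: "}}
# postprocess() strips everything up to and including the assistant mark and returns
#   {"inputs": [{"prediction": "MLRun is an open MLOps framework...", "prompt": "### Human: What is MLRun?\n### Assistant: "}]}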


class ToxicityClassifierModelServer(V2ModelServer):
    """
    Model server that checks whether the text contains toxic language.
    """

    def __init__(self, context, name: str, threshold: float = 0.7, **class_args):
        # Initialize the base server:
        super(ToxicityClassifierModelServer, self).__init__(
            context=context,
            name=name,
            model_path=None,
            **class_args,
        )

        # Store the threshold of toxicity:
        self.threshold = threshold

    def load(self):
        self.model = evaluate.load("toxicity", module_type="measurement")

    def predict(self, inputs: Dict) -> str:
        # Read the user's input and model output:
        prediction = inputs["inputs"][0]["prediction"]
        prompt = inputs["inputs"][0]["prompt"]

        # Infer through the evaluator model:
        result = self.model.compute(predictions=[prediction, prompt])["toxicity"]
        if any(np.array(result) > self.threshold):
            return "This bot does not respond to toxic language."

        return prediction

    def explain(self, request: Dict) -> str:
        return f"Text toxicity classifier server named {self.name}"

from mlrun.runtimes import nuclio_init_hook
def init_context(context):
    nuclio_init_hook(context, globals(), 'serving_v2')

def handler(context, event):
    return context.mlrun_handler(context, event)
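
# A minimal local-testing sketch (illustrative only; it assumes this serving function is loaded into an
# MLRun project, for example the one defined by project.yaml below):
#   serving_function = project.get_function("serving")
#   mock_server = serving_function.to_mock_server()
#   mock_server.test(body={"prompt": "What is a feature store?", "max_new_tokens": 150})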
 31 | source: ./ 32 | commands: [] 33 | code_origin: http://github.com/mlrun/demo-llm-tuning#refs/heads/main#91145f96f3cd627431de34d0bae3547efbdd7097 34 | origin_filename: src/serving.py 35 | requirements: [] 36 | description: '' 37 | default_handler: '' 38 | disable_auto_mount: false 39 | clone_target_dir: '' 40 | env: 41 | - name: V3IO_API 42 | value: '' 43 | - name: V3IO_USERNAME 44 | value: '' 45 | - name: V3IO_ACCESS_KEY 46 | value: '' 47 | - name: V3IO_FRAMESD 48 | value: '' 49 | resources: 50 | requests: 51 | memory: 1Mi 52 | cpu: 25m 53 | limits: 54 | nvidia.com/gpu: 1 55 | priority_class_name: igz-workload-medium 56 | preemption_mode: prevent 57 | min_replicas: 1 58 | max_replicas: 4 59 | source: '' 60 | function_kind: serving_v2 61 | readiness_timeout: 3000 62 | function_handler: serving:handler 63 | base_image_pull: false 64 | graph: 65 | steps: 66 | preprocess: 67 | kind: task 68 | handler: preprocess 69 | after: [] 70 | mlopspedia: 71 | kind: task 72 | class_name: LLMModelServer 73 | class_args: 74 | model_args: 75 | load_in_8bit: true 76 | device_map: cuda:0 77 | trust_remote_code: true 78 | tokenizer_name: tiiuae/falcon-7b 79 | model_name: tiiuae/falcon-7b 80 | peft_model: store://artifacts/mlopspedia-bot-yonis/falcon-7b-mlrun 81 | after: 82 | - preprocess 83 | postprocess: 84 | kind: task 85 | handler: postprocess 86 | after: 87 | - mlopspedia 88 | toxicity-classifier: 89 | kind: task 90 | class_name: ToxicityClassifierModelServer 91 | class_args: 92 | threshold: 0.7 93 | after: 94 | - postprocess 95 | responder: true 96 | engine: async 97 | secret_sources: [] 98 | affinity: 99 | nodeAffinity: 100 | requiredDuringSchedulingIgnoredDuringExecution: 101 | nodeSelectorTerms: 102 | - matchExpressions: 103 | - key: app.iguazio.com/lifecycle 104 | operator: NotIn 105 | values: 106 | - preemptible 107 | - key: eks.amazonaws.com/capacityType 108 | operator: NotIn 109 | values: 110 | - SPOT 111 | - key: node-lifecycle 112 | operator: NotIn 113 | values: 114 | - spot 115 | tolerations: null 116 | security_context: {} 117 | verbose: false 118 | workflows: 119 | - path: src/training_workflow.py 120 | name: training_workflow 121 | artifacts: [] 122 | conda: '' 123 | source: git://github.com/mlrun/demo-llm-tuning.git#main 124 | origin_url: http://github.com/mlrun/demo-llm-tuning#refs/heads/main 125 | load_source_on_run: true 126 | desired_state: online 127 | default_image: yonishelach/mlrun-llm 128 | build: 129 | commands: [] 130 | requirements: [] 131 | custom_packagers: [] 132 | -------------------------------------------------------------------------------- /project_setup.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | 3 | import mlrun 4 | 5 | 6 | def assert_build(): 7 | for module_name in [ 8 | "torch", 9 | "transformers", 10 | "datasets", 11 | "accelerate", 12 | "evaluate", 13 | "deepspeed", 14 | "mpi4py", 15 | ]: 16 | module = importlib.import_module(module_name) 17 | print(module.__version__) 18 | 19 | 20 | def setup( 21 | project: mlrun.projects.MlrunProject 22 | ): 23 | """ 24 | Creating the project for this demo. 25 | :returns: a fully prepared project for this demo. 
26 | """ 27 | print(project.get_param("source")) 28 | # Set or build the default image: 29 | if project.get_param("default_image") is None: 30 | print("Building image for the demo:") 31 | image_builder = project.set_function( 32 | "project_setup.py", 33 | name="image-builder", 34 | handler="assert_build", 35 | kind="job", 36 | image="mlrun/ml-models-gpu", 37 | requirements=[ 38 | "torch", 39 | "transformers[deepspeed]", 40 | "datasets", 41 | "accelerate", 42 | "evaluate", 43 | "mpi4py", 44 | ], 45 | ) 46 | assert image_builder.deploy() 47 | default_image = image_builder.spec.image 48 | project.set_default_image(project.get_param("default_image")) 49 | 50 | # Set the project git source: 51 | 52 | project.set_source(project.get_param("source"), pull_at_runtime=True) 53 | 54 | # Set the data collection function: 55 | data_collection_function = project.set_function( 56 | "src/data_collection.py", 57 | name="data-collecting", 58 | image="mlrun/mlrun", 59 | kind="job", 60 | 61 | ) 62 | data_collection_function.apply(mlrun.auto_mount()) 63 | data_collection_function.save() 64 | 65 | # Set the data preprocessing function: 66 | project.set_function( 67 | "src/data_preprocess.py", 68 | name="data-preparing", 69 | kind="job", 70 | ) 71 | 72 | # Set the training function: 73 | train_function = project.set_function( 74 | "src/trainer.py", 75 | name="training", 76 | kind="job", 77 | ) 78 | train_function.with_limits( 79 | gpus=project.get_param("num_gpus_per_replica") or 4, 80 | cpu=project.get_param("num_cpus_per_replica") or 48, 81 | mem=project.get_param("memory_per_replica") or "192Gi", 82 | ) 83 | train_function.save() 84 | 85 | project.set_function( 86 | "src/serving.py", 87 | name="serving", 88 | kind="serving", 89 | ) 90 | 91 | # Set the training workflow: 92 | project.set_workflow("training_workflow", "src/training_workflow.py") 93 | 94 | # Save and return the project: 95 | project.save() 96 | return project -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.isort] 2 | profile = "black" 3 | multi_line_output = 3 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | mlrun 2 | torch 3 | plotly 4 | gradio 5 | transformers 6 | datasets 7 | accelerate 8 | evaluate 9 | bs4 10 | einops 11 | xformers -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | project_name = "myproj" 4 | with open("README.md", "r", encoding="utf-8") as fh: 5 | long_description = fh.read() 6 | 7 | setup( 8 | name=project_name, 9 | packages=[project_name], 10 | package_dir={project_name: "src"}, 11 | version="0.1.0", 12 | description="my desc", 13 | author="Yaron", 14 | author_email="author@example.com", 15 | license="MIT", 16 | long_description=long_description, 17 | long_description_content_type="text/markdown", 18 | python_requires=">=3.7", 19 | ) 20 | -------------------------------------------------------------------------------- /src/data_collection.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from pathlib import Path 4 | from urllib.request import Request, urlopen 5 | 6 | from bs4 import BeautifulSoup, Tag 7 | 
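# Prefix markers written into the scraped text: ARTICLE_TOKEN tags the article title and
# HEADER_TOKEN tags section headers, so data_preprocess.py can later split the text back
# into prompt/answer pairs.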
8 | ARTICLE_TOKEN = "Article: " 9 | HEADER_TOKEN = "### Human: " 10 | 11 | 12 | def normalize(s: str) -> str: 13 | """ 14 | Remove newline and tab characters from string 15 | """ 16 | return s.replace("\n", "").replace("\t", "") 17 | 18 | 19 | def mark_header_tags(soup: BeautifulSoup): 20 | """ 21 | Adding header token and article token prefixes to all headers in html, in order to parse the text later easily. 22 | 23 | :param soup: BeautifulSoup object of the html file 24 | """ 25 | nodes = soup.find_all(re.compile("^h[1-6]$")) 26 | # Tagging headers in html to identify in text files: 27 | if nodes: 28 | content_type = type(nodes[0].contents[0]) 29 | nodes[0].string = content_type( 30 | ARTICLE_TOKEN + normalize(str(nodes[0].contents[0])) 31 | ) 32 | for node in nodes[1:]: 33 | if node.string: 34 | content_type = type(node.contents[0]) 35 | if content_type == Tag: 36 | node.string = HEADER_TOKEN + normalize(node.string) 37 | else: 38 | node.string = content_type(HEADER_TOKEN + str(node.contents[0])) 39 | 40 | 41 | def get_html_as_string(url: str, mark_headers: bool) -> str: 42 | """ 43 | Retrieve text from html URL. 44 | 45 | :param url: html URL 46 | :param mark_headers: Whether to add article and header prefixes to headers to text 47 | 48 | :returns: html text content 49 | """ 50 | # read html source: 51 | req = Request(url=url, headers={"User-Agent": "Mozilla/5.0"}) 52 | web_html_content = urlopen(req).read().decode("utf-8") 53 | soup = BeautifulSoup(web_html_content, features="html.parser") 54 | if mark_headers: 55 | mark_header_tags(soup) 56 | return soup.get_text() 57 | 58 | 59 | def collect_html_to_text_files(urls_file: str, mark_headers=True) -> str: 60 | """ 61 | Retrieve all html text content from URLs as text files. 62 | 63 | :param urls_file: html URLs file 64 | :param mark_headers: Whether to add article and header prefixes to headers to text 65 | 66 | :returns: the directory name that contains all the content text files. 67 | """ 68 | directory = "html_as_text_files" 69 | os.makedirs(directory, exist_ok=True) 70 | # Writing html files as text files: 71 | with open(urls_file, "r") as f: 72 | urls = f.readlines() 73 | for url in urls: 74 | url = url.replace("\n", "") 75 | page_name = Path(url).name 76 | with open(f"{directory}/{page_name}.txt", "w") as f: 77 | f.write(get_html_as_string(url, mark_headers)) 78 | return directory 79 | -------------------------------------------------------------------------------- /src/data_preprocess.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import tempfile 4 | from pathlib import Path 5 | 6 | from datasets import load_dataset 7 | 8 | ARTICLE_TOKEN = "Article: " 9 | HEADER_TOKEN = "### Human: " 10 | CONTENT_TOKEN = "### Assistant: " 11 | 12 | DATA_FORMAT = """### Human: {} {} 13 | ### Assistant: {}""" 14 | END_OF_ARTICLE = "Latest Posts" 15 | 16 | 17 | def convert_textfile_to_data_with_prompts(txt_file: Path): 18 | """ 19 | Formatting the html text content into prompt form. 20 | Each header-content in the article is an element in the generated list of prompts 21 | 22 | :param txt_file: text content as a string with tokens of headers. 
23 | :returns: list of prompts 24 | """ 25 | # Read file: 26 | with open(txt_file, "r") as f: 27 | lines = f.readlines() 28 | 29 | start = 0 30 | end = 0 31 | subject_idx = [] 32 | data = [] 33 | # Dividing text into header - paragraph prompts: 34 | for i, line in enumerate(lines): 35 | if not start and line.startswith(ARTICLE_TOKEN): 36 | start = i 37 | elif HEADER_TOKEN + END_OF_ARTICLE in line: 38 | end = i 39 | break 40 | if line.startswith(HEADER_TOKEN): 41 | subject_idx.append(i) 42 | article_content = lines[start:end] 43 | subject_idx = [subject_i - start for subject_i in subject_idx] 44 | article_name = article_content[0].replace(ARTICLE_TOKEN, "") 45 | for i, subject in enumerate(subject_idx): 46 | if subject + 1 in subject_idx: 47 | continue 48 | subject_data = article_content[subject].replace(HEADER_TOKEN, "") 49 | if i + 1 == len(subject_idx): 50 | content_end = len(article_content) 51 | else: 52 | content_end = subject_idx[i + 1] 53 | content_limits = subject + 1, content_end 54 | data.append( 55 | DATA_FORMAT.format( 56 | article_name, 57 | subject_data, 58 | "".join(article_content[content_limits[0] : content_limits[1]]), 59 | ) 60 | ) 61 | return data 62 | 63 | 64 | def prepare_dataset(source_dir: str): 65 | """ 66 | Build the dataset from text files as a 'text: prompt' structure. 67 | 68 | :param source_dir: the directory that contains all the text files. 69 | 70 | :returns: A dataset with all the prompts inside 71 | """ 72 | path_list = Path(source_dir).glob("./*.txt") 73 | data = [] 74 | # Converting text files into data in our prompt format: 75 | for path in path_list: 76 | data.extend(convert_textfile_to_data_with_prompts(path)) 77 | data_dir = tempfile.mkdtemp() 78 | os.makedirs(data_dir, exist_ok=True) 79 | with open(data_dir + "/html_data.jsonl", "w", encoding="utf8") as f: 80 | for item in data: 81 | f.write( 82 | json.dumps({"text": item.replace(" ", "")}, ensure_ascii=False) + "\n" 83 | ) 84 | return load_dataset(data_dir)["train"].to_pandas() 85 | -------------------------------------------------------------------------------- /src/serving.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import zipfile 4 | from typing import Any, Dict 5 | 6 | import evaluate 7 | import mlrun.artifacts 8 | import numpy as np 9 | import torch 10 | import transformers 11 | from mlrun.serving.v2_serving import V2ModelServer 12 | from peft import PeftModel 13 | 14 | SUBJECT_MARK = "### Human: " 15 | CONTENT_MARK = "\n### Assistant: " 16 | PROMPT_FORMAT = SUBJECT_MARK + "{}" + CONTENT_MARK 17 | 18 | 19 | def preprocess(request: dict) -> dict: 20 | """ 21 | convert the request to the required structure for the predict function 22 | 23 | :param request: A http request that contains the prompt 24 | """ 25 | # Read bytes: 26 | if isinstance(request, bytes): 27 | request = json.loads(request) 28 | 29 | # Get the prompt: 30 | prompt = request.pop("prompt") 31 | 32 | # Format the prompt as subject: 33 | prompt = PROMPT_FORMAT.format(str(prompt)) 34 | 35 | # Update the request and return: 36 | request = {"inputs": [{"prompt": [prompt], **request}]} 37 | return request 38 | 39 | 40 | class LLMModelServer(V2ModelServer): 41 | """ 42 | This is temporary and will be built in mlrun 1.5.0 43 | """ 44 | 45 | def __init__( 46 | self, 47 | context: mlrun.MLClientCtx = None, 48 | name: str = None, 49 | model_class: str = "AutoModelForCausalLM", 50 | tokenizer_class: str = "AutoTokenizer", 51 | # model args: 52 | model_args: dict = 
None, 53 | # Load from MLRun args: 54 | model_path: str = None, 55 | # Load from hub args: 56 | model_name: str = None, 57 | tokenizer_name: str = None, 58 | # Deepspeed args: 59 | use_deepspeed: bool = False, 60 | n_gpus: int = 1, 61 | is_fp16: bool = True, 62 | # peft model: 63 | peft_model: str = None, 64 | # Inference args: 65 | **class_args, 66 | ): 67 | # Initialize the base server: 68 | super(LLMModelServer, self).__init__( 69 | context=context, 70 | name=name, 71 | model_path=model_path, 72 | **class_args, 73 | ) 74 | 75 | # Save class names: 76 | self.model_class = model_class 77 | self.tokenizer_class = tokenizer_class 78 | 79 | # Save hub loading parameters: 80 | self.model_name = model_name 81 | self.tokenizer_name = tokenizer_name or self.model_name 82 | 83 | # Save load model arguments: 84 | self.model_args = model_args 85 | 86 | # Save deepspeed parameters: 87 | self.use_deepspeed = use_deepspeed 88 | self.n_gpus = n_gpus 89 | self.is_fp16 = is_fp16 90 | 91 | # PEFT parameters: 92 | self.peft_model = peft_model 93 | 94 | # Prepare variables for future use: 95 | self.model = None 96 | self.tokenizer = None 97 | self._model_class = None 98 | self._tokenizer_class = None 99 | 100 | def load(self): 101 | # Get classes: 102 | self._model_class = getattr(transformers, self.model_class) 103 | self._tokenizer_class = getattr(transformers, self.tokenizer_class) 104 | 105 | # Load the model and tokenizer: 106 | if self.model_path: 107 | self._load_from_mlrun() 108 | else: 109 | self._load_from_hub() 110 | 111 | # Use deepspeed if needed: 112 | if self.use_deepspeed: 113 | import deepspeed 114 | 115 | self.model = deepspeed.init_inference( 116 | model=self.model, 117 | mp_size=self.n_gpus, 118 | dtype=torch.float16 if self.is_fp16 else torch.float32, 119 | replace_method="auto", 120 | replace_with_kernel_inject=True, 121 | ) 122 | if self.peft_model: 123 | self._load_peft_model() 124 | 125 | def _extract_model(self, url): 126 | # Get the model artifact and file: 127 | ( 128 | model_file, 129 | model_artifact, 130 | extra_data, 131 | ) = mlrun.artifacts.get_model(url) 132 | 133 | # Read the name: 134 | model_name = model_artifact.spec.db_key 135 | 136 | # Extract logged model files: 137 | model_directory = os.path.join(os.path.dirname(model_file), model_name) 138 | with zipfile.ZipFile(model_file, "r") as zip_file: 139 | zip_file.extractall(model_directory) 140 | return model_directory 141 | 142 | def _load_peft_model(self): 143 | model_directory = self._extract_model(self.peft_model) 144 | self.model = PeftModel.from_pretrained(self.model, model_directory) 145 | self.model.eval() 146 | 147 | def _load_from_mlrun(self): 148 | model_directory = self._extract_model(self.model_path) 149 | 150 | # Loading the saved pretrained tokenizer and model: 151 | self.tokenizer = self._tokenizer_class.from_pretrained(model_directory) 152 | self.model = self._model_class.from_pretrained( 153 | model_directory, **self.model_args 154 | ) 155 | 156 | def _load_from_hub(self): 157 | # Loading the pretrained tokenizer and model: 158 | self.tokenizer = self._tokenizer_class.from_pretrained( 159 | self.tokenizer_name, 160 | model_max_length=512, 161 | ) 162 | self.model = self._model_class.from_pretrained( 163 | self.model_name, **self.model_args 164 | ) 165 | 166 | def predict(self, request: Dict[str, Any]) -> dict: 167 | # Get the inputs: 168 | kwargs = request["inputs"][0] 169 | prompt = kwargs.pop("prompt")[0] 170 | 171 | # Tokenize: 172 | inputs = self.tokenizer(prompt, return_tensors="pt")["input_ids"] 
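# Move the token ids to the GPU when the model was loaded on CUDA so generation runs on-device: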
173 | if self.model.device.type == "cuda": 174 | inputs = inputs.cuda() 175 | 176 | # Get the pad token id: 177 | pad_token_id = self.tokenizer.eos_token_id 178 | 179 | # Infer through the model: 180 | output = self.model.generate( 181 | input_ids=inputs, 182 | do_sample=True, 183 | num_return_sequences=1, 184 | pad_token_id=pad_token_id, 185 | **kwargs, 186 | ) 187 | 188 | # Detokenize: 189 | prediction = self.tokenizer.decode(output[0], skip_special_tokens=True) 190 | 191 | return {"prediction": prediction, "prompt": prompt} 192 | 193 | def explain(self, request: Dict) -> str: 194 | return f"LLM model server named {self.name}" 195 | 196 | 197 | def postprocess(inputs: dict) -> dict: 198 | """ 199 | Postprocessing the generated output of the model 200 | """ 201 | # Read the prediction: 202 | prediction = inputs["outputs"]["prediction"] 203 | 204 | # Look for a 'Content: ' mark to know the model found the subject, otherwise, it is probably garbage: 205 | content_index = prediction.find(CONTENT_MARK) 206 | if content_index == -1: 207 | output = f"I'm not sure about it but I'll do my best: {prediction}" 208 | else: 209 | output = prediction[content_index + len(CONTENT_MARK) :] 210 | 211 | return { 212 | "inputs": [ 213 | {"prediction": output.strip(), "prompt": inputs["outputs"]["prompt"]} 214 | ] 215 | } 216 | 217 | 218 | class ToxicityClassifierModelServer(V2ModelServer): 219 | """ 220 | model that checks if the text contain toxicity language. 221 | """ 222 | 223 | def __init__(self, context, name: str, threshold: float = 0.7, **class_args): 224 | # Initialize the base server: 225 | super(ToxicityClassifierModelServer, self).__init__( 226 | context=context, 227 | name=name, 228 | model_path=None, 229 | **class_args, 230 | ) 231 | 232 | # Store the threshold of toxicity: 233 | self.threshold = threshold 234 | 235 | def load(self): 236 | self.model = evaluate.load("toxicity", module_type="measurement") 237 | 238 | def predict(self, inputs: Dict) -> str: 239 | # Read the user's input and model output: 240 | prediction = inputs["inputs"][0]["prediction"] 241 | prompt = inputs["inputs"][0]["prompt"] 242 | 243 | # Infer through the evaluator model: 244 | result = self.model.compute(predictions=[prediction, prompt])["toxicity"] 245 | if any(np.array(result) > self.threshold): 246 | return "This bot do not respond to toxicity." 
247 | 248 | return prediction 249 | 250 | def explain(self, request: Dict) -> str: 251 | return f"Text toxicity classifier server named {self.name}" 252 | -------------------------------------------------------------------------------- /src/trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import tempfile 4 | import zipfile 5 | from abc import ABC 6 | from typing import Any, Dict, List 7 | 8 | import mlrun 9 | import numpy as np 10 | import pandas as pd 11 | import torch 12 | import transformers 13 | from datasets import Dataset 14 | from mlrun.artifacts.manager import Artifact, PlotlyArtifact 15 | from mlrun.datastore import DataItem 16 | from mlrun.execution import MLClientCtx 17 | from mlrun.frameworks._common import CommonTypes, MLRunInterface 18 | from mlrun.utils import create_class 19 | from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training 20 | from plotly import graph_objects as go 21 | from transformers import ( 22 | AutoModelForCausalLM, 23 | AutoTokenizer, 24 | BitsAndBytesConfig, 25 | DataCollatorForLanguageModeling, 26 | PreTrainedModel, 27 | PreTrainedTokenizer, 28 | Trainer, 29 | TrainerCallback, 30 | TrainerControl, 31 | TrainerState, 32 | TrainingArguments, 33 | ) 34 | 35 | DEEPSPEED_CONFIG = { 36 | "fp16": { 37 | "enabled": "auto", 38 | "loss_scale": 0, 39 | "loss_scale_window": 1000, 40 | "initial_scale_power": 16, 41 | "hysteresis": 2, 42 | "min_loss_scale": 1, 43 | }, 44 | "optimizer": { 45 | "type": "AdamW", 46 | "params": { 47 | "lr": "auto", 48 | "betas": "auto", 49 | "eps": "auto", 50 | "weight_decay": "auto", 51 | }, 52 | }, 53 | "scheduler": { 54 | "type": "WarmupLR", 55 | "params": { 56 | "warmup_min_lr": "auto", 57 | "warmup_max_lr": "auto", 58 | "warmup_num_steps": "auto", 59 | }, 60 | }, 61 | "zero_optimization": { 62 | "stage": 3, 63 | "offload_optimizer": {"device": "cpu", "pin_memory": True}, 64 | "offload_param": {"device": "cpu", "pin_memory": True}, 65 | "overlap_comm": True, 66 | "contiguous_gradients": True, 67 | "sub_group_size": 1e9, 68 | "reduce_bucket_size": "auto", 69 | "stage3_prefetch_bucket_size": "auto", 70 | "stage3_param_persistence_threshold": "auto", 71 | "stage3_max_live_parameters": 1e9, 72 | "stage3_max_reuse_distance": 1e9, 73 | "stage3_gather_16bit_weights_on_model_save": True, 74 | }, 75 | "gradient_accumulation_steps": "auto", 76 | "gradient_clipping": "auto", 77 | "steps_per_print": 2000, 78 | "train_batch_size": "auto", 79 | "train_micro_batch_size_per_gpu": "auto", 80 | "wall_clock_breakdown": False, 81 | "comms_logger": { 82 | "enabled": True, 83 | "verbose": False, 84 | "prof_all": True, 85 | "debug": False, 86 | }, 87 | } 88 | 89 | 90 | # ----------------------from MLRUN-------------------------------- 91 | class HFTrainerMLRunInterface(MLRunInterface, ABC): 92 | """ 93 | This is temporary and will be built in mlrun 1.5.0 94 | Interface for adding MLRun features for tensorflow keras API. 95 | """ 96 | 97 | # MLRuns context default name: 98 | DEFAULT_CONTEXT_NAME = "mlrun-huggingface" 99 | 100 | # Attributes to replace so the MLRun interface will be fully enabled. 
101 | _REPLACED_METHODS = [ 102 | "train", 103 | # "evaluate" 104 | ] 105 | 106 | @classmethod 107 | def add_interface( 108 | cls, 109 | obj: Trainer, 110 | restoration: CommonTypes.MLRunInterfaceRestorationType = None, 111 | ): 112 | super(HFTrainerMLRunInterface, cls).add_interface( 113 | obj=obj, restoration=restoration 114 | ) 115 | 116 | @classmethod 117 | def mlrun_train(cls): 118 | def wrapper(self: Trainer, *args, **kwargs): 119 | # Restore the evaluation method as `train` will use it: 120 | # cls._restore_attribute(obj=self, attribute_name="evaluate") 121 | 122 | # Call the original fit method: 123 | result = self.original_train(*args, **kwargs) 124 | 125 | # Replace the evaluation method again: 126 | # cls._replace_function(obj=self, function_name="evaluate") 127 | 128 | return result 129 | 130 | return wrapper 131 | 132 | 133 | class MLRunCallback(TrainerCallback): 134 | """ 135 | This is temporary and will be built in mlrun 1.5.0 136 | Callback for collecting logs during training / evaluation of the `Trainer` API. 137 | """ 138 | 139 | def __init__( 140 | self, 141 | context: mlrun.MLClientCtx = None, 142 | model_name: str = "model", 143 | tag: str = "", 144 | labels: Dict[str, str] = None, 145 | extra_data: dict = None, 146 | ): 147 | super().__init__() 148 | 149 | # Store the configurations: 150 | self._context = ( 151 | context 152 | if context is not None 153 | else mlrun.get_or_create_ctx("./mlrun-huggingface") 154 | ) 155 | self._model_name = model_name 156 | self._tag = tag 157 | self._labels = labels 158 | self._extra_data = extra_data if extra_data is not None else {} 159 | 160 | # Set up the logging mode: 161 | self._is_training = False 162 | self._steps: List[List[int]] = [] 163 | self._metric_scores: Dict[str, List[float]] = {} 164 | self._artifacts: Dict[str, Artifact] = {} 165 | 166 | def on_epoch_begin( 167 | self, 168 | args: TrainingArguments, 169 | state: TrainerState, 170 | control: TrainerControl, 171 | **kwargs, 172 | ): 173 | if not state.is_world_process_zero: 174 | return 175 | self._steps.append([]) 176 | 177 | def on_epoch_end( 178 | self, 179 | args: TrainingArguments, 180 | state: TrainerState, 181 | control: TrainerControl, 182 | **kwargs, 183 | ): 184 | if not state.is_world_process_zero: 185 | return 186 | self._log_metrics() 187 | 188 | def on_log( 189 | self, 190 | args: TrainingArguments, 191 | state: TrainerState, 192 | control: TrainerControl, 193 | logs: Dict[str, float] = None, 194 | **kwargs, 195 | ): 196 | if not state.is_world_process_zero: 197 | return 198 | recent_logs = state.log_history[-1].copy() 199 | 200 | recent_logs.pop("epoch") 201 | current_step = int(recent_logs.pop("step")) 202 | if current_step not in self._steps[-1]: 203 | self._steps[-1].append(current_step) 204 | 205 | for metric_name, metric_score in recent_logs.items(): 206 | if metric_name.startswith("train_"): 207 | if metric_name.split("train_")[1] not in self._metric_scores: 208 | self._metric_scores[metric_name] = [metric_score] 209 | continue 210 | if metric_name not in self._metric_scores: 211 | self._metric_scores[metric_name] = [] 212 | self._metric_scores[metric_name].append(metric_score) 213 | 214 | def on_train_begin( 215 | self, 216 | args: TrainingArguments, 217 | state: TrainerState, 218 | control: TrainerControl, 219 | **kwargs, 220 | ): 221 | if not state.is_world_process_zero: 222 | return 223 | self._is_training = True 224 | 225 | def on_train_end( 226 | self, 227 | args: TrainingArguments, 228 | state: TrainerState, 229 | control: TrainerControl, 
230 | model: PreTrainedModel = None, 231 | tokenizer: PreTrainedTokenizer = None, 232 | **kwargs, 233 | ): 234 | if not state.is_world_process_zero: 235 | return 236 | self._log_metrics() 237 | 238 | def on_evaluate( 239 | self, 240 | args: TrainingArguments, 241 | state: TrainerState, 242 | control: TrainerControl, 243 | **kwargs, 244 | ): 245 | if not state.is_world_process_zero: 246 | return 247 | self._log_metrics() 248 | 249 | if self._is_training: 250 | return 251 | 252 | def _log_metrics(self): 253 | for metric_name, metric_scores in self._metric_scores.items(): 254 | self._context.log_result(key=metric_name, value=metric_scores[-1]) 255 | if len(metric_scores) > 1: 256 | self._log_metric_plot(name=metric_name, scores=metric_scores) 257 | self._context.commit(completed=False) 258 | 259 | def _log_metric_plot(self, name: str, scores: List[float]): 260 | # Initialize a plotly figure: 261 | metric_figure = go.Figure() 262 | 263 | # Add titles: 264 | metric_figure.update_layout( 265 | title=name.capitalize().replace("_", " "), 266 | xaxis_title="Samples", 267 | yaxis_title="Scores", 268 | ) 269 | 270 | # Draw: 271 | metric_figure.add_trace( 272 | go.Scatter(x=np.arange(len(scores)), y=scores, mode="lines") 273 | ) 274 | 275 | # Create the plotly artifact: 276 | artifact_name = f"{name}_plot" 277 | artifact = PlotlyArtifact(key=artifact_name, figure=metric_figure) 278 | self._artifacts[artifact_name] = self._context.log_artifact(artifact) 279 | 280 | 281 | def apply_mlrun( 282 | trainer: transformers.Trainer, 283 | model_name: str = None, 284 | tag: str = "", 285 | context: mlrun.MLClientCtx = None, 286 | auto_log: bool = True, 287 | labels: Dict[str, str] = None, 288 | extra_data: dict = None, 289 | **kwargs, 290 | ): 291 | """ 292 | This is temporary and will be built in mlrun 1.5.0 293 | """ 294 | # Get parameters defaults: 295 | if context is None: 296 | context = mlrun.get_or_create_ctx(HFTrainerMLRunInterface.DEFAULT_CONTEXT_NAME) 297 | 298 | HFTrainerMLRunInterface.add_interface(obj=trainer) 299 | 300 | if auto_log: 301 | trainer.add_callback( 302 | MLRunCallback( 303 | context=context, 304 | model_name=model_name, 305 | tag=tag, 306 | labels=labels, 307 | extra_data=extra_data, 308 | ) 309 | ) 310 | 311 | 312 | class KWArgsPrefixes: 313 | MODEL_CLASS = "CLASS_" 314 | FIT = "FIT_" 315 | TRAIN = "TRAIN_" 316 | PREDICT = "PREDICT_" 317 | DATA_COLLATOR = "DC_" 318 | 319 | 320 | def _get_sub_dict_by_prefix(src: Dict, prefix_key: str) -> Dict[str, Any]: 321 | return { 322 | key.replace(prefix_key, ""): val 323 | for key, val in src.items() 324 | if key.startswith(prefix_key) 325 | } 326 | 327 | 328 | def print_trainable_parameters(model): 329 | """ 330 | Prints the number of trainable parameters in the model. 
331 | """ 332 | trainable_params = 0 333 | all_param = 0 334 | for _, param in model.named_parameters(): 335 | all_param += param.numel() 336 | if param.requires_grad: 337 | trainable_params += param.numel() 338 | print( 339 | f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}" 340 | ) 341 | 342 | 343 | def train( 344 | context: MLClientCtx, 345 | dataset: DataItem = None, 346 | pretrained_tokenizer: str = None, 347 | pretrained_model: str = None, 348 | model_class: str = None, 349 | tokenizer_class: str = None, 350 | model_name: str = "huggingface-model", 351 | use_deepspeed: bool = True, 352 | ): 353 | torch.cuda.empty_cache() 354 | # deepspeed_config_json = None 355 | # if use_deepspeed: 356 | # deepspeed_config_json = os.path.join(tempfile.mkdtemp(), "ds_config.json") 357 | # with open(deepspeed_config_json, "w") as f: 358 | # json.dump(DEEPSPEED_CONFIG, f) 359 | if tokenizer_class: 360 | tokenizer_class = create_class(tokenizer_class) 361 | else: 362 | tokenizer_class = AutoTokenizer 363 | 364 | tokenizer = tokenizer_class.from_pretrained( 365 | pretrained_tokenizer, 366 | model_max_length=512, 367 | ) 368 | tokenizer.pad_token = tokenizer.eos_token 369 | 370 | train_dataset = Dataset.from_pandas(dataset.as_df()) 371 | 372 | def preprocess_function(examples): 373 | return tokenizer(examples["text"], truncation=True, padding=True) 374 | 375 | tokenized_train = train_dataset.map(preprocess_function, batched=True) 376 | tokenized_test = None 377 | 378 | data_collator_kwargs = _get_sub_dict_by_prefix( 379 | src=context.parameters, prefix_key=KWArgsPrefixes.DATA_COLLATOR 380 | ) 381 | data_collator = DataCollatorForLanguageModeling( 382 | tokenizer=tokenizer, mlm=False, **data_collator_kwargs 383 | ) 384 | 385 | # Parsing kwargs: 386 | train_kwargs = _get_sub_dict_by_prefix( 387 | src=context.parameters, prefix_key=KWArgsPrefixes.TRAIN 388 | ) 389 | # if use_deepspeed: 390 | # train_kwargs["deepspeed"] = deepspeed_config_json 391 | model_class_kwargs = _get_sub_dict_by_prefix( 392 | src=context.parameters, prefix_key=KWArgsPrefixes.MODEL_CLASS 393 | ) 394 | # Loading our pretrained model: 395 | model_class_kwargs["pretrained_model_name_or_path"] = ( 396 | model_class_kwargs.get("pretrained_model_name_or_path") or pretrained_model 397 | ) 398 | train_kwargs["hub_token"] = train_kwargs.get("hub_token") or pretrained_tokenizer 399 | if not model_class_kwargs["pretrained_model_name_or_path"]: 400 | raise mlrun.errors.MLRunRuntimeError( 401 | "Must provide pretrained_model name as " 402 | "function argument or in extra params" 403 | ) 404 | bnb_config = BitsAndBytesConfig( 405 | load_in_4bit=True, 406 | bnb_4bit_use_double_quant=True, 407 | bnb_4bit_quant_type="nf4", 408 | bnb_4bit_compute_dtype=torch.bfloat16, 409 | ) 410 | 411 | model = create_class(model_class).from_pretrained( 412 | quantization_config=bnb_config, 413 | device_map="auto", 414 | trust_remote_code=True, 415 | **model_class_kwargs, 416 | ) 417 | 418 | model.gradient_checkpointing_enable() 419 | model = prepare_model_for_kbit_training(model) 420 | 421 | # Preparing training arguments: 422 | training_args = TrainingArguments( 423 | output_dir=tempfile.mkdtemp(), 424 | optim="paged_adamw_8bit", 425 | gradient_accumulation_steps=2, 426 | warmup_steps=5, 427 | learning_rate=3e-4, 428 | fp16=True, 429 | logging_steps=1, 430 | **train_kwargs, 431 | ) 432 | 433 | config = LoraConfig( 434 | r=16, 435 | lora_alpha=16, 436 | target_modules=["query_key_value"], 437 | 
lora_dropout=0.05, 438 | bias="none", 439 | task_type="CAUSAL_LM", 440 | ) 441 | 442 | model = get_peft_model(model, config) 443 | print_trainable_parameters(model) 444 | 445 | trainer = transformers.Trainer( 446 | model=model, 447 | args=training_args, 448 | train_dataset=tokenized_train, 449 | eval_dataset=tokenized_test, 450 | tokenizer=tokenizer, 451 | data_collator=data_collator, 452 | ) 453 | 454 | apply_mlrun(trainer, model_name=model_name) 455 | model.config.use_cache = ( 456 | False # silence the warnings. Please re-enable for inference! 457 | ) 458 | 459 | # Apply training with evaluation: 460 | context.logger.info(f"training '{model_name}'") 461 | trainer.train() 462 | 463 | temp_directory = tempfile.TemporaryDirectory().name 464 | trainer.save_model(temp_directory) 465 | 466 | # Zip the model directory: 467 | shutil.make_archive( 468 | base_name="model", 469 | format="zip", 470 | root_dir=temp_directory, 471 | ) 472 | 473 | # Log the model: 474 | context.log_model( 475 | key="model", 476 | db_key=model_name, 477 | model_file="model.zip", 478 | tag="", 479 | framework="Hugging Face", 480 | ) 481 | 482 | 483 | def evaluate( 484 | context, 485 | model_path, 486 | data: pd.DataFrame, 487 | model_name: str = None, 488 | tokenizer_name: str = None, 489 | ): 490 | """ 491 | Evaluating the model using perplexity, for more information visit: 492 | https://huggingface.co/docs/transformers/perplexity 493 | 494 | :param context: mlrun context 495 | :param model_path: path to the model directory 496 | :param data: the data to evaluate the model 497 | :param model_name: name of base model 498 | :param tokenizer_name: name of base tokenizer 499 | """ 500 | # Get the model artifact and file: 501 | ( 502 | model_file, 503 | model_artifact, 504 | extra_data, 505 | ) = mlrun.artifacts.get_model(model_path) 506 | 507 | # Read the name: 508 | _model_name = model_artifact.spec.db_key 509 | 510 | # Extract logged model files: 511 | model_directory = os.path.join(os.path.dirname(model_file), _model_name) 512 | with zipfile.ZipFile(model_file, "r") as zip_file: 513 | zip_file.extractall(model_directory) 514 | 515 | # Loading the saved pretrained tokenizer and model: 516 | dataset = Dataset.from_pandas(data) 517 | tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) 518 | pad_token_id = tokenizer.eos_token_id 519 | model = AutoModelForCausalLM.from_pretrained( 520 | model_name, device_map="cuda:0", trust_remote_code=True, load_in_8bit=True 521 | ) 522 | model = PeftModel.from_pretrained(model, model_directory) 523 | model.eval() 524 | encodings = tokenizer("\n\n".join(dataset["text"][:5]), return_tensors="pt") 525 | 526 | max_length = 1024 527 | stride = 512 528 | seq_len = encodings.input_ids.size(1) 529 | 530 | nlls = [] 531 | prev_end_loc = 0 532 | for begin_loc in range(0, seq_len, stride): 533 | end_loc = min(begin_loc + max_length, seq_len) 534 | trg_len = end_loc - prev_end_loc # may be different from stride on last loop 535 | input_ids = encodings.input_ids[:, begin_loc:end_loc] 536 | target_ids = input_ids.clone() 537 | target_ids[:, :-trg_len] = -100 538 | 539 | with torch.no_grad(): 540 | outputs = model(input_ids.cuda(), labels=target_ids) 541 | 542 | # loss is calculated using CrossEntropyLoss which averages over valid labels 543 | # N.B. the model only calculates loss over trg_len - 1 labels, because it internally shifts the labels 544 | # to the left by 1. 
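# Keep the per-window negative log-likelihood; after the loop, perplexity is computed as exp(mean NLL) over all windows.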
545 | neg_log_likelihood = outputs.loss 546 | 547 | nlls.append(neg_log_likelihood) 548 | 549 | prev_end_loc = end_loc 550 | if end_loc == seq_len: 551 | break 552 | 553 | ppl = torch.exp(torch.stack(nlls).mean()).item() 554 | context.log_result("perplexity", ppl) 555 | -------------------------------------------------------------------------------- /src/training_workflow.py: -------------------------------------------------------------------------------- 1 | import mlrun 2 | from kfp import dsl 3 | 4 | 5 | @dsl.pipeline(name="MLOps Bot Master Pipeline") 6 | def kfpipeline( 7 | html_links: str, 8 | model_name: str, 9 | pretrained_tokenizer: str, 10 | pretrained_model: str, 11 | epochs: str, 12 | use_deepspeed: bool, 13 | tokenizer_class: str = "transformers.AutoTokenizer", 14 | model_class: str = "transformers.AutoModelForCausalLM", 15 | ): 16 | # Get our project object: 17 | project = mlrun.get_current_project() 18 | 19 | # Collect Dataset: 20 | collect_dataset_run = mlrun.run_function( 21 | function="data-collecting", 22 | handler="collect_html_to_text_files", 23 | name="data-collection", 24 | params={"urls_file": html_links}, 25 | returns=["html-as-text-files:path"], 26 | ) 27 | 28 | # Dataset Preparation: 29 | prepare_dataset_run = mlrun.run_function( 30 | function="data-preparing", 31 | handler="prepare_dataset", 32 | name="data-preparation", 33 | inputs={"source_dir": collect_dataset_run.outputs["html-as-text-files"]}, 34 | returns=["html-data:dataset"], 35 | ) 36 | 37 | # Training: 38 | project.get_function("training") 39 | 40 | training_run = mlrun.run_function( 41 | function="training", 42 | name="train", 43 | inputs={"dataset": prepare_dataset_run.outputs["html-data"]}, 44 | params={ 45 | "model_name": model_name, 46 | "pretrained_tokenizer": pretrained_tokenizer, 47 | "pretrained_model": pretrained_model, 48 | "model_class": model_class, 49 | "tokenizer_class": tokenizer_class, 50 | "TRAIN_num_train_epochs": epochs, 51 | "use_deepspeed": use_deepspeed, 52 | }, 53 | handler="train", 54 | outputs=["model"], 55 | ) 56 | 57 | # evaluation: 58 | mlrun.run_function( 59 | function="training", 60 | name="evaluate", 61 | params={ 62 | "model_path": training_run.outputs["model"], 63 | "model_name": pretrained_model, 64 | "tokenizer_name": pretrained_tokenizer, 65 | }, 66 | inputs={"data": prepare_dataset_run.outputs["html-data"]}, 67 | handler="evaluate", 68 | ) 69 | -------------------------------------------------------------------------------- /tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "75cb6daf-ecdc-4129-8f28-ad871d3a795c", 6 | "metadata": {}, 7 | "source": [ 8 | "# Tutorial: Build & Deploy Custom (fine-tuned) LLM Models and Applications\n", 9 | "\n", 10 | "In the following tutorial you will learn how to operationalize a LLM using MLRun. We will build **MLOpsPedia** - The MLOps Master Bot, a chatbot for answering all your MLOps questions. 
We will do so by covering the two main stages in every MLOps project:\n", 11 | "\n", 12 | "* **Automated training pipeline** - Build an automated ML pipeline for data collection, data preparation, training and evaluation.\n", 13 | "* **Serving graph deployment** - Build, deploy and test in a Gradio application the newly trained LLM.\n", 14 | "\n", 15 | "**MLRun** is welcoming you to **LLMOps**!\n", 16 | "\n", 17 | "> Make sure you went over the basics in MLRun [Quick Start Tutorial](https://docs.mlrun.org/en/stable/tutorial/01-mlrun-basics.html) to understand the MLRun basics.\n", 18 | "\n", 19 | "Run the notebook in the following order (you may skip the first step):\n", 20 | "1. [Test the Pretrained Model](#test-the-pretrained-model)\n", 21 | "2. [Automated Training Pipeline](#automated-training-pipeline)\n", 22 | "3. [Application Serving Pipeline](#application-serving-pipeline)" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "id": "e9565c47-7720-47ca-ab0b-ac8a77286f90", 28 | "metadata": {}, 29 | "source": [ 30 | "But first, please install the following requirements:" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "id": "cdf6b605-348d-4fd7-958d-d484446b5964", 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "%pip install -r requirements.txt" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "id": "906e38b6-168a-47cb-9320-2acdd16b0b37", 46 | "metadata": {}, 47 | "source": [ 48 | "___\n", 49 | "\n", 50 | "## 1. Test the Pretrained Model\n", 51 | "\n", 52 | "MLOpsPedia will be based on [falcon-7b](https://huggingface.co/tiiuae/falcon-7b). Before fine-tuning it, we want to see how it performs on some MLOps questions." 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "id": "8d1a9b26-9916-47d8-9c89-1e0e7380bf57", 58 | "metadata": {}, 59 | "source": [ 60 | "### 1.1. Load `falcon-7b` from HuggingFace's Transformers Hub\n", 61 | "\n", 62 | "`falcon-7b` is fully supported by HuggingFace and have its own Model and Tokenizer classes. 
We will use them in a HuggingFace pipeline and test them out:" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 1, 68 | "id": "2c763708-f0e5-4a53-b788-64e4c2634973", 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "data": { 73 | "application/vnd.jupyter.widget-view+json": { 74 | "model_id": "80910a4c2be34f7ab35b193f37c8e0bb", 75 | "version_major": 2, 76 | "version_minor": 0 77 | }, 78 | "text/plain": [ 79 | "Loading checkpoint shards: 0%| | 0/2 [00:00 str:\n", 116 | " return generator(prompt, \n", 117 | " generation_config=generation_config,\n", 118 | " max_length=50, pad_token_id=tokenizer.eos_token_id)[0][\"generated_text\"]" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 3, 124 | "id": "e1e98ec1-859e-4306-ac0e-be3b714ef5ef", 125 | "metadata": {}, 126 | "outputs": [ 127 | { 128 | "name": "stdout", 129 | "output_type": "stream", 130 | "text": [ 131 | "What is a serving pipeline?\n", 132 | "A serving pipeline is a set of tools that help you to create, manage, and deliver your content.\n", 133 | "What is a serving pipeline?\n", 134 | "A serving pipeline is a set of tools that help you to create,\n" 135 | ] 136 | } 137 | ], 138 | "source": [ 139 | "print(prompt_to_response(prompt=\"What is a serving pipeline?\"))" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 4, 145 | "id": "186841f5-c681-40bf-8467-68801cfca461", 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "name": "stdout", 150 | "output_type": "stream", 151 | "text": [ 152 | "What is MLops?\n", 153 | "MLops is a set of practices that help organizations to build, deploy, and manage machine learning models at scale.\n", 154 | "MLops is a set of practices that help organizations to build, deploy, and manage machine learning models\n" 155 | ] 156 | } 157 | ], 158 | "source": [ 159 | "print(prompt_to_response(prompt=\"What is MLops?\"))" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "id": "1203c3bc-b3a9-4b30-a7b3-c119a74e0e2d", 165 | "metadata": {}, 166 | "source": [ 167 | "As expected, `falcon-7b` is not that sharp on MLOps questions, but that's about to change..." 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "id": "26437209-fa75-496f-8d12-19751ba88530", 173 | "metadata": {}, 174 | "source": [ 175 | "___\n", 176 | "\n", 177 | "## 2. Automated Training Pipeline\n", 178 | "\n", 179 | "To get a `falcon-7b` that knows MLOps, we will fine tune it on [**Iguazio**'s MLOps blogs](https://www.iguazio.com/blog/). To do so, we will create a fully automated pipeline with the following steps:\n", 180 | "\n", 181 | "1. **Collect Data** - Collect all text from given html urls into `.txt` files, meaning we'll be getting all the MLOps blogs as text files.\n", 182 | "2. **Preprocess Data** - Join the `.txt` files, reformatting the text into our prompt template: \"Subject - Content\". We made every header (`` tags) a *subject* of a prompt, and the text (`

` tags) under it as its *content*.\n", 183 | "3. **Train** - Fine-tune the LLM on the data. We'll run the training on **OpenMPI**, and we will use **DeepSpeed** for distributing the model and data between multiple workers, splitting the work between nodes and GPUs. **MLRun will auto-log the entire training process**.\n", 184 | "4. **Evaluate** - Evaluate our model using the *Perplexity* metric.\n", 185 | "\n", 186 | "" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "id": "6e22570a-3c0e-4278-8385-e0d321bb9067", 192 | "metadata": {}, 193 | "source": [ 194 | "### 2.1. Define MLRun project and set all the MLRun functions\n", 195 | "\n", 196 | "Create or load an MLRun project that holds all your functions and configuration (see [project_setup.py](./src/project_setup.py))\n", 197 | "\n", 198 | "The project contains the following files where we'll set the functions from to build the workflow of the pipeline:\n", 199 | "* [data_collection.py](./src/data_collection.py) - to create an MLRun function with the `collect_html_to_text_files` handler.\n", 200 | "* [data_preprocess.py](./src/data_preprocess.py) - to create an MLRun function with the `prepare_dataset` handler.\n", 201 | "* [training]() - to create an MLRun function with the `train` and `evaluate` handlers.\n", 202 | "* [serving.py](./src/serving.py) - to create an MLRun function with all the serving graph steps (will be covered in section 3).\n", 203 | "\n", 204 | "In addition, the training pipeline is set to the project as well. It can be seen at [training_workflow.py](./src/training_workflow.py)\n", 205 | "\n", 206 | "The training and evaluation function we will use is [hugging_face_classifier_trainer](https://www.mlrun.org/hub/). It is taken from [**MLRun's Functions Hub**](https://docs.mlrun.org/en/stable/runtimes/load-from-hub.html) - a collection of ready to be imported functions for variety of use cases. We import the function during the project setup." 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 5, 212 | "id": "d771e4ba-43a4-4bcf-8ae0-d35c0f80d259", 213 | "metadata": {}, 214 | "outputs": [ 215 | { 216 | "name": "stdout", 217 | "output_type": "stream", 218 | "text": [ 219 | "git://github.com/mlrun/demo-llm-tuning.git#main\n" 220 | ] 221 | } 222 | ], 223 | "source": [ 224 | "import mlrun\n", 225 | "\n", 226 | "project = mlrun.load_project(\n", 227 | " name=\"mlopspedia-bot\",\n", 228 | " context=\"./\",\n", 229 | " user_project=True,\n", 230 | " parameters={\n", 231 | " \"source\": \"git://github.com/mlrun/demo-llm-tuning.git#main\",\n", 232 | " \"default_image\": \"yonishelach/mlrun-llm\",\n", 233 | " })" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "id": "a66deeb6-7472-4642-8a11-996422cd3091", 239 | "metadata": {}, 240 | "source": [ 241 | "### 2.2. Run full LLM life-cycle workflow\n", 242 | "\n", 243 | "Run the training pipeline by using `project.run(workflow name, ...)`. The steps on the piepline inputs and outputs are as follows:\n", 244 | "\n", 245 | "1. url link -> `collect_html_to_text_files` -> zip containing all url text files.\n", 246 | "2. zip containing all url text files -> `prepare_dataset` -> training set, evaluation set.\n", 247 | "3. training set -> `train` -> model, metrics, plots\n", 248 | "4. 
evaluation set, model -> `evaluate` -> metrics, plots" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 6, 254 | "id": "b1ea5ec6-cb78-44db-aac7-97e52ce591db", 255 | "metadata": {}, 256 | "outputs": [ 257 | { 258 | "data": { 259 | "text/html": [ 260 | "

Pipeline running (id=2012a80c-500b-43fb-ad03-abffd6ee2a6b), click here to view the details in MLRun UI
" 261 | ], 262 | "text/plain": [ 263 | "" 264 | ] 265 | }, 266 | "metadata": {}, 267 | "output_type": "display_data" 268 | }, 269 | { 270 | "data": { 271 | "image/svg+xml": [ 272 | "\n", 273 | "\n", 275 | "\n", 277 | "\n", 278 | "\n", 280 | "\n", 281 | "kfp\n", 282 | "\n", 283 | "\n", 284 | "\n", 285 | "mlops-bot-master-pipeline-zsk5k-1439426288\n", 286 | "\n", 287 | "evaluate\n", 288 | "\n", 289 | "\n", 290 | "\n", 291 | "mlops-bot-master-pipeline-zsk5k-2897139595\n", 292 | "\n", 293 | "data-preparation\n", 294 | "\n", 295 | "\n", 296 | "\n", 297 | "mlops-bot-master-pipeline-zsk5k-2897139595->mlops-bot-master-pipeline-zsk5k-1439426288\n", 298 | "\n", 299 | "\n", 300 | "\n", 301 | "\n", 302 | "\n", 303 | "mlops-bot-master-pipeline-zsk5k-930414823\n", 304 | "\n", 305 | "train\n", 306 | "\n", 307 | "\n", 308 | "\n", 309 | "mlops-bot-master-pipeline-zsk5k-2897139595->mlops-bot-master-pipeline-zsk5k-930414823\n", 310 | "\n", 311 | "\n", 312 | "\n", 313 | "\n", 314 | "\n", 315 | "mlops-bot-master-pipeline-zsk5k-930414823->mlops-bot-master-pipeline-zsk5k-1439426288\n", 316 | "\n", 317 | "\n", 318 | "\n", 319 | "\n", 320 | "\n", 321 | "mlops-bot-master-pipeline-zsk5k-915534038\n", 322 | "\n", 323 | "data-collection\n", 324 | "\n", 325 | "\n", 326 | "\n", 327 | "mlops-bot-master-pipeline-zsk5k-915534038->mlops-bot-master-pipeline-zsk5k-2897139595\n", 328 | "\n", 329 | "\n", 330 | "\n", 331 | "\n", 332 | "\n" 333 | ], 334 | "text/plain": [ 335 | "" 336 | ] 337 | }, 338 | "metadata": {}, 339 | "output_type": "display_data" 340 | }, 341 | { 342 | "data": { 343 | "text/html": [ 344 | "

Run Results

[info] Workflow 2012a80c-500b-43fb-ad03-abffd6ee2a6b finished, state=Succeeded


click the hyperlinks below to see detailed results
\n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | "
uid | start | state | name | parameters | results

Jul 12 05:02:28 | completed | evaluate
    parameters: model_path=store://artifacts/mlopspedia-bot-yonis/falcon-7b-mlrun:2012a80c-500b-43fb-ad03-abffd6ee2a6b, model_name=tiiuae/falcon-7b, tokenizer_name=tiiuae/falcon-7b
    results: perplexity=8.5703125

Jul 12 03:56:11 | completed | train
    parameters: model_name=falcon-7b-mlrun, pretrained_tokenizer=tiiuae/falcon-7b, pretrained_model=tiiuae/falcon-7b, model_class=transformers.AutoModelForCausalLM, tokenizer_class=transformers.AutoTokenizer, TRAIN_num_train_epochs=5, use_deepspeed=
    results: loss=2.3346, learning_rate=0.0, train_runtime=3898.6792, train_samples_per_second=0.737, train_steps_per_second=0.046, total_flos=2.9304526258176e+16

Jul 12 03:55:46 | completed | data-preparation

Jul 12 03:53:50 | completed | data-collection
    parameters: urls_file=/User/demo-llm-tuning/data/html_urls.txt
" 390 | ], 391 | "text/plain": [ 392 | "" 393 | ] 394 | }, 395 | "metadata": {}, 396 | "output_type": "display_data" 397 | } 398 | ], 399 | "source": [ 400 | "workflow_run = project.run(\n", 401 | " name=\"training_workflow\",\n", 402 | " arguments={\n", 403 | " \"html_links\": \"/User/demo-llm-tuning/data/html_urls.txt\",\n", 404 | " \"model_name\": \"falcon-7b-mlrun\",\n", 405 | " \"pretrained_tokenizer\": model_name,\n", 406 | " \"pretrained_model\": model_name,\n", 407 | " \"epochs\": 5,\n", 408 | " },\n", 409 | " watch=True,\n", 410 | " dirty=True,\n", 411 | " timeout=60 * 120,\n", 412 | ")" 413 | ] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "id": "a21444a9-b66b-4539-a1ea-13f745114fbb", 418 | "metadata": {}, 419 | "source": [ 420 | "#### 2.2.1. Distributed Training\n", 421 | "\n", 422 | "In the following image you can see the 16 workers that trained the model as part of an **MPIJob** and **DeepSpeed**.\n", 423 | "\n", 424 | "" 425 | ] 426 | }, 427 | { 428 | "cell_type": "markdown", 429 | "id": "1b92e42a-844a-40c4-a09f-dbb4bfd1e23c", 430 | "metadata": {}, 431 | "source": [ 432 | "#### 2.2.2. UI Presentation\n", 433 | "\n", 434 | "Here we can see how the workflow looks on our UI, we can see the entire pipeline and the loss plot produced by the training step that is highlighted.\n", 435 | "\n", 436 | "" 437 | ] 438 | }, 439 | { 440 | "cell_type": "markdown", 441 | "id": "11910dbc-8c31-4efd-b092-cdb1a649c4f2", 442 | "metadata": {}, 443 | "source": [ 444 | "___\n", 445 | "\n", 446 | "## 3. Application Serving Pipeline\n", 447 | "\n", 448 | "In this last part we'll serve our LLM using [MLRun Serving](https://docs.mlrun.org/en/stable/serving/serving-graph.html).\n", 449 | "\n", 450 | "MLRun serving can produce managed ML application pipelines using real-time auto-scaling [Nuclio](https://nuclio.io/) serverless functions. The application pipeline includes all the steps from accepting events or data, preparing the required model features, inferring results using one or more models, and driving actions.\n", 451 | "\n", 452 | "We'll build the following serving graph for chat application:\n", 453 | "\n", 454 | "* **Preprocess** (`preprocess`) - Fit the user prompt into out prompt structure (\"Subject - Content\") \n", 455 | "* **LLM** (`LLMModelServer`) - To serve our trained model and perform inferences to generate answers.\n", 456 | "* **Postprocess** (`postprocess`) - To see if our model generated text with confidence or not.\n", 457 | "* **Toxicity Filter** (`ToxicityClassifierModelServer`) - To serve a Hugging Face Evaluate package model and perform inferences to catch toxic prompt and responses.\n", 458 | "\n", 459 | "" 460 | ] 461 | }, 462 | { 463 | "cell_type": "markdown", 464 | "id": "9ecefcef-3a59-4d32-a046-f11e987d7df4", 465 | "metadata": {}, 466 | "source": [ 467 | "### 3.1. 
Build our Serving Graph\n", 468 | "\n", 469 | "We'll first get the serving function with the code from our project (it was set in section 2.1.):" 470 | ] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": 7, 475 | "id": "442e2d73-45fd-4264-92d1-9cfea8620066", 476 | "metadata": {}, 477 | "outputs": [], 478 | "source": [ 479 | "serving_function = project.get_function(\"serving\")" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": 8, 485 | "id": "2fc82891-9f37-4c38-b3d7-84fdeb0abb25", 486 | "metadata": {}, 487 | "outputs": [], 488 | "source": [ 489 | "model_args = {\"load_in_8bit\": True, \"device_map\": \"cuda:0\", \"trust_remote_code\": True}" 490 | ] 491 | }, 492 | { 493 | "cell_type": "markdown", 494 | "id": "23cc8fcf-f9ff-4175-bdb2-25d0f1b437ef", 495 | "metadata": {}, 496 | "source": [ 497 | "Now we'll build the serving graph:" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": 9, 503 | "id": "08594367-5e87-4bf3-8598-d72a6759355b", 504 | "metadata": {}, 505 | "outputs": [ 506 | { 507 | "data": { 508 | "image/svg+xml": [ 509 | "\n", 510 | "\n", 512 | "\n", 514 | "\n", 515 | "\n", 517 | "\n", 518 | "mlrun-flow\n", 519 | "\n", 520 | "\n", 521 | "\n", 522 | "_start\n", 523 | "\n", 524 | "start\n", 525 | "\n", 526 | "\n", 527 | "\n", 528 | "preprocess\n", 529 | "\n", 530 | "preprocess\n", 531 | "\n", 532 | "\n", 533 | "\n", 534 | "_start->preprocess\n", 535 | "\n", 536 | "\n", 537 | "\n", 538 | "\n", 539 | "\n", 540 | "mlopspedia\n", 541 | "\n", 542 | "mlopspedia\n", 543 | "\n", 544 | "\n", 545 | "\n", 546 | "preprocess->mlopspedia\n", 547 | "\n", 548 | "\n", 549 | "\n", 550 | "\n", 551 | "\n", 552 | "postprocess\n", 553 | "\n", 554 | "postprocess\n", 555 | "\n", 556 | "\n", 557 | "\n", 558 | "mlopspedia->postprocess\n", 559 | "\n", 560 | "\n", 561 | "\n", 562 | "\n", 563 | "\n", 564 | "toxicity-classifier\n", 565 | "\n", 566 | "toxicity-classifier\n", 567 | "\n", 568 | "\n", 569 | "\n", 570 | "postprocess->toxicity-classifier\n", 571 | "\n", 572 | "\n", 573 | "\n", 574 | "\n", 575 | "\n" 576 | ], 577 | "text/plain": [ 578 | "" 579 | ] 580 | }, 581 | "execution_count": 9, 582 | "metadata": {}, 583 | "output_type": "execute_result" 584 | } 585 | ], 586 | "source": [ 587 | "# Set the topology and get the graph object:\n", 588 | "graph = serving_function.set_topology(\"flow\", engine=\"async\")\n", 589 | "\n", 590 | "# Add the steps:\n", 591 | "graph.to(handler=\"preprocess\", name=\"preprocess\") \\\n", 592 | " .to(\"LLMModelServer\",\n", 593 | " name=\"mlopspedia\",\n", 594 | " model_args=model_args,\n", 595 | " tokenizer_name=model_name,\n", 596 | " model_name=model_name,\n", 597 | " peft_model=project.get_artifact_uri(\"falcon-7b-mlrun\")) \\\n", 598 | " .to(handler=\"postprocess\", name=\"postprocess\") \\\n", 599 | " .to(\"ToxicityClassifierModelServer\",\n", 600 | " name=\"toxicity-classifier\",\n", 601 | " threshold=0.7).respond()\n", 602 | "\n", 603 | "# Plot to graph:\n", 604 | "serving_function.plot(rankdir='LR')" 605 | ] 606 | }, 607 | { 608 | "cell_type": "markdown", 609 | "id": "426b91d1-649e-4ab0-8908-e8f2e2e54ceb", 610 | "metadata": {}, 611 | "source": [ 612 | "Lastly, we wish to add a GPU and save the configured function in the project:" 613 | ] 614 | }, 615 | { 616 | "cell_type": "code", 617 | "execution_count": 10, 618 | "id": "2efaff89-86de-4fa1-9bcb-cb97b0a34b7d", 619 | "metadata": {}, 620 | "outputs": [ 621 | { 622 | "data": { 623 | "text/plain": [ 624 | "" 625 | ] 626 | }, 627 | "execution_count": 10, 
628 | "metadata": {}, 629 | "output_type": "execute_result" 630 | } 631 | ], 632 | "source": [ 633 | "# Configure (add a GPU and increase readiness timeout):\n", 634 | "serving_function.with_limits(gpus=1)\n", 635 | "serving_function.spec.readiness_timeout = 3000\n", 636 | "\n", 637 | "# Save the function to the project:\n", 638 | "project.set_function(serving_function, with_repo=True)\n", 639 | "project.save()" 640 | ] 641 | }, 642 | { 643 | "cell_type": "markdown", 644 | "id": "b3637268-7349-4fdb-9baa-41ca0d41a94a", 645 | "metadata": {}, 646 | "source": [ 647 | "### 3.2. Deploy and Test the Application\n", 648 | "\n", 649 | "We will call the `deploy_function` and wait:" 650 | ] 651 | }, 652 | { 653 | "cell_type": "code", 654 | "execution_count": 11, 655 | "id": "dce5819e-aea0-48d2-9027-e85ce3b41aa2", 656 | "metadata": {}, 657 | "outputs": [ 658 | { 659 | "name": "stdout", 660 | "output_type": "stream", 661 | "text": [ 662 | "> 2023-07-12 05:03:41,703 [info] Starting remote function deploy\n", 663 | "2023-07-12 05:03:42 (info) Deploying function\n", 664 | "2023-07-12 05:03:42 (info) Building\n", 665 | "2023-07-12 05:03:42 (info) Staging files and preparing base images\n", 666 | "2023-07-12 05:03:42 (info) Building processor image\n", 667 | "2023-07-12 05:26:38 (info) Build complete\n", 668 | "2023-07-12 05:42:21 (info) Function deploy complete\n", 669 | "> 2023-07-12 05:42:23,182 [info] successfully deployed function: {'internal_invocation_urls': ['nuclio-mlopspedia-bot-yonis-serving.default-tenant.svc.cluster.local:8080'], 'external_invocation_urls': ['mlopspedia-bot-yonis-serving-mlopspedia-bot-yonis.default-tenant.app.llm2.iguazio-cd0.com/']}\n" 670 | ] 671 | } 672 | ], 673 | "source": [ 674 | "# Deploy the serving function:\n", 675 | "deployment = mlrun.deploy_function(\"serving\")" 676 | ] 677 | }, 678 | { 679 | "cell_type": "markdown", 680 | "id": "939ce236-4347-404e-9d61-03e6773fbb28", 681 | "metadata": {}, 682 | "source": [ 683 | "Let's test the function manually on some prompts:" 684 | ] 685 | }, 686 | { 687 | "cell_type": "code", 688 | "execution_count": 12, 689 | "id": "47a367e0-5b18-4032-b876-ff83bf5bf3a3", 690 | "metadata": {}, 691 | "outputs": [], 692 | "source": [ 693 | "generate_kwargs = {\"max_length\": 150, \"temperature\": 0.9, \"top_p\": 0.5, \"top_k\": 25, \"repetition_penalty\": 1.0}" 694 | ] 695 | }, 696 | { 697 | "cell_type": "code", 698 | "execution_count": 13, 699 | "id": "4cc39d0d-c32e-43cd-974f-dda458e44d63", 700 | "metadata": {}, 701 | "outputs": [ 702 | { 703 | "name": "stdout", 704 | "output_type": "stream", 705 | "text": [ 706 | "> 2023-07-12 05:42:23,239 [info] invoking function: {'method': 'POST', 'path': 'http://nuclio-mlopspedia-bot-yonis-serving.default-tenant.svc.cluster.local:8080/predict'}\n", 707 | "MLRun is a complete open source MLOps orchestration platform that provides a single platform for building, training, deploying and managing ML applications at scale. 
MLRun is built on top of Iguazio’s open source data science platform and provides a unified framework for running data science and ML applications.\n", 708 | "MLRun provides:\n", 709 | "\n", 710 | "A single place to run and manage all ML workloads (from data science to production)\n", 711 | "A unified framework for running data science and ML applications\n", 712 | "A single place to run and manage all ML workloads (from data science to production)\n", 713 | "A unified framework for running data science and ML applications\n", 714 | "A unified framework for running data science and\n" 715 | ] 716 | } 717 | ], 718 | "source": [ 719 | "response = serving_function.invoke(path='/predict', body={\"prompt\": \"What is MLRun?\", **generate_kwargs})\n", 720 | "print(response[\"outputs\"])" 721 | ] 722 | }, 723 | { 724 | "cell_type": "code", 725 | "execution_count": 14, 726 | "id": "a2cc218f-e9f1-490a-ab03-b0656f2bc0c1", 727 | "metadata": {}, 728 | "outputs": [ 729 | { 730 | "name": "stdout", 731 | "output_type": "stream", 732 | "text": [ 733 | "> 2023-07-12 05:42:45,916 [info] invoking function: {'method': 'POST', 'path': 'http://nuclio-mlopspedia-bot-yonis-serving.default-tenant.svc.cluster.local:8080/predict'}\n", 734 | "Machine learning is a subfield of artificial intelligence (AI) that focuses on algorithms that can learn from data and improve their performance over time. Machine learning algorithms can be used to build intelligent systems that can make decisions, learn from experience, and adapt to new situations.\n", 735 | "Machine learning algorithms are used in many areas of our daily lives, such as:\n", 736 | "\n", 737 | "Automated driving\n", 738 | "Speech recognition\n", 739 | "Image recognition\n", 740 | "Personalized recommendations\n", 741 | "\n", 742 | "Machine learning algorithms are used in the development of autonomous cars. The cars are able to navigate roads and react to situations in real time.\n", 743 | "Speech recognition algorithms are used in voice assistants like Siri and Alexa. 
They can recognize your voice\n" 744 | ] 745 | } 746 | ], 747 | "source": [ 748 | "response = serving_function.invoke(path='/predict', body={\"prompt\": \"What is machine learning?\", **generate_kwargs})\n", 749 | "print(response[\"outputs\"])" 750 | ] 751 | }, 752 | { 753 | "cell_type": "code", 754 | "execution_count": 15, 755 | "id": "6aebc785-931c-4ea9-8cd1-ec11d8dc02b1", 756 | "metadata": {}, 757 | "outputs": [ 758 | { 759 | "name": "stdout", 760 | "output_type": "stream", 761 | "text": [ 762 | "> 2023-07-12 05:43:06,514 [info] invoking function: {'method': 'POST', 'path': 'http://nuclio-mlopspedia-bot-yonis-serving.default-tenant.svc.cluster.local:8080/predict'}\n", 763 | "This bot do not respond to toxicity.\n" 764 | ] 765 | } 766 | ], 767 | "source": [ 768 | "response = serving_function.invoke(path='/predict', body={\"prompt\": \"You are stupid!\", **generate_kwargs})\n", 769 | "print(response[\"outputs\"])" 770 | ] 771 | }, 772 | { 773 | "cell_type": "markdown", 774 | "id": "3caa7b5c-eed9-4fb2-b69d-4927a681f25c", 775 | "metadata": {}, 776 | "source": [ 777 | "Now, we'll set up a Gradio application and launch it:" 778 | ] 779 | }, 780 | { 781 | "cell_type": "code", 782 | "execution_count": 16, 783 | "id": "4055d2ab-cebc-4456-acb6-80627040416a", 784 | "metadata": { 785 | "tags": [] 786 | }, 787 | "outputs": [], 788 | "source": [ 789 | "import json\n", 790 | "\n", 791 | "import gradio as gr\n", 792 | "import requests\n", 793 | "\n", 794 | "# Get the serving url to send requests to:\n", 795 | "serving_url = deployment.outputs[\"endpoint\"]\n", 796 | "\n", 797 | "\n", 798 | "def generate(prompt, temperature, max_length, top_p, top_k, repetition_penalty):\n", 799 | " # Build the request for our serving graph:\n", 800 | " inputs = {\n", 801 | " \"prompt\": prompt,\n", 802 | " \"temperature\": temperature,\n", 803 | " \"max_length\": max_length,\n", 804 | " \"top_p\": top_p,\n", 805 | " \"top_k\": top_k,\n", 806 | " \"repetition_penalty\": repetition_penalty,\n", 807 | " }\n", 808 | "\n", 809 | " # call the serving function with the request:\n", 810 | " resp = requests.post(serving_url, data=json.dumps(inputs).encode(\"utf-8\"))\n", 811 | "\n", 812 | " # Return the response:\n", 813 | " return resp.json()[\"outputs\"]\n", 814 | "\n", 815 | "\n", 816 | "# Set up a Gradio frontend application:\n", 817 | "with gr.Blocks(analytics_enabled=False, theme=gr.themes.Soft()) as demo:\n", 818 | " gr.Markdown(\n", 819 | " \"\"\"# LLM Playground\n", 820 | "Play with the `generate` configurations and see how they make the LLM's responses better or worse.\n", 821 | "\"\"\"\n", 822 | " )\n", 823 | " with gr.Row():\n", 824 | " with gr.Column(scale=5):\n", 825 | " with gr.Row():\n", 826 | " chatbot = gr.Chatbot()\n", 827 | " with gr.Row():\n", 828 | " prompt = gr.Textbox(label=\"Subject to ask about:\", placeholder=\"Type a question and Enter\")\n", 829 | "\n", 830 | " with gr.Column(scale=1):\n", 831 | " temperature = gr.Slider(minimum=0, maximum=1, value=0.9, label=\"Temperature\", info=\"Choose between 0 and 1\")\n", 832 | " max_length = gr.Slider(minimum=0, maximum=1500, value=150, label=\"Maximum length\", info=\"Choose between 0 and 1500\")\n", 833 | " top_p = gr.Slider(minimum=0, maximum=1, value=0.5, label=\"Top P\", info=\"Choose between 0 and 1\")\n", 834 | " top_k = gr.Slider(minimum=0, maximum=500, value=25, label=\"Top k\", info=\"Choose between 0 and 500\")\n", 835 | " repetition_penalty = gr.Slider(minimum=0, maximum=1, value=1, label=\"repetition penalty\", info=\"Choose between 0 and 
1\")\n", 836 | " clear = gr.Button(\"Clear\")\n", 837 | "\n", 838 | " def respond(prompt, chat_history, temperature, max_length, top_p, top_k, repetition_penalty):\n", 839 | " bot_message = generate(prompt, temperature, max_length, top_p, top_k, repetition_penalty)\n", 840 | " chat_history.append((prompt, bot_message))\n", 841 | "\n", 842 | " return \"\", chat_history\n", 843 | "\n", 844 | " prompt.submit(respond, [prompt, chatbot, temperature, max_length, top_p, top_k, repetition_penalty], [prompt, chatbot])\n", 845 | " clear.click(lambda: None, None, chatbot, queue=False)\n" 846 | ] 847 | }, 848 | { 849 | "cell_type": "code", 850 | "execution_count": 17, 851 | "id": "0ef771d3-ecb1-4cde-a9bc-6ebf8b76d37e", 852 | "metadata": {}, 853 | "outputs": [ 854 | { 855 | "name": "stdout", 856 | "output_type": "stream", 857 | "text": [ 858 | "Running on local URL: http://127.0.0.1:7860\n", 859 | "Running on public URL: https://b47d16a4d0489c6dde.gradio.live\n", 860 | "\n", 861 | "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n" 862 | ] 863 | }, 864 | { 865 | "data": { 866 | "text/html": [ 867 | "
" 868 | ], 869 | "text/plain": [ 870 | "" 871 | ] 872 | }, 873 | "metadata": {}, 874 | "output_type": "display_data" 875 | }, 876 | { 877 | "data": { 878 | "text/plain": [] 879 | }, 880 | "execution_count": 17, 881 | "metadata": {}, 882 | "output_type": "execute_result" 883 | } 884 | ], 885 | "source": [ 886 | "demo.launch(share=True, height=685)" 887 | ] 888 | }, 889 | { 890 | "cell_type": "markdown", 891 | "id": "ef6b3b68", 892 | "metadata": {}, 893 | "source": [ 894 | "" 895 | ] 896 | }, 897 | { 898 | "cell_type": "code", 899 | "execution_count": null, 900 | "id": "803b4824", 901 | "metadata": { 902 | "collapsed": false, 903 | "jupyter": { 904 | "outputs_hidden": false 905 | } 906 | }, 907 | "outputs": [], 908 | "source": [] 909 | } 910 | ], 911 | "metadata": { 912 | "kernelspec": { 913 | "display_name": "mlrun-base", 914 | "language": "python", 915 | "name": "conda-env-mlrun-base-py" 916 | }, 917 | "language_info": { 918 | "codemirror_mode": { 919 | "name": "ipython", 920 | "version": 3 921 | }, 922 | "file_extension": ".py", 923 | "mimetype": "text/x-python", 924 | "name": "python", 925 | "nbconvert_exporter": "python", 926 | "pygments_lexer": "ipython3", 927 | "version": "3.9.16" 928 | } 929 | }, 930 | "nbformat": 4, 931 | "nbformat_minor": 5 932 | } 933 | --------------------------------------------------------------------------------