├── .devcontainer
    ├── Dockerfile.txt
    └── devcontainer.json
├── .flake8
├── .github
    └── workflows
    │   └── ci.yaml
├── .gitignore
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── compose.yaml
├── data
    └── html_urls.txt
├── dev-requirements.txt
├── images
    ├── 16-workers-training.png
    ├── gradio.png
    ├── hf-ds-mlrun.png
    ├── serving-graph.png
    ├── training-pipeline.png
    ├── video-thumbnail.png
    └── workflow-train.png
├── mlrun.env
├── project.yaml
├── project_setup.py
├── pyproject.toml
├── requirements.txt
├── setup.py
├── src
    ├── data_collection.py
    ├── data_preprocess.py
    ├── serving.py
    ├── trainer.py
    └── training_workflow.py
└── tutorial.ipynb


/.devcontainer/Dockerfile.txt:
--------------------------------------------------------------------------------
 1 | # See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.236.0/containers/python-3/.devcontainer/base.Dockerfile
 2 | 
 3 | # [Choice] Python version (use -bullseye variants on local arm64/Apple Silicon): 3, 3.10, 3.9, 3.8, 3.7, 3.6, 3-bullseye, 3.10-bullseye, 3.9-bullseye, 3.8-bullseye, 3.7-bullseye, 3.6-bullseye, 3-buster, 3.10-buster, 3.9-buster, 3.8-buster, 3.7-buster, 3.6-buster
 4 | ARG VARIANT="3.8-bullseye"
 5 | FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT}
 6 | 
 7 | # [Choice] Node.js version: none, lts/*, 16, 14, 12, 10
 8 | ARG NODE_VERSION="none"
 9 | RUN if [ "${NODE_VERSION}" != "none" ]; then su vscode -c "umask 0002 && . /usr/local/share/nvm/nvm.sh && nvm install ${NODE_VERSION} 2>&1"; fi
10 | 
11 | # [Optional] If your pip requirements rarely change, uncomment this section to add them to the image.
12 | COPY requirements.txt /tmp/pip-tmp/
13 | RUN pip3 --disable-pip-version-check --no-cache-dir install -r /tmp/pip-tmp/requirements.txt \
14 |     && rm -rf /tmp/pip-tmp
15 | 
16 | # [Optional] Uncomment this section to install additional OS packages.
17 | # RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
18 | #     && apt-get -y install --no-install-recommends <your-package-list-here>
19 | 
20 | # [Optional] Uncomment this line to install global node packages.
21 | # RUN su vscode -c "source /usr/local/share/nvm/nvm.sh && npm install -g <your-package-here>" 2>&1
22 | 


--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
 1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the README at:
 2 | // https://github.com/microsoft/vscode-dev-containers/tree/v0.236.0/containers/python-3
 3 | {
 4 | 	"name": "MLRun NYC Taxi Tutorial",
 5 | 	"build": {
 6 | 		"dockerfile": "Dockerfile",
 7 | 		"context": "..",
 8 | 		"args": { 
 9 | 			// Update 'VARIANT' to pick a Python version: 3, 3.10, 3.9, 3.8, 3.7, 3.6
10 | 			// Append -bullseye or -buster to pin to an OS version.
11 | 			// Use -bullseye variants on local on arm64/Apple Silicon.
12 | 			"VARIANT": "3.8",
13 | 			// Options
14 | 			"NODE_VERSION": "none"
15 | 		}
16 | 	},
17 | 	"containerEnv": {
18 | 		"MLRUN_ENV_FILE": "${containerWorkspaceFolder}/mlrun.env",
19 | 		"SHARED_DIR": "~/mlrun-data",
20 | 		"MLRUN_TAG": "1.2.0-rc21"
21 | 	},
22 | 	// Configure tool-specific properties.
23 | 	"customizations": {
24 | 		// Configure properties specific to VS Code.
25 | 		"vscode": {
26 | 			// Set *default* container specific settings.json values on container create.
27 | 			"settings": { 
28 | 				"python.defaultInterpreterPath": "/usr/local/bin/python",
29 | 				"python.linting.enabled": true,
30 | 				"python.linting.pylintEnabled": true,
31 | 				"python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8",
32 | 				"python.formatting.blackPath": "/usr/local/py-utils/bin/black",
33 | 				"python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf",
34 | 				"python.linting.banditPath": "/usr/local/py-utils/bin/bandit",
35 | 				"python.linting.flake8Path": "/usr/local/py-utils/bin/flake8",
36 | 				"python.linting.mypyPath": "/usr/local/py-utils/bin/mypy",
37 | 				"python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle",
38 | 				"python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle",
39 | 				"python.linting.pylintPath": "/usr/local/py-utils/bin/pylint"
40 | 			},
41 | 			
42 | 			// Add the IDs of extensions you want installed when the container is created.
43 | 			"extensions": [
44 | 				"ms-python.python",
45 | 				"ms-python.vscode-pylance"
46 | 			]
47 | 		}
48 | 	},
49 | 
50 | 	// Use 'forwardPorts' to make a list of ports inside the container available locally.
51 | 	"forwardPorts": [8060, 8070, 8080],
52 | 	// "runArgs": [ "--network", "host"],
53 | 	"portsAttributes": {"8060": {"label": "MLRun UI"}, "8070": {"label": "Nuclio UI"}, "8080": {"label": "MLRun API"}},
54 | 
55 | 	// Use 'postCreateCommand' to run commands after the container is created.
56 | 	// "postCreateCommand": "chmod +x /workspaces/tutorials/start.sh",
57 | 	// "postStartCommand": "echo XXX=$(ip route get 1.2.3.4 | awk '{print $7}') > xx.env",
58 | 
59 | 	// Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
60 | 	"remoteUser": "vscode",
61 | 	"features": {
62 | 		"docker-from-docker": "latest",
63 | 		"git": "latest",
64 | 		"jupyterlab": "latest"
65 | 	},
66 | 	"hostRequirements": {
67 | 	   "cpus": 4,
68 | 	   "memory": "8gb",
69 | 	   "storage": "32gb"
70 | 	}
71 | }
72 | 


--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 120
3 | extend-ignore = E203, W503
4 | 
5 | # exclude these dirs
6 | exclude = .git,venv,playground
7 | 
8 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   pull_request:
 5 |     branches:
 6 |     - development
 7 |   push:
 8 |     branches:
 9 |     - main
10 | 
11 | jobs:
12 |   lint:
13 |     name: Lint code (Python ${{ matrix.python-version }})
14 |     runs-on: ubuntu-latest
15 |     strategy:
16 |       matrix:
17 |         python-version: [3.9]
18 |     steps:
19 |     - uses: actions/checkout@v3
20 |     - name: Set up python ${{ matrix.python-version }}
21 |       uses: actions/setup-python@v4
22 |       with:
23 |         python-version: ${{ matrix.python-version }}
24 |     - uses: actions/cache@v2
25 |       with:
26 |         path: ~/.cache/pip
27 |         key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('**/dev-requirements.txt') }}
28 |         restore-keys: |
29 |           ${{ runner.os }}-pip-${{ matrix.python-version }}-
30 |           ${{ runner.os }}-pip-
31 |     - name: Install dependencies
32 |       run: |
33 |         python -m pip install --upgrade pip~=22.3.0
34 |         pip install -r dev-requirements.txt
35 |     - name: Lint
36 |       run: make lint
37 | 
38 | 
39 |   tests:
40 |     name: Run tests (Python ${{ matrix.python-version }})
41 |     runs-on: ubuntu-latest
42 |     strategy:
43 |       matrix:
44 |         python-version: [3.9]
45 |     steps:
46 |     - uses: actions/checkout@v3
47 |     - name: Set up python ${{ matrix.python-version }}
48 |       uses: actions/setup-python@v4
49 |       with:
50 |         python-version: ${{ matrix.python-version }}
51 |     - uses: actions/cache@v2
52 |       with:
53 |         path: ~/.cache/pip
54 |         key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('requirements.txt') }}
55 |         restore-keys: |
56 |           ${{ runner.os }}-pip-${{ matrix.python-version }}-
57 |           ${{ runner.os }}-pip-
58 |     - name: Install automation scripts dependencies and add mlrun to dev packages
59 |       run: pip install -r requirements.txt -r dev-requirements.txt
60 |     - name: Test package
61 |       run: make test
62 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM mlrun/ml-models-gpu:1.3.0
2 | RUN pip install -U transformers[deepspeed]
3 | RUN pip install -U datasets
4 | RUN pip install -U accelerate
5 | RUN pip install -U evaluate
6 | RUN pip install -U protobuf==3.20.*
7 | RUN pip install -U mpi4py
8 | RUN conda install -c "nvidia/label/cuda-11.7.1" cuda-nvprof


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | 
 2 | PYTHON_INTERPRETER = python3
 3 | SHARED_DIR ?= ~/mlrun-data
 4 | MLRUN_TAG ?= 1.3.0
 5 | HOST_IP ?=$$(ip route get 1.2.3.4 | awk '{print $$7}')
 6 | CONDA_ENV ?= mlrun
 7 | SHELL=/bin/bash
 8 | CONDA_PY_VER ?= 3.9
 9 | CONDA_ACTIVATE = source $$(conda info --base)/etc/profile.d/conda.sh ; conda activate ; conda activate
10 | 
11 | #################################################################################
12 | # COMMANDS                                                                      #
13 | #################################################################################
14 | 
15 | .PHONY: help
16 | help: ## Display available commands
17 | 	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
18 | 
19 | .PHONY: all
20 | all:
21 | 	$(error please pick a target)
22 | 
23 | .PHONY: install-requirements
24 | install-requirements: ## Install all requirements needed for development
25 | 	$(PYTHON_INTERPRETER) -m pip install -r requirements.txt -r dev-requirements.txt
26 | 
27 | 
28 | .PHONY: package-wheel
29 | package-wheel: clean ## Build python package wheel
30 | 	python setup.py bdist_wheel
31 | 
32 | .PHONY: clean
33 | clean: ## Clean python package build artifacts
34 | 	rm -rf build
35 | 	rm -rf dist
36 | 	find . -type f -name "*.py[co]" -delete
37 | 	find . -type d -name "__pycache__" -delete
38 | 
39 | .PHONY: fmt
40 | fmt: ## Format the code (using black and isort)
41 | 	@echo "Running black fmt..."
42 | 	$(PYTHON_INTERPRETER) -m black src
43 | 	$(PYTHON_INTERPRETER) -m isort src
44 | 
45 | .PHONY: lint
46 | lint: fmt-check flake8 ## Run lint on the code
47 | 
48 | .PHONY: fmt-check
49 | fmt-check: ## Format and check the code (using black and isort)
50 | 	@echo "Running black+isort fmt check..."
51 | 	$(PYTHON_INTERPRETER) -m black --check --diff src
52 | 	$(PYTHON_INTERPRETER) -m isort --check --diff src
53 | 
54 | .PHONY: flake8
55 | flake8: ## Run flake8 lint
56 | 	@echo "Running flake8 lint..."
57 | 	$(PYTHON_INTERPRETER) -m flake8 src
58 | 
59 | .PHONY: mlrun-docker
60 | mlrun-docker: ## Start MLRun & Nuclio containers (using Docker compose)
61 | 	mkdir $(SHARED_DIR) -p
62 | 	@echo "HOST_IP=$(HOST_IP)" > .env
63 | 	SHARED_DIR=$(SHARED_DIR) TAG=$(MLRUN_TAG) docker-compose -f compose.yaml up -d
64 | 	@echo "use docker-compose stop / logs commands to stop or view logs"
65 | 
66 | .PHONY: mlrun-api
67 | mlrun-api: ## Run MLRun DB locally (as process)
68 | 	@echo "Installing MLRun API dependencies ..."
69 | 	$(PYTHON_INTERPRETER) -m pip install uvicorn~=0.17.0 dask-kubernetes~=0.11.0 apscheduler~=3.6 sqlite3-to-mysql~=1.4
70 | 	@echo "Starting local mlrun..."
71 | 	MLRUN_ARTIFACT_PATH=$$(realpath ./artifacts) MLRUN_ENV_FILE= mlrun db -b
72 | 
73 | .PHONY: conda-env
74 | conda-env: ## Create a conda environment
75 | 	@echo "Creating new conda environment $(CONDA_ENV)..."
76 | 	conda create -n $(CONDA_ENV) -y python=$(CONDA_PY_VER) ipykernel graphviz pip
77 | 	test -s ./mlrun.env && conda env config vars set -n $(CONDA_ENV) MLRUN_ENV_FILE=$$(realpath ./mlrun.env)
78 | 	@echo "Installing requirements.txt..."
79 | 	$(CONDA_ACTIVATE) $(CONDA_ENV); pip install -r requirements.txt
80 | 	@echo -e "\nTo run mlrun API as a local process type:\n  conda activate $(CONDA_ENV) && make mlrun-api"


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # **MLOpsPedia** - The MLOps Master Bot
 2 | 
 3 | <img src="./images/hf-ds-mlrun.png" alt="huggingface-mlrun" style="width: 500px"/>
 4 | 
 5 | This demo demonstrates how to fine tune a LLM and build an ML application: the **MLOps master bot**! We'll train  [`falcon-7b`](https://huggingface.co/tiiuae/falcon-7b) on [**Iguazio**'s MLOps blogs](https://www.iguazio.com/blog/) and cover how easy it is to take a model and code from development to production. Even if its a big scary LLM model, MLRun will take care of the dirty work!
 6 | 
 7 | We will use:
 8 | * [**HuggingFace**](https://huggingface.co/) - as the main machine learning framework to get the model and tokenizer.
 9 | * [**DeepSpeed**](https://www.deepspeed.ai/) - as the distributed training framework.
10 | * and [**MLRun**](https://www.mlrun.org/) - as the orchastraitor to operationalize it, moving it from development to production.
11 | 
12 | The demo contains a single [notebook](./tutorial.ipynb) that covers the two main stages in every MLOps project: 
13 | 
14 | * **Training Pipeline Automation** - Demonstrating how to get an existing model (`falcon-7b`) from HuggingFace's Transformers package and operationalize it through all of its life cycle phases: data collection, data ppreparation, training and evaluation, as a fully automated pipeline.
15 | * **Application Serving Pipeline** - Showing how to productize the newly trained LLM as a serverless function.
16 | 
17 | You can find all the python source code under [/src](./src)
18 | 
19 | [<img src="./images/video-thumbnail.png" style="width: 700px"/>](http://www.youtube.com/watch?v=aAU54bTH6_o "MLOps for Generative AI with MLRun")
20 | 
21 | Be sure to check out Yaron Haviv's video [Deploying Hugging Face Models to Production at Scale with GPUs](http://www.youtube.com/watch?v=aAU54bTH6_o)
22 | to get a walkthrough of a simillar demo.
23 | 
24 | ___
25 | <a id="installation"></a>
26 | ## Installation
27 | 
28 | This project can run in different development environments:
29 | * Local computer (using PyCharm, VSCode, Jupyter, etc.)
30 | * Inside GitHub Codespaces 
31 | * Other managed Jupyter environments
32 | 
33 | ### Install the code and mlrun client 
34 | 
35 | To get started, fork this repo into your GitHub account and clone it into your development environment.
36 | 
37 | To install the package dependencies (not required in GitHub codespaces) use:
38 |  
39 |     make install-requirements
40 |     
41 | If you prefer to use Conda use this instead (to create and configure a conda env):
42 | 
43 |     make conda-env
44 | 
45 | > Make sure you open the notebooks and select the `mlrun` conda environment 
46 |  
47 | ### Install or connect to MLRun service/cluster
48 | 
49 | The MLRun service and computation can run locally (minimal setup) or over a remote Kubernetes environment.
50 | 
51 | If your development environment support docker and have enough CPU resources run:
52 | 
53 |     make mlrun-docker
54 |     
55 | > MLRun UI can be viewed in: http://localhost:8060
56 |     
57 | If your environment is minimal, run mlrun as a process (no UI):
58 | 
59 |     [conda activate mlrun &&] make mlrun-api
60 |  
61 | For MLRun to run properly you should set your client environment, this is not required when using **codespaces**, the mlrun **conda** environment, or **iguazio** managed notebooks.
62 | 
63 | Your environment should include `MLRUN_ENV_FILE=<absolute path to the ./mlrun.env file> ` (point to the mlrun .env file 
64 | in this repo), see [mlrun client setup](https://docs.mlrun.org/en/latest/install/remote.html) instructions for details.  
65 |      
66 | > Note: You can also use a remote MLRun service (over Kubernetes), instead of starting a local mlrun, 
67 | > edit the [mlrun.env](./mlrun.env) and specify its address and credentials  
68 | 


--------------------------------------------------------------------------------
/compose.yaml:
--------------------------------------------------------------------------------
 1 | services:
 2 |   init_nuclio:
 3 |     image: alpine:3.16
 4 |     command:
 5 |       - "/bin/sh"
 6 |       - "-c"
 7 |       - |
 8 |         mkdir -p /etc/nuclio/config/platform; \
 9 |         cat << EOF | tee /etc/nuclio/config/platform/platform.yaml
10 |         runtime:
11 |           common:
12 |             env:
13 |               MLRUN_DBPATH: http://${HOST_IP:?err}:8080
14 |         local:
15 |           defaultFunctionContainerNetworkName: mlrun
16 |           defaultFunctionRestartPolicy:
17 |             name: always
18 |             maxRetryCount: 0
19 |           defaultFunctionVolumes:
20 |             - volume:
21 |                 name: mlrun-stuff
22 |                 hostPath:
23 |                   path: ${SHARED_DIR:?err}
24 |               volumeMount:
25 |                 name: mlrun-stuff
26 |                 mountPath: /home/jovyan/data/
27 |         logger:
28 |           sinks:
29 |             myStdoutLoggerSink:
30 |               kind: stdout
31 |           system:
32 |             - level: debug
33 |               sink: myStdoutLoggerSink
34 |           functions:
35 |             - level: debug
36 |               sink: myStdoutLoggerSink
37 |         EOF
38 |     volumes:
39 |       - nuclio-platform-config:/etc/nuclio/config
40 | 
41 |   mlrun-api:
42 |     image: "mlrun/mlrun-api:${TAG:-1.1.2}"
43 |     ports:
44 |       - "8080:8080"
45 |     environment:
46 |       MLRUN_ARTIFACT_PATH: "${SHARED_DIR}/{{project}}"
47 |       # using local storage, meaning files / artifacts are stored locally, so we want to allow access to them
48 |       MLRUN_HTTPDB__REAL_PATH: /data
49 |       MLRUN_HTTPDB__DATA_VOLUME: "${SHARED_DIR}"
50 |       MLRUN_LOG_LEVEL: DEBUG
51 |       MLRUN_NUCLIO_DASHBOARD_URL: http://nuclio:8070
52 |       MLRUN_HTTPDB__DSN: "sqlite:////data/mlrun.db?check_same_thread=false"
53 |       MLRUN_UI__URL: http://localhost:8060
54 |       # not running on k8s meaning no need to store secrets
55 |       MLRUN_SECRET_STORES__KUBERNETES__AUTO_ADD_PROJECT_SECRETS: "false"
56 |       # let mlrun control nuclio resources
57 |       MLRUN_HTTPDB__PROJECTS__FOLLOWERS: "nuclio"
58 |     volumes:
59 |       - "${SHARED_DIR:?err}:/data"
60 |     networks:
61 |       - mlrun
62 | 
63 |   mlrun-ui:
64 |     image: "mlrun/mlrun-ui:${TAG:-1.1.2}"
65 |     ports:
66 |       - "8060:8090"
67 |     environment:
68 |       MLRUN_API_PROXY_URL: http://mlrun-api:8080
69 |       MLRUN_NUCLIO_MODE: enable
70 |       MLRUN_NUCLIO_API_URL: http://nuclio:8070
71 |       MLRUN_NUCLIO_UI_URL: http://localhost:8070
72 |     networks:
73 |       - mlrun
74 | 
75 |   nuclio:
76 |     image: "quay.io/nuclio/dashboard:${NUCLIO_TAG:-stable-amd64}"
77 |     ports:
78 |       - "8070:8070"
79 |     environment:
80 |       NUCLIO_DASHBOARD_EXTERNAL_IP_ADDRESSES: "${HOST_IP:?err}"
81 |     volumes:
82 |       - /var/run/docker.sock:/var/run/docker.sock
83 |       - nuclio-platform-config:/etc/nuclio/config
84 |     depends_on:
85 |       - init_nuclio
86 |     networks:
87 |       - mlrun
88 | 
89 | volumes:
90 |   nuclio-platform-config: {}
91 | 
92 | networks:
93 |   mlrun:
94 |     name: mlrun
95 | 


--------------------------------------------------------------------------------
/data/html_urls.txt:
--------------------------------------------------------------------------------
  1 | https://www.iguazio.com/blog/iguazio-releases-data-science-platform-version-2-8/
  2 | https://www.iguazio.com/blog/intelligent-edge-iguazio-google/
  3 | https://www.iguazio.com/blog/top-9-odsc-europe-sessions-you-cant-miss/
  4 | https://www.iguazio.com/blog/cloud-native-will-shake-up-enterprise-storage/
  5 | https://www.iguazio.com/blog/building-an-automated-ml-pipeline-with-a-feature-store-using-iguazio-snowflake/
  6 | https://www.iguazio.com/blog/concept-drift-and-the-impact-of-covid-19-on-data-science/
  7 | https://www.iguazio.com/blog/odsc-east-boston-2022-top-11-sessions-for-ai-and-ml-professionals-to-attend/
  8 | https://www.iguazio.com/blog/idc-mlopmarketscape-2022/
  9 | https://www.iguazio.com/blog/iguazio-listed-in-7-gartner-hype-cycles-for-2021/
 10 | https://www.iguazio.com/blog/announcing-the-winners-mlops-for-good-hackathon/
 11 | https://www.iguazio.com/blog/the-importance-of-data-storytelling-in-shaping-a-data-science-product/
 12 | https://www.iguazio.com/blog/modernize-it-infrastructure/
 13 | https://www.iguazio.com/blog/implementing-automation-and-an-mlops-framework-for-enterprise-scale-ml/
 14 | https://www.iguazio.com/blog/automating-ml-pipelines-on-azure-and-azure-stack/
 15 | https://www.iguazio.com/blog/real-time-streaming-for-data-science/
 16 | https://www.iguazio.com/blog/dcos-apps/
 17 | https://www.iguazio.com/blog/iguazio-receives-an-honorable-mention-in-the-2021-gartner-magic-quadrant-for-data-science-and-machine-learning-platforms/
 18 | https://www.iguazio.com/blog/gartner-2022-market-guide-for-dsml-engineering-platforms/
 19 | https://www.iguazio.com/blog/can-open-source-serverless-be-simpler-than-lambda/
 20 | https://www.iguazio.com/blog/cncf-webinar-serverless-ai/
 21 | https://www.iguazio.com/blog/2018-can-cloud-big-data-ai-stand-turmoil/
 22 | https://www.iguazio.com/blog/2022-predictions/
 23 | https://www.iguazio.com/blog/mlops-for-python/
 24 | https://www.iguazio.com/blog/mlops-predictions-for-2023/
 25 | https://www.iguazio.com/blog/adopting-a-production-first-approach-to-enterprise-ai/
 26 | https://www.iguazio.com/blog/from-automl-to-automlops/
 27 | https://www.iguazio.com/blog/odscwest2021/
 28 | https://www.iguazio.com/blog/top-10-recommended-mlops-world-2021-sessions/
 29 | https://www.iguazio.com/blog/breaking-the-silos-between-data-scientists-engineers-and-devops-with-new-mlops-practices/
 30 | https://www.iguazio.com/blog/top-8-machine-learning-resources-for-data-scientists-data-engineers-and-everyone/
 31 | https://www.iguazio.com/blog/azure-synapse-analytics-and-iguazio/
 32 | https://www.iguazio.com/blog/how-to-tap-into-higher-level-abstraction-efficiency-automation-to-simplify-your-ai-ml-journey/
 33 | https://www.iguazio.com/blog/how-seagate-runs-advanced-manufacturing-at-scale-with-iguazio/
 34 | https://www.iguazio.com/blog/predictive-real-time-operational-ml-pipeline-fighting-customer-churn/
 35 | https://www.iguazio.com/blog/build-an-ai-app-in-under-20-minutes/
 36 | https://www.iguazio.com/blog/deploying-machine-learning-models-for-real-time-predictions-checklist/
 37 | https://www.iguazio.com/blog/data-science-post-hadoop/
 38 | https://www.iguazio.com/blog/wanted-a-faster-storage-stack/
 39 | https://www.iguazio.com/blog/kubernetes-the-open-scalable-approach-to-ml-pipelines/
 40 | https://www.iguazio.com/blog/vmware-on-aws-a-scorecard-for-winners-and-losers/
 41 | https://www.iguazio.com/blog/aws-reinvent-data-serverless-ai/
 42 | https://www.iguazio.com/blog/beyond-hyped-iguazio-named-in-8-gartner-hype-cycles-for-2022/
 43 | https://www.iguazio.com/blog/ai-ml-and-roi-why-your-balance-sheet-cares-about-your-technology-choices/
 44 | https://www.iguazio.com/blog/orchestrating-ml-pipelines-scale-kubeflow/
 45 | https://www.iguazio.com/blog/using-automated-model-management-for-cpg-trade-success/
 46 | https://www.iguazio.com/blog/spark-over-kubernetes/
 47 | https://www.iguazio.com/blog/announcing-iguazio-version-3-0-breaking-the-silos-for-faster-deployment/
 48 | https://www.iguazio.com/blog/the-complete-guide-to-using-the-iguazio-feature-store-with-azure-ml-part-4/
 49 | https://www.iguazio.com/blog/accelerating-ml-deployment-in-hybrid-environments/
 50 | https://www.iguazio.com/blog/it-worked-fine-in-jupyter-now-what/
 51 | https://www.iguazio.com/blog/kubeflow-vs-mlflow-vs-mlrun/
 52 | https://www.iguazio.com/blog/part-one-the-complete-guide-to-using-the-iguazio-feature-store-with-azure-ml/
 53 | https://www.iguazio.com/blog/handling-large-datasets-with-mlops-dask-on-kubernetes/
 54 | https://www.iguazio.com/blog/faster-ai-development-serverless/
 55 | https://www.iguazio.com/blog/nuclio-future-serverless-computing/
 56 | https://www.iguazio.com/blog/how-to-build-real-time-feature-engineering-with-a-feature-store/
 57 | https://www.iguazio.com/blog/nyc-meetup-jan2018/
 58 | https://www.iguazio.com/blog/distributed-feature-store-ingestion-with-iguazio-snowflake-and-spark/
 59 | https://www.iguazio.com/blog/iguazio-raises-33m-accelerate-digital-transformation/
 60 | https://www.iguazio.com/blog/the-complete-guide-to-using-the-iguazio-feature-store-with-azure-ml-part-2/
 61 | https://www.iguazio.com/blog/serverless-can-it-simplify-data-science-projects/
 62 | https://www.iguazio.com/blog/machine-learning-hard/
 63 | https://www.iguazio.com/blog/free-manufacturing-datasets/
 64 | https://www.iguazio.com/blog/building-real-time-ml-pipelines-with-a-feature-store/
 65 | https://www.iguazio.com/blog/paving-the-data-science-dirt-road/
 66 | https://www.iguazio.com/blog/horovod-for-deep-learning-on-a-gpu-cluster/
 67 | https://www.iguazio.com/blog/using-containers-as-mini-vms-is-not-cloud-native/
 68 | https://www.iguazio.com/blog/top-9-recommended-odsc-europe-2021-sessions/
 69 | https://www.iguazio.com/blog/realtime-bigdata/
 70 | https://www.iguazio.com/blog/python-pandas-performance/
 71 | https://www.iguazio.com/blog/iguazio-rvmworld-2017-vmware-feeds-off-openstack-decay/
 72 | https://www.iguazio.com/blog/how-gpuaas-on-kubeflow-can-boost-your-productivity/
 73 | https://www.iguazio.com/blog/mlops-nyc-sessions/
 74 | https://www.iguazio.com/blog/2017-predictions-clouds-thunder-and-fog/
 75 | https://www.iguazio.com/blog/odsc-east-2023/
 76 | https://www.iguazio.com/blog/join-us-at-nvidia-gtc-2021/
 77 | https://www.iguazio.com/blog/mckinsey-acquires-iguazio-our-startups-journey/
 78 | https://www.iguazio.com/blog/git-based-ci-cd-for-machine-learning-mlops/
 79 | https://www.iguazio.com/blog/mlops-for-good-hackathon-roundup/
 80 | https://www.iguazio.com/blog/big-data-must-begin-with-clean-slate/
 81 | https://www.iguazio.com/blog/suse-iguazio/
 82 | https://www.iguazio.com/blog/how-to-run-workloads-on-spark-operator-with-dynamic-allocation-using-mlrun/
 83 | https://www.iguazio.com/blog/will-kubernetes-sink-the-hadoop-ship/
 84 | https://www.iguazio.com/blog/5-incredible-data-science-solutions-for-real-world-problems/
 85 | https://www.iguazio.com/blog/mlops-challenges-solutions-future-trends/
 86 | https://www.iguazio.com/blog/cloud-data-services-sprawl-its-complicated/
 87 | https://www.iguazio.com/blog/predicting-1st-day-churn-in-real-time/
 88 | https://www.iguazio.com/blog/machine-learning-experiment-tracking-from-zero-to-hero-in-2-lines-of-code/
 89 | https://www.iguazio.com/blog/how-to-bring-breakthrough-performance-and-productivity-to-ai-ml-projects/
 90 | https://www.iguazio.com/blog/how-to-deploy-an-mlrun-project-in-a-ci-cd-process-with-jenkins-pipeline/
 91 | https://www.iguazio.com/blog/iguazio-named-in-forresters-now-tech-ai-ml-platforms-q1-2022/
 92 | https://www.iguazio.com/blog/the-complete-guide-to-using-the-iguazio-feature-store-with-azure-ml-part-3/
 93 | https://www.iguazio.com/blog/what-are-feature-stores-and-why-are-they-critical-for-scaling-data-science/
 94 | https://www.iguazio.com/blog/reinventing-data-services/
 95 | https://www.iguazio.com/blog/re-structure-in-big-data/
 96 | https://www.iguazio.com/blog/top-22-free-healthcare-datasets-for-machine-learning/
 97 | https://www.iguazio.com/blog/operationalizing-machine-learning-for-the-automotive-future/
 98 | https://www.iguazio.com/blog/automating-mlops-for-deep-learning-how-to-operationalize-dl-with-minimal-effort/
 99 | https://www.iguazio.com/blog/iguazio-named-a-fast-moving-leader-by-gigaom-in-the-radar-for-mlops-report/
100 | https://www.iguazio.com/blog/data-science-salon-review-elevating-data-science-practices-for-media-entertainment-advertising/
101 | https://www.iguazio.com/blog/wrapping-up-serverless-nyc-2018/
102 | https://www.iguazio.com/blog/the-next-gen-digital-transformation-cloud-native-data-platforms/
103 | https://www.iguazio.com/blog/best-practices-for-succeeding-with-mlops/
104 | https://www.iguazio.com/blog/did-amazon-just-kill-open-source/
105 | https://www.iguazio.com/blog/cloud-native-storage-primer/
106 | https://www.iguazio.com/blog/serverless-background-challenges-and-future/
107 | https://www.iguazio.com/blog/experiment-tracking/
108 | https://www.iguazio.com/blog/continuous-analytics-real-time-meets-cloud-native/
109 | https://www.iguazio.com/blog/concept-drift-deep-dive-how-to-build-a-drift-aware-ml-system/
110 | https://www.iguazio.com/blog/building-ml-pipelines-over-federated-data-compute-environments/
111 | https://www.iguazio.com/blog/top-8-recommended-mlops-world-2022-sessions/
112 | https://www.iguazio.com/blog/it-vendors-dont-stand-a-chance-against-the-cloud/
113 | https://www.iguazio.com/blog/ml-workflows-what-can-you-automate/
114 | https://www.iguazio.com/blog/iguazio-collaborates-with-equinix-to-offer-data-centric-hybrid-cloud-solutions/
115 | https://www.iguazio.com/blog/gigaom-names-iguazio-a-leader-and-outperformer-for-2022/
116 | https://www.iguazio.com/blog/iguazio-nvidia-edge/
117 | https://www.iguazio.com/blog/extending-kubeflow-into-an-end-to-end-ml-solution/
118 | https://www.iguazio.com/blog/iguazio-listed-in-five-2020-gartner-hype-cycle-reports/
119 | https://www.iguazio.com/blog/data-science-trends-2020/
120 | https://www.iguazio.com/blog/operationalizing-data-science/
121 | https://www.iguazio.com/blog/using-snowflake-and-dask-for-large-scale-ml-workloads/
122 | https://www.iguazio.com/blog/best-13-free-financial-datasets-for-machine-learning/
123 | https://www.iguazio.com/blog/introduction-to-tf-serving/
124 | https://www.iguazio.com/blog/hcis-journey-to-mlops-efficiency/
125 | https://www.iguazio.com/blog/streamlined-iot-at-scale-with-iguazio/
126 | https://www.iguazio.com/blog/iguazio-product-update-optimize-your-ml-workload-costs-with-aws-ec2-spot-instances/
127 | https://www.iguazio.com/blog/top-10-odsc-west-sessions-you-must-attend/
128 | https://www.iguazio.com/blog/iguazio-named-a-leader-and-outperformer-in-gigaom-radar-for-mlops-2022/
129 | https://www.iguazio.com/blog/deploying-your-hugging-face-models-to-production-at-scale-with-mlrun/
130 | 


--------------------------------------------------------------------------------
/dev-requirements.txt:
--------------------------------------------------------------------------------
1 | pytest~=5.4
2 | black~=24.8
3 | isort~=5.7
4 | flake8~=5.0
5 | 


--------------------------------------------------------------------------------
/images/16-workers-training.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlrun/demo-llm-tuning/2b377d125ce1d2a981d3b1dbfdb9055d69b19714/images/16-workers-training.png


--------------------------------------------------------------------------------
/images/gradio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlrun/demo-llm-tuning/2b377d125ce1d2a981d3b1dbfdb9055d69b19714/images/gradio.png


--------------------------------------------------------------------------------
/images/hf-ds-mlrun.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlrun/demo-llm-tuning/2b377d125ce1d2a981d3b1dbfdb9055d69b19714/images/hf-ds-mlrun.png


--------------------------------------------------------------------------------
/images/serving-graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlrun/demo-llm-tuning/2b377d125ce1d2a981d3b1dbfdb9055d69b19714/images/serving-graph.png


--------------------------------------------------------------------------------
/images/training-pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlrun/demo-llm-tuning/2b377d125ce1d2a981d3b1dbfdb9055d69b19714/images/training-pipeline.png


--------------------------------------------------------------------------------
/images/video-thumbnail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlrun/demo-llm-tuning/2b377d125ce1d2a981d3b1dbfdb9055d69b19714/images/video-thumbnail.png


--------------------------------------------------------------------------------
/images/workflow-train.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlrun/demo-llm-tuning/2b377d125ce1d2a981d3b1dbfdb9055d69b19714/images/workflow-train.png


--------------------------------------------------------------------------------
/mlrun.env:
--------------------------------------------------------------------------------
 1 | # default env vars, will be loaded once MLRun imports/starts
 2 | # write here remote cluster credentials, addresses, etc.
 3 | # uncomment the relevant lines and set with proper parameters
 4 | 
 5 | # local/remote MLRun service address
 6 | MLRUN_DBPATH=http://localhost:8080
 7 | 
 8 | # if Nuclio not detected simulate it with mock
 9 | MLRUN_MOCK_NUCLIO_DEPLOYMENT=auto
10 | 
11 | # Iguazio cluster and V3IO credentials (for remote cluster)
12 | # V3IO_USERNAME=<user>
13 | # V3IO_ACCESS_KEY=<access-key>
14 | 
15 | # AWS S3/services credentials
16 | # AWS_ACCESS_KEY_ID=<key-id>
17 | # AWS_SECRET_ACCESS_KEY=<access-key>
18 | 
19 | # The Azure connection string which points at a storage account. For example:
20 | #   DefaultEndpointsProtocol=https;AccountName=myAcct;AccountKey=XXXX;EndpointSuffix=core.windows.net
21 | # AZURE_STORAGE_CONNECTION_STRING=<connection-string>
22 | 


--------------------------------------------------------------------------------
/project.yaml:
--------------------------------------------------------------------------------
  1 | kind: project
  2 | metadata:
  3 |   name: mlopspedia-bot-yonis
  4 | spec:
  5 |   params:
  6 |     source: git://github.com/mlrun/demo-llm-tuning.git#main
  7 |     default_image: yonishelach/mlrun-llm
  8 |   functions:
  9 |   - url: src/data_collection.py
 10 |     name: data-collecting
 11 |     kind: job
 12 |     image: mlrun/mlrun
 13 |   - url: src/data_preprocess.py
 14 |     name: data-preparing
 15 |     kind: job
 16 |   - url: src/trainer.py
 17 |     name: training
 18 |     kind: job
 19 |   - name: serving
 20 |     spec:
 21 |       kind: serving
 22 |       metadata:
 23 |         name: serving
 24 |         project: mlopspedia-bot-yonis
 25 |       spec:
 26 |         command: ''
 27 |         args: []
 28 |         image: yonishelach/mlrun-llm
 29 |         build:
 30 |           functionSourceCode: aW1wb3J0IGpzb24KaW1wb3J0IG9zCmltcG9ydCB6aXBmaWxlCmZyb20gdHlwaW5nIGltcG9ydCBBbnksIERpY3QKCmltcG9ydCBldmFsdWF0ZQppbXBvcnQgbWxydW4uYXJ0aWZhY3RzCmltcG9ydCBudW1weSBhcyBucAppbXBvcnQgdG9yY2gKaW1wb3J0IHRyYW5zZm9ybWVycwpmcm9tIG1scnVuLnNlcnZpbmcudjJfc2VydmluZyBpbXBvcnQgVjJNb2RlbFNlcnZlcgpmcm9tIHBlZnQgaW1wb3J0IFBlZnRNb2RlbAoKU1VCSkVDVF9NQVJLID0gIiMjIyBIdW1hbjogIgpDT05URU5UX01BUksgPSAiXG4jIyMgQXNzaXN0YW50OiAiClBST01QVF9GT1JNQVQgPSBTVUJKRUNUX01BUksgKyAie30iICsgQ09OVEVOVF9NQVJLCgoKZGVmIHByZXByb2Nlc3MocmVxdWVzdDogZGljdCkgLT4gZGljdDoKICAgICIiIgogICAgY29udmVydCB0aGUgcmVxdWVzdCB0byB0aGUgcmVxdWlyZWQgc3RydWN0dXJlIGZvciB0aGUgcHJlZGljdCBmdW5jdGlvbgoKICAgIDpwYXJhbSByZXF1ZXN0OiBBIGh0dHAgcmVxdWVzdCB0aGF0IGNvbnRhaW5zIHRoZSBwcm9tcHQKICAgICIiIgogICAgIyBSZWFkIGJ5dGVzOgogICAgaWYgaXNpbnN0YW5jZShyZXF1ZXN0LCBieXRlcyk6CiAgICAgICAgcmVxdWVzdCA9IGpzb24ubG9hZHMocmVxdWVzdCkKCiAgICAjIEdldCB0aGUgcHJvbXB0OgogICAgcHJvbXB0ID0gcmVxdWVzdC5wb3AoInByb21wdCIpCgogICAgIyBGb3JtYXQgdGhlIHByb21wdCBhcyBzdWJqZWN0OgogICAgcHJvbXB0ID0gUFJPTVBUX0ZPUk1BVC5mb3JtYXQoc3RyKHByb21wdCkpCgogICAgIyBVcGRhdGUgdGhlIHJlcXVlc3QgYW5kIHJldHVybjoKICAgIHJlcXVlc3QgPSB7ImlucHV0cyI6IFt7InByb21wdCI6IFtwcm9tcHRdLCAqKnJlcXVlc3R9XX0KICAgIHJldHVybiByZXF1ZXN0CgoKY2xhc3MgTExNTW9kZWxTZXJ2ZXIoVjJNb2RlbFNlcnZlcik6CiAgICAiIiIKICAgIFRoaXMgaXMgdGVtcG9yYXJ5IGFuZCB3aWxsIGJlIGJ1aWx0IGluIG1scnVuIDEuNS4wCiAgICAiIiIKCiAgICBkZWYgX19pbml0X18oCiAgICAgICAgc2VsZiwKICAgICAgICBjb250ZXh0OiBtbHJ1bi5NTENsaWVudEN0eCA9IE5vbmUsCiAgICAgICAgbmFtZTogc3RyID0gTm9uZSwKICAgICAgICBtb2RlbF9jbGFzczogc3RyID0gIkF1dG9Nb2RlbEZvckNhdXNhbExNIiwKICAgICAgICB0b2tlbml6ZXJfY2xhc3M6IHN0ciA9ICJBdXRvVG9rZW5pemVyIiwKICAgICAgICAjIG1vZGVsIGFyZ3M6CiAgICAgICAgbW9kZWxfYXJnczogZGljdCA9IE5vbmUsCiAgICAgICAgIyBMb2FkIGZyb20gTUxSdW4gYXJnczoKICAgICAgICBtb2RlbF9wYXRoOiBzdHIgPSBOb25lLAogICAgICAgICMgTG9hZCBmcm9tIGh1YiBhcmdzOgogICAgICAgIG1vZGVsX25hbWU6IHN0ciA9IE5vbmUsCiAgICAgICAgdG9rZW5pemVyX25hbWU6IHN0ciA9IE5vbmUsCiAgICAgICAgIyBEZWVwc3BlZWQgYXJnczoKICAgICAgICB1c2VfZGVlcHNwZWVkOiBib29sID0gRmFsc2UsCiAgICAgICAgbl9ncHVzOiBpbnQgPSAxLAogICAgICAgIGlzX2ZwMTY6IGJvb2wgPSBUcnVlLAogICAgICAgICMgcGVmdCBtb2RlbDoKICAgICAgICBwZWZ0X21vZGVsOiBzdHIgPSBOb25lLAogICAgICAgICMgSW5mZXJlbmNlIGFyZ3M6CiAgICAgICAgKipjbGFzc19hcmdzLAogICAgKToKICAgICAgICAjIEluaXRpYWxpemUgdGhlIGJhc2Ugc2VydmVyOgogICAgICAgIHN1cGVyKExMTU1vZGVsU2VydmVyLCBzZWxmKS5fX2luaXRfXygKICAgICAgICAgICAgY29udGV4dD1jb250ZXh0LAogICAgICAgICAgICBuYW1lPW5hbWUsCiAgICAgICAgICAgIG1vZGVsX3BhdGg9bW9kZWxfcGF0aCwKICAgICAgICAgICAgKipjbGFzc19hcmdzLAogICAgICAgICkKCiAgICAgICAgIyBTYXZlIGNsYXNzIG5hbWVzOgogICAgICAgIHNlbGYubW9kZWxfY2xhc3MgPSBtb2RlbF9jbGFzcwogICAgICAgIHNlbGYudG9rZW5pemVyX2NsYXNzID0gdG9rZW5pemVyX2NsYXNzCgogICAgICAgICMgU2F2ZSBodWIgbG9hZGluZyBwYXJhbWV0ZXJzOgogICAgICAgIHNlbGYubW9kZWxfbmFtZSA9IG1vZGVsX25hbWUKICAgICAgICBzZWxmLnRva2VuaXplcl9uYW1lID0gdG9rZW5pemVyX25hbWUgb3Igc2VsZi5tb2RlbF9uYW1lCgogICAgICAgICMgU2F2ZSBsb2FkIG1vZGVsIGFyZ3VtZW50czoKICAgICAgICBzZWxmLm1vZGVsX2FyZ3MgPSBtb2RlbF9hcmdzCgogICAgICAgICMgU2F2ZSBkZWVwc3BlZWQgcGFyYW1ldGVyczoKICAgICAgICBzZWxmLnVzZV9kZWVwc3BlZWQgPSB1c2VfZGVlcHNwZWVkCiAgICAgICAgc2VsZi5uX2dwdXMgPSBuX2dwdXMKICAgICAgICBzZWxmLmlzX2ZwMTYgPSBpc19mcDE2CgogICAgICAgICMgUEVGVCBwYXJhbWV0ZXJzOgogICAgICAgIHNlbGYucGVmdF9tb2RlbCA9IHBlZnRfbW9kZWwKCiAgICAgICAgIyBQcmVwYXJlIHZhcmlhYmxlcyBmb3IgZnV0dXJlIHVzZToKICAgICAgICBzZWxmLm1vZGVsID0gTm9uZQogICAgICAgIHNlbGYudG9rZW5pemVyID0gTm9uZQogICAgICAgIHNlbGYuX21vZGVsX2NsYXNzID0gTm9uZQogICAgICAgIHNlbGYuX3Rva2VuaXplcl9jbGFzcyA9IE5vbmUKCiAgICBkZWYgbG9hZChzZWxmKToKICAgICAgICAjIEdldCBjbGFzc2VzOgogICAgICAgIHNlbGYuX21vZGVsX2NsYXNzID0gZ2V0YXR0cih0cmFuc2Zvcm1lcnMsIHNlbGYubW9kZWxfY2xhc3MpCiAgICAgICAgc2VsZi5fdG9rZW5pemVyX2NsYXNzID0gZ2V0YXR0cih0cmFuc2Zvcm1lcnMsIHNlbGYudG9rZW5pemVyX2NsYXNzKQoKICAgICAgICAjIExvYWQgdGhlIG1vZGVsIGFuZCB0b2tlbml6ZXI6CiAgICAgICAgaWYgc2VsZi5tb2RlbF9wYXRoOgogICAgICAgICAgICBzZWxmLl9sb2FkX2Zyb21fbWxydW4oKQogICAgICAgIGVsc2U6CiAgICAgICAgICAgIHNlbGYuX2xvYWRfZnJvbV9odWIoKQoKICAgICAgICAjIFVzZSBkZWVwc3BlZWQgaWYgbmVlZGVkOgogICAgICAgIGlmIHNlbGYudXNlX2RlZXBzcGVlZDoKICAgICAgICAgICAgaW1wb3J0IGRlZXBzcGVlZAoKICAgICAgICAgICAgc2VsZi5tb2RlbCA9IGRlZXBzcGVlZC5pbml0X2luZmVyZW5jZSgKICAgICAgICAgICAgICAgIG1vZGVsPXNlbGYubW9kZWwsCiAgICAgICAgICAgICAgICBtcF9zaXplPXNlbGYubl9ncHVzLAogICAgICAgICAgICAgICAgZHR5cGU9dG9yY2guZmxvYXQxNiBpZiBzZWxmLmlzX2ZwMTYgZWxzZSB0b3JjaC5mbG9hdDMyLAogICAgICAgICAgICAgICAgcmVwbGFjZV9tZXRob2Q9ImF1dG8iLAogICAgICAgICAgICAgICAgcmVwbGFjZV93aXRoX2tlcm5lbF9pbmplY3Q9VHJ1ZSwKICAgICAgICAgICAgKQogICAgICAgIGlmIHNlbGYucGVmdF9tb2RlbDoKICAgICAgICAgICAgc2VsZi5fbG9hZF9wZWZ0X21vZGVsKCkKCiAgICBkZWYgX2V4dHJhY3RfbW9kZWwoc2VsZiwgdXJsKToKICAgICAgICAjIEdldCB0aGUgbW9kZWwgYXJ0aWZhY3QgYW5kIGZpbGU6CiAgICAgICAgKAogICAgICAgICAgICBtb2RlbF9maWxlLAogICAgICAgICAgICBtb2RlbF9hcnRpZmFjdCwKICAgICAgICAgICAgZXh0cmFfZGF0YSwKICAgICAgICApID0gbWxydW4uYXJ0aWZhY3RzLmdldF9tb2RlbCh1cmwpCgogICAgICAgICMgUmVhZCB0aGUgbmFtZToKICAgICAgICBtb2RlbF9uYW1lID0gbW9kZWxfYXJ0aWZhY3Quc3BlYy5kYl9rZXkKCiAgICAgICAgIyBFeHRyYWN0IGxvZ2dlZCBtb2RlbCBmaWxlczoKICAgICAgICBtb2RlbF9kaXJlY3RvcnkgPSBvcy5wYXRoLmpvaW4ob3MucGF0aC5kaXJuYW1lKG1vZGVsX2ZpbGUpLCBtb2RlbF9uYW1lKQogICAgICAgIHdpdGggemlwZmlsZS5aaXBGaWxlKG1vZGVsX2ZpbGUsICJyIikgYXMgemlwX2ZpbGU6CiAgICAgICAgICAgIHppcF9maWxlLmV4dHJhY3RhbGwobW9kZWxfZGlyZWN0b3J5KQogICAgICAgIHJldHVybiBtb2RlbF9kaXJlY3RvcnkKCiAgICBkZWYgX2xvYWRfcGVmdF9tb2RlbChzZWxmKToKICAgICAgICBtb2RlbF9kaXJlY3RvcnkgPSBzZWxmLl9leHRyYWN0X21vZGVsKHNlbGYucGVmdF9tb2RlbCkKICAgICAgICBzZWxmLm1vZGVsID0gUGVmdE1vZGVsLmZyb21fcHJldHJhaW5lZChzZWxmLm1vZGVsLCBtb2RlbF9kaXJlY3RvcnkpCiAgICAgICAgc2VsZi5tb2RlbC5ldmFsKCkKCiAgICBkZWYgX2xvYWRfZnJvbV9tbHJ1bihzZWxmKToKICAgICAgICBtb2RlbF9kaXJlY3RvcnkgPSBzZWxmLl9leHRyYWN0X21vZGVsKHNlbGYubW9kZWxfcGF0aCkKCiAgICAgICAgIyBMb2FkaW5nIHRoZSBzYXZlZCBwcmV0cmFpbmVkIHRva2VuaXplciBhbmQgbW9kZWw6CiAgICAgICAgc2VsZi50b2tlbml6ZXIgPSBzZWxmLl90b2tlbml6ZXJfY2xhc3MuZnJvbV9wcmV0cmFpbmVkKG1vZGVsX2RpcmVjdG9yeSkKICAgICAgICBzZWxmLm1vZGVsID0gc2VsZi5fbW9kZWxfY2xhc3MuZnJvbV9wcmV0cmFpbmVkKAogICAgICAgICAgICBtb2RlbF9kaXJlY3RvcnksICoqc2VsZi5tb2RlbF9hcmdzCiAgICAgICAgKQoKICAgIGRlZiBfbG9hZF9mcm9tX2h1YihzZWxmKToKICAgICAgICAjIExvYWRpbmcgdGhlIHByZXRyYWluZWQgdG9rZW5pemVyIGFuZCBtb2RlbDoKICAgICAgICBzZWxmLnRva2VuaXplciA9IHNlbGYuX3Rva2VuaXplcl9jbGFzcy5mcm9tX3ByZXRyYWluZWQoCiAgICAgICAgICAgIHNlbGYudG9rZW5pemVyX25hbWUsCiAgICAgICAgICAgIG1vZGVsX21heF9sZW5ndGg9NTEyLAogICAgICAgICkKICAgICAgICBzZWxmLm1vZGVsID0gc2VsZi5fbW9kZWxfY2xhc3MuZnJvbV9wcmV0cmFpbmVkKAogICAgICAgICAgICBzZWxmLm1vZGVsX25hbWUsICoqc2VsZi5tb2RlbF9hcmdzCiAgICAgICAgKQoKICAgIGRlZiBwcmVkaWN0KHNlbGYsIHJlcXVlc3Q6IERpY3Rbc3RyLCBBbnldKSAtPiBkaWN0OgogICAgICAgICMgR2V0IHRoZSBpbnB1dHM6CiAgICAgICAga3dhcmdzID0gcmVxdWVzdFsiaW5wdXRzIl1bMF0KICAgICAgICBwcm9tcHQgPSBrd2FyZ3MucG9wKCJwcm9tcHQiKVswXQoKICAgICAgICAjIFRva2VuaXplOgogICAgICAgIGlucHV0cyA9IHNlbGYudG9rZW5pemVyKHByb21wdCwgcmV0dXJuX3RlbnNvcnM9InB0IilbImlucHV0X2lkcyJdCiAgICAgICAgaWYgc2VsZi5tb2RlbC5kZXZpY2UudHlwZSA9PSAiY3VkYSI6CiAgICAgICAgICAgIGlucHV0cyA9IGlucHV0cy5jdWRhKCkKCiAgICAgICAgIyBHZXQgdGhlIHBhZCB0b2tlbiBpZDoKICAgICAgICBwYWRfdG9rZW5faWQgPSBzZWxmLnRva2VuaXplci5lb3NfdG9rZW5faWQKCiAgICAgICAgIyBJbmZlciB0aHJvdWdoIHRoZSBtb2RlbDoKICAgICAgICBvdXRwdXQgPSBzZWxmLm1vZGVsLmdlbmVyYXRlKAogICAgICAgICAgICBpbnB1dF9pZHM9aW5wdXRzLAogICAgICAgICAgICBkb19zYW1wbGU9VHJ1ZSwKICAgICAgICAgICAgbnVtX3JldHVybl9zZXF1ZW5jZXM9MSwKICAgICAgICAgICAgcGFkX3Rva2VuX2lkPXBhZF90b2tlbl9pZCwKICAgICAgICAgICAgKiprd2FyZ3MsCiAgICAgICAgKQoKICAgICAgICAjIERldG9rZW5pemU6CiAgICAgICAgcHJlZGljdGlvbiA9IHNlbGYudG9rZW5pemVyLmRlY29kZShvdXRwdXRbMF0sIHNraXBfc3BlY2lhbF90b2tlbnM9VHJ1ZSkKCiAgICAgICAgcmV0dXJuIHsicHJlZGljdGlvbiI6IHByZWRpY3Rpb24sICJwcm9tcHQiOiBwcm9tcHR9CgogICAgZGVmIGV4cGxhaW4oc2VsZiwgcmVxdWVzdDogRGljdCkgLT4gc3RyOgogICAgICAgIHJldHVybiBmIkxMTSBtb2RlbCBzZXJ2ZXIgbmFtZWQge3NlbGYubmFtZX0iCgoKZGVmIHBvc3Rwcm9jZXNzKGlucHV0czogZGljdCkgLT4gZGljdDoKICAgICIiIgogICAgUG9zdHByb2Nlc3NpbmcgdGhlIGdlbmVyYXRlZCBvdXRwdXQgb2YgdGhlIG1vZGVsCiAgICAiIiIKICAgICMgUmVhZCB0aGUgcHJlZGljdGlvbjoKICAgIHByZWRpY3Rpb24gPSBpbnB1dHNbIm91dHB1dHMiXVsicHJlZGljdGlvbiJdCgogICAgIyBMb29rIGZvciBhICdDb250ZW50OiAnIG1hcmsgdG8ga25vdyB0aGUgbW9kZWwgZm91bmQgdGhlIHN1YmplY3QsIG90aGVyd2lzZSwgaXQgaXMgcHJvYmFibHkgZ2FyYmFnZToKICAgIGNvbnRlbnRfaW5kZXggPSBwcmVkaWN0aW9uLmZpbmQoQ09OVEVOVF9NQVJLKQogICAgaWYgY29udGVudF9pbmRleCA9PSAtMToKICAgICAgICBvdXRwdXQgPSBmIkknbSBub3Qgc3VyZSBhYm91dCBpdCBidXQgSSdsbCBkbyBteSBiZXN0OiB7cHJlZGljdGlvbn0iCiAgICBlbHNlOgogICAgICAgIG91dHB1dCA9IHByZWRpY3Rpb25bY29udGVudF9pbmRleCArIGxlbihDT05URU5UX01BUkspIDpdCgogICAgcmV0dXJuIHsKICAgICAgICAiaW5wdXRzIjogWwogICAgICAgICAgICB7InByZWRpY3Rpb24iOiBvdXRwdXQuc3RyaXAoKSwgInByb21wdCI6IGlucHV0c1sib3V0cHV0cyJdWyJwcm9tcHQiXX0KICAgICAgICBdCiAgICB9CgoKY2xhc3MgVG94aWNpdHlDbGFzc2lmaWVyTW9kZWxTZXJ2ZXIoVjJNb2RlbFNlcnZlcik6CiAgICAiIiIKICAgIG1vZGVsIHRoYXQgY2hlY2tzIGlmIHRoZSB0ZXh0IGNvbnRhaW4gdG94aWNpdHkgbGFuZ3VhZ2UuCiAgICAiIiIKCiAgICBkZWYgX19pbml0X18oc2VsZiwgY29udGV4dCwgbmFtZTogc3RyLCB0aHJlc2hvbGQ6IGZsb2F0ID0gMC43LCAqKmNsYXNzX2FyZ3MpOgogICAgICAgICMgSW5pdGlhbGl6ZSB0aGUgYmFzZSBzZXJ2ZXI6CiAgICAgICAgc3VwZXIoVG94aWNpdHlDbGFzc2lmaWVyTW9kZWxTZXJ2ZXIsIHNlbGYpLl9faW5pdF9fKAogICAgICAgICAgICBjb250ZXh0PWNvbnRleHQsCiAgICAgICAgICAgIG5hbWU9bmFtZSwKICAgICAgICAgICAgbW9kZWxfcGF0aD1Ob25lLAogICAgICAgICAgICAqKmNsYXNzX2FyZ3MsCiAgICAgICAgKQoKICAgICAgICAjIFN0b3JlIHRoZSB0aHJlc2hvbGQgb2YgdG94aWNpdHk6CiAgICAgICAgc2VsZi50aHJlc2hvbGQgPSB0aHJlc2hvbGQKCiAgICBkZWYgbG9hZChzZWxmKToKICAgICAgICBzZWxmLm1vZGVsID0gZXZhbHVhdGUubG9hZCgidG94aWNpdHkiLCBtb2R1bGVfdHlwZT0ibWVhc3VyZW1lbnQiKQoKICAgIGRlZiBwcmVkaWN0KHNlbGYsIGlucHV0czogRGljdCkgLT4gc3RyOgogICAgICAgICMgUmVhZCB0aGUgdXNlcidzIGlucHV0IGFuZCBtb2RlbCBvdXRwdXQ6CiAgICAgICAgcHJlZGljdGlvbiA9IGlucHV0c1siaW5wdXRzIl1bMF1bInByZWRpY3Rpb24iXQogICAgICAgIHByb21wdCA9IGlucHV0c1siaW5wdXRzIl1bMF1bInByb21wdCJdCgogICAgICAgICMgSW5mZXIgdGhyb3VnaCB0aGUgZXZhbHVhdG9yIG1vZGVsOgogICAgICAgIHJlc3VsdCA9IHNlbGYubW9kZWwuY29tcHV0ZShwcmVkaWN0aW9ucz1bcHJlZGljdGlvbiwgcHJvbXB0XSlbInRveGljaXR5Il0KICAgICAgICBpZiBhbnkobnAuYXJyYXkocmVzdWx0KSA+IHNlbGYudGhyZXNob2xkKToKICAgICAgICAgICAgcmV0dXJuICJUaGlzIGJvdCBkbyBub3QgcmVzcG9uZCB0byB0b3hpY2l0eS4iCgogICAgICAgIHJldHVybiBwcmVkaWN0aW9uCgogICAgZGVmIGV4cGxhaW4oc2VsZiwgcmVxdWVzdDogRGljdCkgLT4gc3RyOgogICAgICAgIHJldHVybiBmIlRleHQgdG94aWNpdHkgY2xhc3NpZmllciBzZXJ2ZXIgbmFtZWQge3NlbGYubmFtZX0iCgpmcm9tIG1scnVuLnJ1bnRpbWVzIGltcG9ydCBudWNsaW9faW5pdF9ob29rCmRlZiBpbml0X2NvbnRleHQoY29udGV4dCk6CiAgICBudWNsaW9faW5pdF9ob29rKGNvbnRleHQsIGdsb2JhbHMoKSwgJ3NlcnZpbmdfdjInKQoKZGVmIGhhbmRsZXIoY29udGV4dCwgZXZlbnQpOgogICAgcmV0dXJuIGNvbnRleHQubWxydW5faGFuZGxlcihjb250ZXh0LCBldmVudCkK
 31 |           source: ./
 32 |           commands: []
 33 |           code_origin: http://github.com/mlrun/demo-llm-tuning#refs/heads/main#91145f96f3cd627431de34d0bae3547efbdd7097
 34 |           origin_filename: src/serving.py
 35 |           requirements: []
 36 |         description: ''
 37 |         default_handler: ''
 38 |         disable_auto_mount: false
 39 |         clone_target_dir: ''
 40 |         env:
 41 |         - name: V3IO_API
 42 |           value: ''
 43 |         - name: V3IO_USERNAME
 44 |           value: ''
 45 |         - name: V3IO_ACCESS_KEY
 46 |           value: ''
 47 |         - name: V3IO_FRAMESD
 48 |           value: ''
 49 |         resources:
 50 |           requests:
 51 |             memory: 1Mi
 52 |             cpu: 25m
 53 |           limits:
 54 |             nvidia.com/gpu: 1
 55 |         priority_class_name: igz-workload-medium
 56 |         preemption_mode: prevent
 57 |         min_replicas: 1
 58 |         max_replicas: 4
 59 |         source: ''
 60 |         function_kind: serving_v2
 61 |         readiness_timeout: 3000
 62 |         function_handler: serving:handler
 63 |         base_image_pull: false
 64 |         graph:
 65 |           steps:
 66 |             preprocess:
 67 |               kind: task
 68 |               handler: preprocess
 69 |               after: []
 70 |             mlopspedia:
 71 |               kind: task
 72 |               class_name: LLMModelServer
 73 |               class_args:
 74 |                 model_args:
 75 |                   load_in_8bit: true
 76 |                   device_map: cuda:0
 77 |                   trust_remote_code: true
 78 |                 tokenizer_name: tiiuae/falcon-7b
 79 |                 model_name: tiiuae/falcon-7b
 80 |                 peft_model: store://artifacts/mlopspedia-bot-yonis/falcon-7b-mlrun
 81 |               after:
 82 |               - preprocess
 83 |             postprocess:
 84 |               kind: task
 85 |               handler: postprocess
 86 |               after:
 87 |               - mlopspedia
 88 |             toxicity-classifier:
 89 |               kind: task
 90 |               class_name: ToxicityClassifierModelServer
 91 |               class_args:
 92 |                 threshold: 0.7
 93 |               after:
 94 |               - postprocess
 95 |               responder: true
 96 |           engine: async
 97 |         secret_sources: []
 98 |         affinity:
 99 |           nodeAffinity:
100 |             requiredDuringSchedulingIgnoredDuringExecution:
101 |               nodeSelectorTerms:
102 |               - matchExpressions:
103 |                 - key: app.iguazio.com/lifecycle
104 |                   operator: NotIn
105 |                   values:
106 |                   - preemptible
107 |                 - key: eks.amazonaws.com/capacityType
108 |                   operator: NotIn
109 |                   values:
110 |                   - SPOT
111 |                 - key: node-lifecycle
112 |                   operator: NotIn
113 |                   values:
114 |                   - spot
115 |         tolerations: null
116 |         security_context: {}
117 |       verbose: false
118 |   workflows:
119 |   - path: src/training_workflow.py
120 |     name: training_workflow
121 |   artifacts: []
122 |   conda: ''
123 |   source: git://github.com/mlrun/demo-llm-tuning.git#main
124 |   origin_url: http://github.com/mlrun/demo-llm-tuning#refs/heads/main
125 |   load_source_on_run: true
126 |   desired_state: online
127 |   default_image: yonishelach/mlrun-llm
128 |   build:
129 |     commands: []
130 |     requirements: []
131 |   custom_packagers: []
132 | 


--------------------------------------------------------------------------------
/project_setup.py:
--------------------------------------------------------------------------------
 1 | import importlib
 2 | 
 3 | import mlrun
 4 | 
 5 | 
 6 | def assert_build():
 7 |     for module_name in [
 8 |         "torch",
 9 |         "transformers",
10 |         "datasets",
11 |         "accelerate",
12 |         "evaluate",
13 |         "deepspeed",
14 |         "mpi4py",
15 |     ]:
16 |         module = importlib.import_module(module_name)
17 |         print(module.__version__)
18 | 
19 | 
20 | def setup(
21 |         project: mlrun.projects.MlrunProject
22 | ):
23 |     """
24 |     Creating the project for this demo.
25 |     :returns: a fully prepared project for this demo.
26 |     """
27 |     print(project.get_param("source"))
28 |     # Set or build the default image:
29 |     if project.get_param("default_image") is None:
30 |         print("Building image for the demo:")
31 |         image_builder = project.set_function(
32 |             "project_setup.py",
33 |             name="image-builder",
34 |             handler="assert_build",
35 |             kind="job",
36 |             image="mlrun/ml-models-gpu",
37 |             requirements=[
38 |                 "torch",
39 |                 "transformers[deepspeed]",
40 |                 "datasets",
41 |                 "accelerate",
42 |                 "evaluate",
43 |                 "mpi4py",
44 |             ],
45 |         )
46 |         assert image_builder.deploy()
47 |         default_image = image_builder.spec.image
48 |     project.set_default_image(project.get_param("default_image"))
49 | 
50 |     # Set the project git source:
51 | 
52 |     project.set_source(project.get_param("source"), pull_at_runtime=True)
53 | 
54 |     # Set the data collection function:
55 |     data_collection_function = project.set_function(
56 |         "src/data_collection.py",
57 |         name="data-collecting",
58 |         image="mlrun/mlrun",
59 |         kind="job",
60 | 
61 |     )
62 |     data_collection_function.apply(mlrun.auto_mount())
63 |     data_collection_function.save()
64 | 
65 |     # Set the data preprocessing function:
66 |     project.set_function(
67 |         "src/data_preprocess.py",
68 |         name="data-preparing",
69 |         kind="job",
70 |     )
71 | 
72 |     # Set the training function:
73 |     train_function = project.set_function(
74 |         "src/trainer.py",
75 |         name="training",
76 |         kind="job",
77 |     )
78 |     train_function.with_limits(
79 |         gpus=project.get_param("num_gpus_per_replica") or 4,
80 |         cpu=project.get_param("num_cpus_per_replica") or 48,
81 |         mem=project.get_param("memory_per_replica") or "192Gi",
82 |     )
83 |     train_function.save()
84 | 
85 |     project.set_function(
86 |         "src/serving.py",
87 |         name="serving",
88 |         kind="serving",
89 |     )
90 | 
91 |     # Set the training workflow:
92 |     project.set_workflow("training_workflow", "src/training_workflow.py")
93 | 
94 |     # Save and return the project:
95 |     project.save()
96 |     return project


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.isort]
2 | profile = "black"
3 | multi_line_output = 3
4 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | mlrun
 2 | torch
 3 | plotly
 4 | gradio
 5 | transformers
 6 | datasets
 7 | accelerate
 8 | evaluate
 9 | bs4
10 | einops
11 | xformers


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import find_packages, setup
 2 | 
 3 | project_name = "myproj"
 4 | with open("README.md", "r", encoding="utf-8") as fh:
 5 |     long_description = fh.read()
 6 | 
 7 | setup(
 8 |     name=project_name,
 9 |     packages=[project_name],
10 |     package_dir={project_name: "src"},
11 |     version="0.1.0",
12 |     description="my desc",
13 |     author="Yaron",
14 |     author_email="author@example.com",
15 |     license="MIT",
16 |     long_description=long_description,
17 |     long_description_content_type="text/markdown",
18 |     python_requires=">=3.7",
19 | )
20 | 


--------------------------------------------------------------------------------
/src/data_collection.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import re
 3 | from pathlib import Path
 4 | from urllib.request import Request, urlopen
 5 | 
 6 | from bs4 import BeautifulSoup, Tag
 7 | 
 8 | ARTICLE_TOKEN = "Article: "
 9 | HEADER_TOKEN = "### Human: "
10 | 
11 | 
12 | def normalize(s: str) -> str:
13 |     """
14 |     Remove newline and tab characters from string
15 |     """
16 |     return s.replace("\n", "").replace("\t", "")
17 | 
18 | 
19 | def mark_header_tags(soup: BeautifulSoup):
20 |     """
21 |     Adding header token and article token prefixes to all headers in html, in order to parse the text later easily.
22 | 
23 |     :param soup: BeautifulSoup object of the html file
24 |     """
25 |     nodes = soup.find_all(re.compile("^h[1-6]$"))
26 |     # Tagging headers in html to identify in text files:
27 |     if nodes:
28 |         content_type = type(nodes[0].contents[0])
29 |         nodes[0].string = content_type(
30 |             ARTICLE_TOKEN + normalize(str(nodes[0].contents[0]))
31 |         )
32 |         for node in nodes[1:]:
33 |             if node.string:
34 |                 content_type = type(node.contents[0])
35 |                 if content_type == Tag:
36 |                     node.string = HEADER_TOKEN + normalize(node.string)
37 |                 else:
38 |                     node.string = content_type(HEADER_TOKEN + str(node.contents[0]))
39 | 
40 | 
41 | def get_html_as_string(url: str, mark_headers: bool) -> str:
42 |     """
43 |     Retrieve text from html URL.
44 | 
45 |     :param url:             html URL
46 |     :param mark_headers:    Whether to add article and header prefixes to headers to text
47 | 
48 |     :returns:                html text content
49 |     """
50 |     # read html source:
51 |     req = Request(url=url, headers={"User-Agent": "Mozilla/5.0"})
52 |     web_html_content = urlopen(req).read().decode("utf-8")
53 |     soup = BeautifulSoup(web_html_content, features="html.parser")
54 |     if mark_headers:
55 |         mark_header_tags(soup)
56 |     return soup.get_text()
57 | 
58 | 
59 | def collect_html_to_text_files(urls_file: str, mark_headers=True) -> str:
60 |     """
61 |     Retrieve all html text content from URLs as text files.
62 | 
63 |     :param urls_file:       html URLs file
64 |     :param mark_headers:    Whether to add article and header prefixes to headers to text
65 | 
66 |     :returns:  the directory name that contains all the content text files.
67 |     """
68 |     directory = "html_as_text_files"
69 |     os.makedirs(directory, exist_ok=True)
70 |     # Writing html files as text files:
71 |     with open(urls_file, "r") as f:
72 |         urls = f.readlines()
73 |     for url in urls:
74 |         url = url.replace("\n", "")
75 |         page_name = Path(url).name
76 |         with open(f"{directory}/{page_name}.txt", "w") as f:
77 |             f.write(get_html_as_string(url, mark_headers))
78 |     return directory
79 | 


--------------------------------------------------------------------------------
/src/data_preprocess.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | import tempfile
 4 | from pathlib import Path
 5 | 
 6 | from datasets import load_dataset
 7 | 
 8 | ARTICLE_TOKEN = "Article: "
 9 | HEADER_TOKEN = "### Human: "
10 | CONTENT_TOKEN = "### Assistant: "
11 | 
12 | DATA_FORMAT = """### Human: {} {}
13 | ### Assistant: {}"""
14 | END_OF_ARTICLE = "Latest Posts"
15 | 
16 | 
17 | def convert_textfile_to_data_with_prompts(txt_file: Path):
18 |     """
19 |     Formatting the html text content into prompt form.
20 |     Each header-content in the article is an element in the generated list of prompts
21 | 
22 |     :param txt_file: text content as a string with tokens of headers.
23 |     :returns: list of prompts
24 |     """
25 |     # Read file:
26 |     with open(txt_file, "r") as f:
27 |         lines = f.readlines()
28 | 
29 |     start = 0
30 |     end = 0
31 |     subject_idx = []
32 |     data = []
33 |     # Dividing text into header - paragraph prompts:
34 |     for i, line in enumerate(lines):
35 |         if not start and line.startswith(ARTICLE_TOKEN):
36 |             start = i
37 |         elif HEADER_TOKEN + END_OF_ARTICLE in line:
38 |             end = i
39 |             break
40 |         if line.startswith(HEADER_TOKEN):
41 |             subject_idx.append(i)
42 |     article_content = lines[start:end]
43 |     subject_idx = [subject_i - start for subject_i in subject_idx]
44 |     article_name = article_content[0].replace(ARTICLE_TOKEN, "")
45 |     for i, subject in enumerate(subject_idx):
46 |         if subject + 1 in subject_idx:
47 |             continue
48 |         subject_data = article_content[subject].replace(HEADER_TOKEN, "")
49 |         if i + 1 == len(subject_idx):
50 |             content_end = len(article_content)
51 |         else:
52 |             content_end = subject_idx[i + 1]
53 |         content_limits = subject + 1, content_end
54 |         data.append(
55 |             DATA_FORMAT.format(
56 |                 article_name,
57 |                 subject_data,
58 |                 "".join(article_content[content_limits[0] : content_limits[1]]),
59 |             )
60 |         )
61 |     return data
62 | 
63 | 
64 | def prepare_dataset(source_dir: str):
65 |     """
66 |     Build the dataset from text files as a 'text: prompt' structure.
67 | 
68 |     :param source_dir: the directory that contains all the text files.
69 | 
70 |     :returns: A dataset with all the prompts inside
71 |     """
72 |     path_list = Path(source_dir).glob("./*.txt")
73 |     data = []
74 |     # Converting text files into data in our prompt format:
75 |     for path in path_list:
76 |         data.extend(convert_textfile_to_data_with_prompts(path))
77 |     data_dir = tempfile.mkdtemp()
78 |     os.makedirs(data_dir, exist_ok=True)
79 |     with open(data_dir + "/html_data.jsonl", "w", encoding="utf8") as f:
80 |         for item in data:
81 |             f.write(
82 |                 json.dumps({"text": item.replace(" ", "")}, ensure_ascii=False) + "\n"
83 |             )
84 |     return load_dataset(data_dir)["train"].to_pandas()
85 | 


--------------------------------------------------------------------------------
/src/serving.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | import zipfile
  4 | from typing import Any, Dict
  5 | 
  6 | import evaluate
  7 | import mlrun.artifacts
  8 | import numpy as np
  9 | import torch
 10 | import transformers
 11 | from mlrun.serving.v2_serving import V2ModelServer
 12 | from peft import PeftModel
 13 | 
 14 | SUBJECT_MARK = "### Human: "
 15 | CONTENT_MARK = "\n### Assistant: "
 16 | PROMPT_FORMAT = SUBJECT_MARK + "{}" + CONTENT_MARK
 17 | 
 18 | 
 19 | def preprocess(request: dict) -> dict:
 20 |     """
 21 |     convert the request to the required structure for the predict function
 22 | 
 23 |     :param request: A http request that contains the prompt
 24 |     """
 25 |     # Read bytes:
 26 |     if isinstance(request, bytes):
 27 |         request = json.loads(request)
 28 | 
 29 |     # Get the prompt:
 30 |     prompt = request.pop("prompt")
 31 | 
 32 |     # Format the prompt as subject:
 33 |     prompt = PROMPT_FORMAT.format(str(prompt))
 34 | 
 35 |     # Update the request and return:
 36 |     request = {"inputs": [{"prompt": [prompt], **request}]}
 37 |     return request
 38 | 
 39 | 
 40 | class LLMModelServer(V2ModelServer):
 41 |     """
 42 |     This is temporary and will be built in mlrun 1.5.0
 43 |     """
 44 | 
 45 |     def __init__(
 46 |         self,
 47 |         context: mlrun.MLClientCtx = None,
 48 |         name: str = None,
 49 |         model_class: str = "AutoModelForCausalLM",
 50 |         tokenizer_class: str = "AutoTokenizer",
 51 |         # model args:
 52 |         model_args: dict = None,
 53 |         # Load from MLRun args:
 54 |         model_path: str = None,
 55 |         # Load from hub args:
 56 |         model_name: str = None,
 57 |         tokenizer_name: str = None,
 58 |         # Deepspeed args:
 59 |         use_deepspeed: bool = False,
 60 |         n_gpus: int = 1,
 61 |         is_fp16: bool = True,
 62 |         # peft model:
 63 |         peft_model: str = None,
 64 |         # Inference args:
 65 |         **class_args,
 66 |     ):
 67 |         # Initialize the base server:
 68 |         super(LLMModelServer, self).__init__(
 69 |             context=context,
 70 |             name=name,
 71 |             model_path=model_path,
 72 |             **class_args,
 73 |         )
 74 | 
 75 |         # Save class names:
 76 |         self.model_class = model_class
 77 |         self.tokenizer_class = tokenizer_class
 78 | 
 79 |         # Save hub loading parameters:
 80 |         self.model_name = model_name
 81 |         self.tokenizer_name = tokenizer_name or self.model_name
 82 | 
 83 |         # Save load model arguments:
 84 |         self.model_args = model_args
 85 | 
 86 |         # Save deepspeed parameters:
 87 |         self.use_deepspeed = use_deepspeed
 88 |         self.n_gpus = n_gpus
 89 |         self.is_fp16 = is_fp16
 90 | 
 91 |         # PEFT parameters:
 92 |         self.peft_model = peft_model
 93 | 
 94 |         # Prepare variables for future use:
 95 |         self.model = None
 96 |         self.tokenizer = None
 97 |         self._model_class = None
 98 |         self._tokenizer_class = None
 99 | 
100 |     def load(self):
101 |         # Get classes:
102 |         self._model_class = getattr(transformers, self.model_class)
103 |         self._tokenizer_class = getattr(transformers, self.tokenizer_class)
104 | 
105 |         # Load the model and tokenizer:
106 |         if self.model_path:
107 |             self._load_from_mlrun()
108 |         else:
109 |             self._load_from_hub()
110 | 
111 |         # Use deepspeed if needed:
112 |         if self.use_deepspeed:
113 |             import deepspeed
114 | 
115 |             self.model = deepspeed.init_inference(
116 |                 model=self.model,
117 |                 mp_size=self.n_gpus,
118 |                 dtype=torch.float16 if self.is_fp16 else torch.float32,
119 |                 replace_method="auto",
120 |                 replace_with_kernel_inject=True,
121 |             )
122 |         if self.peft_model:
123 |             self._load_peft_model()
124 | 
125 |     def _extract_model(self, url):
126 |         # Get the model artifact and file:
127 |         (
128 |             model_file,
129 |             model_artifact,
130 |             extra_data,
131 |         ) = mlrun.artifacts.get_model(url)
132 | 
133 |         # Read the name:
134 |         model_name = model_artifact.spec.db_key
135 | 
136 |         # Extract logged model files:
137 |         model_directory = os.path.join(os.path.dirname(model_file), model_name)
138 |         with zipfile.ZipFile(model_file, "r") as zip_file:
139 |             zip_file.extractall(model_directory)
140 |         return model_directory
141 | 
142 |     def _load_peft_model(self):
143 |         model_directory = self._extract_model(self.peft_model)
144 |         self.model = PeftModel.from_pretrained(self.model, model_directory)
145 |         self.model.eval()
146 | 
147 |     def _load_from_mlrun(self):
148 |         model_directory = self._extract_model(self.model_path)
149 | 
150 |         # Loading the saved pretrained tokenizer and model:
151 |         self.tokenizer = self._tokenizer_class.from_pretrained(model_directory)
152 |         self.model = self._model_class.from_pretrained(
153 |             model_directory, **self.model_args
154 |         )
155 | 
156 |     def _load_from_hub(self):
157 |         # Loading the pretrained tokenizer and model:
158 |         self.tokenizer = self._tokenizer_class.from_pretrained(
159 |             self.tokenizer_name,
160 |             model_max_length=512,
161 |         )
162 |         self.model = self._model_class.from_pretrained(
163 |             self.model_name, **self.model_args
164 |         )
165 | 
166 |     def predict(self, request: Dict[str, Any]) -> dict:
167 |         # Get the inputs:
168 |         kwargs = request["inputs"][0]
169 |         prompt = kwargs.pop("prompt")[0]
170 | 
171 |         # Tokenize:
172 |         inputs = self.tokenizer(prompt, return_tensors="pt")["input_ids"]
173 |         if self.model.device.type == "cuda":
174 |             inputs = inputs.cuda()
175 | 
176 |         # Get the pad token id:
177 |         pad_token_id = self.tokenizer.eos_token_id
178 | 
179 |         # Infer through the model:
180 |         output = self.model.generate(
181 |             input_ids=inputs,
182 |             do_sample=True,
183 |             num_return_sequences=1,
184 |             pad_token_id=pad_token_id,
185 |             **kwargs,
186 |         )
187 | 
188 |         # Detokenize:
189 |         prediction = self.tokenizer.decode(output[0], skip_special_tokens=True)
190 | 
191 |         return {"prediction": prediction, "prompt": prompt}
192 | 
193 |     def explain(self, request: Dict) -> str:
194 |         return f"LLM model server named {self.name}"
195 | 
196 | 
197 | def postprocess(inputs: dict) -> dict:
198 |     """
199 |     Postprocessing the generated output of the model
200 |     """
201 |     # Read the prediction:
202 |     prediction = inputs["outputs"]["prediction"]
203 | 
204 |     # Look for a 'Content: ' mark to know the model found the subject, otherwise, it is probably garbage:
205 |     content_index = prediction.find(CONTENT_MARK)
206 |     if content_index == -1:
207 |         output = f"I'm not sure about it but I'll do my best: {prediction}"
208 |     else:
209 |         output = prediction[content_index + len(CONTENT_MARK) :]
210 | 
211 |     return {
212 |         "inputs": [
213 |             {"prediction": output.strip(), "prompt": inputs["outputs"]["prompt"]}
214 |         ]
215 |     }
216 | 
217 | 
218 | class ToxicityClassifierModelServer(V2ModelServer):
219 |     """
220 |     model that checks if the text contain toxicity language.
221 |     """
222 | 
223 |     def __init__(self, context, name: str, threshold: float = 0.7, **class_args):
224 |         # Initialize the base server:
225 |         super(ToxicityClassifierModelServer, self).__init__(
226 |             context=context,
227 |             name=name,
228 |             model_path=None,
229 |             **class_args,
230 |         )
231 | 
232 |         # Store the threshold of toxicity:
233 |         self.threshold = threshold
234 | 
235 |     def load(self):
236 |         self.model = evaluate.load("toxicity", module_type="measurement")
237 | 
238 |     def predict(self, inputs: Dict) -> str:
239 |         # Read the user's input and model output:
240 |         prediction = inputs["inputs"][0]["prediction"]
241 |         prompt = inputs["inputs"][0]["prompt"]
242 | 
243 |         # Infer through the evaluator model:
244 |         result = self.model.compute(predictions=[prediction, prompt])["toxicity"]
245 |         if any(np.array(result) > self.threshold):
246 |             return "This bot do not respond to toxicity."
247 | 
248 |         return prediction
249 | 
250 |     def explain(self, request: Dict) -> str:
251 |         return f"Text toxicity classifier server named {self.name}"
252 | 


--------------------------------------------------------------------------------
/src/trainer.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import shutil
  3 | import tempfile
  4 | import zipfile
  5 | from abc import ABC
  6 | from typing import Any, Dict, List
  7 | 
  8 | import mlrun
  9 | import numpy as np
 10 | import pandas as pd
 11 | import torch
 12 | import transformers
 13 | from datasets import Dataset
 14 | from mlrun.artifacts.manager import Artifact, PlotlyArtifact
 15 | from mlrun.datastore import DataItem
 16 | from mlrun.execution import MLClientCtx
 17 | from mlrun.frameworks._common import CommonTypes, MLRunInterface
 18 | from mlrun.utils import create_class
 19 | from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
 20 | from plotly import graph_objects as go
 21 | from transformers import (
 22 |     AutoModelForCausalLM,
 23 |     AutoTokenizer,
 24 |     BitsAndBytesConfig,
 25 |     DataCollatorForLanguageModeling,
 26 |     PreTrainedModel,
 27 |     PreTrainedTokenizer,
 28 |     Trainer,
 29 |     TrainerCallback,
 30 |     TrainerControl,
 31 |     TrainerState,
 32 |     TrainingArguments,
 33 | )
 34 | 
 35 | DEEPSPEED_CONFIG = {
 36 |     "fp16": {
 37 |         "enabled": "auto",
 38 |         "loss_scale": 0,
 39 |         "loss_scale_window": 1000,
 40 |         "initial_scale_power": 16,
 41 |         "hysteresis": 2,
 42 |         "min_loss_scale": 1,
 43 |     },
 44 |     "optimizer": {
 45 |         "type": "AdamW",
 46 |         "params": {
 47 |             "lr": "auto",
 48 |             "betas": "auto",
 49 |             "eps": "auto",
 50 |             "weight_decay": "auto",
 51 |         },
 52 |     },
 53 |     "scheduler": {
 54 |         "type": "WarmupLR",
 55 |         "params": {
 56 |             "warmup_min_lr": "auto",
 57 |             "warmup_max_lr": "auto",
 58 |             "warmup_num_steps": "auto",
 59 |         },
 60 |     },
 61 |     "zero_optimization": {
 62 |         "stage": 3,
 63 |         "offload_optimizer": {"device": "cpu", "pin_memory": True},
 64 |         "offload_param": {"device": "cpu", "pin_memory": True},
 65 |         "overlap_comm": True,
 66 |         "contiguous_gradients": True,
 67 |         "sub_group_size": 1e9,
 68 |         "reduce_bucket_size": "auto",
 69 |         "stage3_prefetch_bucket_size": "auto",
 70 |         "stage3_param_persistence_threshold": "auto",
 71 |         "stage3_max_live_parameters": 1e9,
 72 |         "stage3_max_reuse_distance": 1e9,
 73 |         "stage3_gather_16bit_weights_on_model_save": True,
 74 |     },
 75 |     "gradient_accumulation_steps": "auto",
 76 |     "gradient_clipping": "auto",
 77 |     "steps_per_print": 2000,
 78 |     "train_batch_size": "auto",
 79 |     "train_micro_batch_size_per_gpu": "auto",
 80 |     "wall_clock_breakdown": False,
 81 |     "comms_logger": {
 82 |         "enabled": True,
 83 |         "verbose": False,
 84 |         "prof_all": True,
 85 |         "debug": False,
 86 |     },
 87 | }
 88 | 
 89 | 
 90 | # ----------------------from MLRUN--------------------------------
 91 | class HFTrainerMLRunInterface(MLRunInterface, ABC):
 92 |     """
 93 |     This is temporary and will be built in mlrun 1.5.0
 94 |     Interface for adding MLRun features for tensorflow keras API.
 95 |     """
 96 | 
 97 |     # MLRuns context default name:
 98 |     DEFAULT_CONTEXT_NAME = "mlrun-huggingface"
 99 | 
100 |     # Attributes to replace so the MLRun interface will be fully enabled.
101 |     _REPLACED_METHODS = [
102 |         "train",
103 |         # "evaluate"
104 |     ]
105 | 
106 |     @classmethod
107 |     def add_interface(
108 |         cls,
109 |         obj: Trainer,
110 |         restoration: CommonTypes.MLRunInterfaceRestorationType = None,
111 |     ):
112 |         super(HFTrainerMLRunInterface, cls).add_interface(
113 |             obj=obj, restoration=restoration
114 |         )
115 | 
116 |     @classmethod
117 |     def mlrun_train(cls):
118 |         def wrapper(self: Trainer, *args, **kwargs):
119 |             # Restore the evaluation method as `train` will use it:
120 |             # cls._restore_attribute(obj=self, attribute_name="evaluate")
121 | 
122 |             # Call the original fit method:
123 |             result = self.original_train(*args, **kwargs)
124 | 
125 |             # Replace the evaluation method again:
126 |             # cls._replace_function(obj=self, function_name="evaluate")
127 | 
128 |             return result
129 | 
130 |         return wrapper
131 | 
132 | 
133 | class MLRunCallback(TrainerCallback):
134 |     """
135 |     This is temporary and will be built in mlrun 1.5.0
136 |     Callback for collecting logs during training / evaluation of the `Trainer` API.
137 |     """
138 | 
139 |     def __init__(
140 |         self,
141 |         context: mlrun.MLClientCtx = None,
142 |         model_name: str = "model",
143 |         tag: str = "",
144 |         labels: Dict[str, str] = None,
145 |         extra_data: dict = None,
146 |     ):
147 |         super().__init__()
148 | 
149 |         # Store the configurations:
150 |         self._context = (
151 |             context
152 |             if context is not None
153 |             else mlrun.get_or_create_ctx("./mlrun-huggingface")
154 |         )
155 |         self._model_name = model_name
156 |         self._tag = tag
157 |         self._labels = labels
158 |         self._extra_data = extra_data if extra_data is not None else {}
159 | 
160 |         # Set up the logging mode:
161 |         self._is_training = False
162 |         self._steps: List[List[int]] = []
163 |         self._metric_scores: Dict[str, List[float]] = {}
164 |         self._artifacts: Dict[str, Artifact] = {}
165 | 
166 |     def on_epoch_begin(
167 |         self,
168 |         args: TrainingArguments,
169 |         state: TrainerState,
170 |         control: TrainerControl,
171 |         **kwargs,
172 |     ):
173 |         if not state.is_world_process_zero:
174 |             return
175 |         self._steps.append([])
176 | 
177 |     def on_epoch_end(
178 |         self,
179 |         args: TrainingArguments,
180 |         state: TrainerState,
181 |         control: TrainerControl,
182 |         **kwargs,
183 |     ):
184 |         if not state.is_world_process_zero:
185 |             return
186 |         self._log_metrics()
187 | 
188 |     def on_log(
189 |         self,
190 |         args: TrainingArguments,
191 |         state: TrainerState,
192 |         control: TrainerControl,
193 |         logs: Dict[str, float] = None,
194 |         **kwargs,
195 |     ):
196 |         if not state.is_world_process_zero:
197 |             return
198 |         recent_logs = state.log_history[-1].copy()
199 | 
200 |         recent_logs.pop("epoch")
201 |         current_step = int(recent_logs.pop("step"))
202 |         if current_step not in self._steps[-1]:
203 |             self._steps[-1].append(current_step)
204 | 
205 |         for metric_name, metric_score in recent_logs.items():
206 |             if metric_name.startswith("train_"):
207 |                 if metric_name.split("train_")[1] not in self._metric_scores:
208 |                     self._metric_scores[metric_name] = [metric_score]
209 |                 continue
210 |             if metric_name not in self._metric_scores:
211 |                 self._metric_scores[metric_name] = []
212 |             self._metric_scores[metric_name].append(metric_score)
213 | 
214 |     def on_train_begin(
215 |         self,
216 |         args: TrainingArguments,
217 |         state: TrainerState,
218 |         control: TrainerControl,
219 |         **kwargs,
220 |     ):
221 |         if not state.is_world_process_zero:
222 |             return
223 |         self._is_training = True
224 | 
225 |     def on_train_end(
226 |         self,
227 |         args: TrainingArguments,
228 |         state: TrainerState,
229 |         control: TrainerControl,
230 |         model: PreTrainedModel = None,
231 |         tokenizer: PreTrainedTokenizer = None,
232 |         **kwargs,
233 |     ):
234 |         if not state.is_world_process_zero:
235 |             return
236 |         self._log_metrics()
237 | 
238 |     def on_evaluate(
239 |         self,
240 |         args: TrainingArguments,
241 |         state: TrainerState,
242 |         control: TrainerControl,
243 |         **kwargs,
244 |     ):
245 |         if not state.is_world_process_zero:
246 |             return
247 |         self._log_metrics()
248 | 
249 |         if self._is_training:
250 |             return
251 | 
252 |     def _log_metrics(self):
253 |         for metric_name, metric_scores in self._metric_scores.items():
254 |             self._context.log_result(key=metric_name, value=metric_scores[-1])
255 |             if len(metric_scores) > 1:
256 |                 self._log_metric_plot(name=metric_name, scores=metric_scores)
257 |         self._context.commit(completed=False)
258 | 
259 |     def _log_metric_plot(self, name: str, scores: List[float]):
260 |         # Initialize a plotly figure:
261 |         metric_figure = go.Figure()
262 | 
263 |         # Add titles:
264 |         metric_figure.update_layout(
265 |             title=name.capitalize().replace("_", " "),
266 |             xaxis_title="Samples",
267 |             yaxis_title="Scores",
268 |         )
269 | 
270 |         # Draw:
271 |         metric_figure.add_trace(
272 |             go.Scatter(x=np.arange(len(scores)), y=scores, mode="lines")
273 |         )
274 | 
275 |         # Create the plotly artifact:
276 |         artifact_name = f"{name}_plot"
277 |         artifact = PlotlyArtifact(key=artifact_name, figure=metric_figure)
278 |         self._artifacts[artifact_name] = self._context.log_artifact(artifact)
279 | 
280 | 
281 | def apply_mlrun(
282 |     trainer: transformers.Trainer,
283 |     model_name: str = None,
284 |     tag: str = "",
285 |     context: mlrun.MLClientCtx = None,
286 |     auto_log: bool = True,
287 |     labels: Dict[str, str] = None,
288 |     extra_data: dict = None,
289 |     **kwargs,
290 | ):
291 |     """
292 |     This is temporary and will be built in mlrun 1.5.0
293 |     """
294 |     # Get parameters defaults:
295 |     if context is None:
296 |         context = mlrun.get_or_create_ctx(HFTrainerMLRunInterface.DEFAULT_CONTEXT_NAME)
297 | 
298 |     HFTrainerMLRunInterface.add_interface(obj=trainer)
299 | 
300 |     if auto_log:
301 |         trainer.add_callback(
302 |             MLRunCallback(
303 |                 context=context,
304 |                 model_name=model_name,
305 |                 tag=tag,
306 |                 labels=labels,
307 |                 extra_data=extra_data,
308 |             )
309 |         )
310 | 
311 | 
312 | class KWArgsPrefixes:
313 |     MODEL_CLASS = "CLASS_"
314 |     FIT = "FIT_"
315 |     TRAIN = "TRAIN_"
316 |     PREDICT = "PREDICT_"
317 |     DATA_COLLATOR = "DC_"
318 | 
319 | 
320 | def _get_sub_dict_by_prefix(src: Dict, prefix_key: str) -> Dict[str, Any]:
321 |     return {
322 |         key.replace(prefix_key, ""): val
323 |         for key, val in src.items()
324 |         if key.startswith(prefix_key)
325 |     }
326 | 
327 | 
328 | def print_trainable_parameters(model):
329 |     """
330 |     Prints the number of trainable parameters in the model.
331 |     """
332 |     trainable_params = 0
333 |     all_param = 0
334 |     for _, param in model.named_parameters():
335 |         all_param += param.numel()
336 |         if param.requires_grad:
337 |             trainable_params += param.numel()
338 |     print(
339 |         f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
340 |     )
341 | 
342 | 
343 | def train(
344 |     context: MLClientCtx,
345 |     dataset: DataItem = None,
346 |     pretrained_tokenizer: str = None,
347 |     pretrained_model: str = None,
348 |     model_class: str = None,
349 |     tokenizer_class: str = None,
350 |     model_name: str = "huggingface-model",
351 |     use_deepspeed: bool = True,
352 | ):
353 |     torch.cuda.empty_cache()
354 |     # deepspeed_config_json = None
355 |     # if use_deepspeed:
356 |     #     deepspeed_config_json = os.path.join(tempfile.mkdtemp(), "ds_config.json")
357 |     #     with open(deepspeed_config_json, "w") as f:
358 |     #         json.dump(DEEPSPEED_CONFIG, f)
359 |     if tokenizer_class:
360 |         tokenizer_class = create_class(tokenizer_class)
361 |     else:
362 |         tokenizer_class = AutoTokenizer
363 | 
364 |     tokenizer = tokenizer_class.from_pretrained(
365 |         pretrained_tokenizer,
366 |         model_max_length=512,
367 |     )
368 |     tokenizer.pad_token = tokenizer.eos_token
369 | 
370 |     train_dataset = Dataset.from_pandas(dataset.as_df())
371 | 
372 |     def preprocess_function(examples):
373 |         return tokenizer(examples["text"], truncation=True, padding=True)
374 | 
375 |     tokenized_train = train_dataset.map(preprocess_function, batched=True)
376 |     tokenized_test = None
377 | 
378 |     data_collator_kwargs = _get_sub_dict_by_prefix(
379 |         src=context.parameters, prefix_key=KWArgsPrefixes.DATA_COLLATOR
380 |     )
381 |     data_collator = DataCollatorForLanguageModeling(
382 |         tokenizer=tokenizer, mlm=False, **data_collator_kwargs
383 |     )
384 | 
385 |     # Parsing kwargs:
386 |     train_kwargs = _get_sub_dict_by_prefix(
387 |         src=context.parameters, prefix_key=KWArgsPrefixes.TRAIN
388 |     )
389 |     # if use_deepspeed:
390 |     #     train_kwargs["deepspeed"] = deepspeed_config_json
391 |     model_class_kwargs = _get_sub_dict_by_prefix(
392 |         src=context.parameters, prefix_key=KWArgsPrefixes.MODEL_CLASS
393 |     )
394 |     # Loading our pretrained model:
395 |     model_class_kwargs["pretrained_model_name_or_path"] = (
396 |         model_class_kwargs.get("pretrained_model_name_or_path") or pretrained_model
397 |     )
398 |     train_kwargs["hub_token"] = train_kwargs.get("hub_token") or pretrained_tokenizer
399 |     if not model_class_kwargs["pretrained_model_name_or_path"]:
400 |         raise mlrun.errors.MLRunRuntimeError(
401 |             "Must provide pretrained_model name as "
402 |             "function argument or in extra params"
403 |         )
404 |     bnb_config = BitsAndBytesConfig(
405 |         load_in_4bit=True,
406 |         bnb_4bit_use_double_quant=True,
407 |         bnb_4bit_quant_type="nf4",
408 |         bnb_4bit_compute_dtype=torch.bfloat16,
409 |     )
410 | 
411 |     model = create_class(model_class).from_pretrained(
412 |         quantization_config=bnb_config,
413 |         device_map="auto",
414 |         trust_remote_code=True,
415 |         **model_class_kwargs,
416 |     )
417 | 
418 |     model.gradient_checkpointing_enable()
419 |     model = prepare_model_for_kbit_training(model)
420 | 
421 |     # Preparing training arguments:
422 |     training_args = TrainingArguments(
423 |         output_dir=tempfile.mkdtemp(),
424 |         optim="paged_adamw_8bit",
425 |         gradient_accumulation_steps=2,
426 |         warmup_steps=5,
427 |         learning_rate=3e-4,
428 |         fp16=True,
429 |         logging_steps=1,
430 |         **train_kwargs,
431 |     )
432 | 
433 |     config = LoraConfig(
434 |         r=16,
435 |         lora_alpha=16,
436 |         target_modules=["query_key_value"],
437 |         lora_dropout=0.05,
438 |         bias="none",
439 |         task_type="CAUSAL_LM",
440 |     )
441 | 
442 |     model = get_peft_model(model, config)
443 |     print_trainable_parameters(model)
444 | 
445 |     trainer = transformers.Trainer(
446 |         model=model,
447 |         args=training_args,
448 |         train_dataset=tokenized_train,
449 |         eval_dataset=tokenized_test,
450 |         tokenizer=tokenizer,
451 |         data_collator=data_collator,
452 |     )
453 | 
454 |     apply_mlrun(trainer, model_name=model_name)
455 |     model.config.use_cache = (
456 |         False  # silence the warnings. Please re-enable for inference!
457 |     )
458 | 
459 |     # Apply training with evaluation:
460 |     context.logger.info(f"training '{model_name}'")
461 |     trainer.train()
462 | 
463 |     temp_directory = tempfile.TemporaryDirectory().name
464 |     trainer.save_model(temp_directory)
465 | 
466 |     # Zip the model directory:
467 |     shutil.make_archive(
468 |         base_name="model",
469 |         format="zip",
470 |         root_dir=temp_directory,
471 |     )
472 | 
473 |     # Log the model:
474 |     context.log_model(
475 |         key="model",
476 |         db_key=model_name,
477 |         model_file="model.zip",
478 |         tag="",
479 |         framework="Hugging Face",
480 |     )
481 | 
482 | 
483 | def evaluate(
484 |     context,
485 |     model_path,
486 |     data: pd.DataFrame,
487 |     model_name: str = None,
488 |     tokenizer_name: str = None,
489 | ):
490 |     """
491 |     Evaluating the model using perplexity, for more information visit:
492 |     https://huggingface.co/docs/transformers/perplexity
493 | 
494 |     :param context:     mlrun context
495 |     :param model_path:  path to the model directory
496 |     :param data:        the data to evaluate the model
497 |     :param model_name:  name of base model
498 |     :param tokenizer_name: name of base tokenizer
499 |     """
500 |     # Get the model artifact and file:
501 |     (
502 |         model_file,
503 |         model_artifact,
504 |         extra_data,
505 |     ) = mlrun.artifacts.get_model(model_path)
506 | 
507 |     # Read the name:
508 |     _model_name = model_artifact.spec.db_key
509 | 
510 |     # Extract logged model files:
511 |     model_directory = os.path.join(os.path.dirname(model_file), _model_name)
512 |     with zipfile.ZipFile(model_file, "r") as zip_file:
513 |         zip_file.extractall(model_directory)
514 | 
515 |     # Loading the saved pretrained tokenizer and model:
516 |     dataset = Dataset.from_pandas(data)
517 |     tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
518 |     pad_token_id = tokenizer.eos_token_id
519 |     model = AutoModelForCausalLM.from_pretrained(
520 |         model_name, device_map="cuda:0", trust_remote_code=True, load_in_8bit=True
521 |     )
522 |     model = PeftModel.from_pretrained(model, model_directory)
523 |     model.eval()
524 |     encodings = tokenizer("\n\n".join(dataset["text"][:5]), return_tensors="pt")
525 | 
526 |     max_length = 1024
527 |     stride = 512
528 |     seq_len = encodings.input_ids.size(1)
529 | 
530 |     nlls = []
531 |     prev_end_loc = 0
532 |     for begin_loc in range(0, seq_len, stride):
533 |         end_loc = min(begin_loc + max_length, seq_len)
534 |         trg_len = end_loc - prev_end_loc  # may be different from stride on last loop
535 |         input_ids = encodings.input_ids[:, begin_loc:end_loc]
536 |         target_ids = input_ids.clone()
537 |         target_ids[:, :-trg_len] = -100
538 | 
539 |         with torch.no_grad():
540 |             outputs = model(input_ids.cuda(), labels=target_ids)
541 | 
542 |             # loss is calculated using CrossEntropyLoss which averages over valid labels
543 |             # N.B. the model only calculates loss over trg_len - 1 labels, because it internally shifts the labels
544 |             # to the left by 1.
545 |             neg_log_likelihood = outputs.loss
546 | 
547 |         nlls.append(neg_log_likelihood)
548 | 
549 |         prev_end_loc = end_loc
550 |         if end_loc == seq_len:
551 |             break
552 | 
553 |     ppl = torch.exp(torch.stack(nlls).mean()).item()
554 |     context.log_result("perplexity", ppl)
555 | 


--------------------------------------------------------------------------------
/src/training_workflow.py:
--------------------------------------------------------------------------------
 1 | import mlrun
 2 | from kfp import dsl
 3 | 
 4 | 
 5 | @dsl.pipeline(name="MLOps Bot Master Pipeline")
 6 | def kfpipeline(
 7 |     html_links: str,
 8 |     model_name: str,
 9 |     pretrained_tokenizer: str,
10 |     pretrained_model: str,
11 |     epochs: str,
12 |     use_deepspeed: bool,
13 |     tokenizer_class: str = "transformers.AutoTokenizer",
14 |     model_class: str = "transformers.AutoModelForCausalLM",
15 | ):
16 |     # Get our project object:
17 |     project = mlrun.get_current_project()
18 | 
19 |     # Collect Dataset:
20 |     collect_dataset_run = mlrun.run_function(
21 |         function="data-collecting",
22 |         handler="collect_html_to_text_files",
23 |         name="data-collection",
24 |         params={"urls_file": html_links},
25 |         returns=["html-as-text-files:path"],
26 |     )
27 | 
28 |     # Dataset Preparation:
29 |     prepare_dataset_run = mlrun.run_function(
30 |         function="data-preparing",
31 |         handler="prepare_dataset",
32 |         name="data-preparation",
33 |         inputs={"source_dir": collect_dataset_run.outputs["html-as-text-files"]},
34 |         returns=["html-data:dataset"],
35 |     )
36 | 
37 |     # Training:
38 |     project.get_function("training")
39 | 
40 |     training_run = mlrun.run_function(
41 |         function="training",
42 |         name="train",
43 |         inputs={"dataset": prepare_dataset_run.outputs["html-data"]},
44 |         params={
45 |             "model_name": model_name,
46 |             "pretrained_tokenizer": pretrained_tokenizer,
47 |             "pretrained_model": pretrained_model,
48 |             "model_class": model_class,
49 |             "tokenizer_class": tokenizer_class,
50 |             "TRAIN_num_train_epochs": epochs,
51 |             "use_deepspeed": use_deepspeed,
52 |         },
53 |         handler="train",
54 |         outputs=["model"],
55 |     )
56 | 
57 |     # evaluation:
58 |     mlrun.run_function(
59 |         function="training",
60 |         name="evaluate",
61 |         params={
62 |             "model_path": training_run.outputs["model"],
63 |             "model_name": pretrained_model,
64 |             "tokenizer_name": pretrained_tokenizer,
65 |         },
66 |         inputs={"data": prepare_dataset_run.outputs["html-data"]},
67 |         handler="evaluate",
68 |     )
69 | 


--------------------------------------------------------------------------------
/tutorial.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "75cb6daf-ecdc-4129-8f28-ad871d3a795c",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# Tutorial: Build & Deploy Custom (fine-tuned) LLM Models and Applications\n",
  9 |     "\n",
 10 |     "In the following tutorial you will learn how to operationalize a LLM using MLRun. We will build **MLOpsPedia** - The MLOps Master Bot, a chatbot for answering all your MLOps questions. We will do so by covering the two main stages in every MLOps project:\n",
 11 |     "\n",
 12 |     "* **Automated training pipeline** - Build an automated ML pipeline for data collection, data preparation, training and evaluation.\n",
 13 |     "* **Serving graph deployment** - Build, deploy and test in a Gradio application the newly trained LLM.\n",
 14 |     "\n",
 15 |     "**MLRun** is welcoming you to **LLMOps**!\n",
 16 |     "\n",
 17 |     "> Make sure you went over the basics in MLRun [Quick Start Tutorial](https://docs.mlrun.org/en/stable/tutorial/01-mlrun-basics.html) to understand the MLRun basics.\n",
 18 |     "\n",
 19 |     "Run the notebook in the following order (you may skip the first step):\n",
 20 |     "1. [Test the Pretrained Model](#test-the-pretrained-model)\n",
 21 |     "2. [Automated Training Pipeline](#automated-training-pipeline)\n",
 22 |     "3. [Application Serving Pipeline](#application-serving-pipeline)"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "markdown",
 27 |    "id": "e9565c47-7720-47ca-ab0b-ac8a77286f90",
 28 |    "metadata": {},
 29 |    "source": [
 30 |     "But first, please install the following requirements:"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": null,
 36 |    "id": "cdf6b605-348d-4fd7-958d-d484446b5964",
 37 |    "metadata": {},
 38 |    "outputs": [],
 39 |    "source": [
 40 |     "%pip install -r requirements.txt"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "markdown",
 45 |    "id": "906e38b6-168a-47cb-9320-2acdd16b0b37",
 46 |    "metadata": {},
 47 |    "source": [
 48 |     "___\n",
 49 |     "<a id=\"test-the-pretrained-model\"></a>\n",
 50 |     "## 1. Test the Pretrained Model\n",
 51 |     "\n",
 52 |     "MLOpsPedia will be based on [falcon-7b](https://huggingface.co/tiiuae/falcon-7b). Before fine-tuning it, we want to see how it performs on some MLOps questions."
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "markdown",
 57 |    "id": "8d1a9b26-9916-47d8-9c89-1e0e7380bf57",
 58 |    "metadata": {},
 59 |    "source": [
 60 |     "### 1.1. Load `falcon-7b` from HuggingFace's Transformers Hub\n",
 61 |     "\n",
 62 |     "`falcon-7b` is fully supported by HuggingFace and have its own Model and Tokenizer classes. We will use them in a HuggingFace pipeline and test them out:"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 1,
 68 |    "id": "2c763708-f0e5-4a53-b788-64e4c2634973",
 69 |    "metadata": {},
 70 |    "outputs": [
 71 |     {
 72 |      "data": {
 73 |       "application/vnd.jupyter.widget-view+json": {
 74 |        "model_id": "80910a4c2be34f7ab35b193f37c8e0bb",
 75 |        "version_major": 2,
 76 |        "version_minor": 0
 77 |       },
 78 |       "text/plain": [
 79 |        "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
 80 |       ]
 81 |      },
 82 |      "metadata": {},
 83 |      "output_type": "display_data"
 84 |     }
 85 |    ],
 86 |    "source": [
 87 |     "import os\n",
 88 |     "from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, pipeline\n",
 89 |     "from transformers import logging\n",
 90 |     "logging.set_verbosity(\"CRITICAL\")\n",
 91 |     "\n",
 92 |     "model_name = \"tiiuae/falcon-7b\"\n",
 93 |     "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
 94 |     "generation_config = GenerationConfig.from_pretrained(model_name)\n",
 95 |     "generator = pipeline(\"text-generation\", model=model_name, tokenizer=tokenizer, trust_remote_code=True)"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "markdown",
100 |    "id": "65fa310e-cf4f-4f5b-9f96-aacb9ab3a394",
101 |    "metadata": {},
102 |    "source": [
103 |     "### 1.2. Test it on some MLOps Questions\n",
104 |     "\n",
105 |     "For the good order, we prepared `prompt_to_response` that infer a prompt through the pipeline we initialized and return the response. We'll use it for a couple of questions:"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "code",
110 |    "execution_count": 2,
111 |    "id": "76ef3714-a0d7-4818-be55-fb17f5c3cf21",
112 |    "metadata": {},
113 |    "outputs": [],
114 |    "source": [
115 |     "def prompt_to_response(prompt: str) -> str:\n",
116 |     "    return generator(prompt, \n",
117 |     "                     generation_config=generation_config,\n",
118 |     "                     max_length=50, pad_token_id=tokenizer.eos_token_id)[0][\"generated_text\"]"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "code",
123 |    "execution_count": 3,
124 |    "id": "e1e98ec1-859e-4306-ac0e-be3b714ef5ef",
125 |    "metadata": {},
126 |    "outputs": [
127 |     {
128 |      "name": "stdout",
129 |      "output_type": "stream",
130 |      "text": [
131 |       "What is a serving pipeline?\n",
132 |       "A serving pipeline is a set of tools that help you to create, manage, and deliver your content.\n",
133 |       "What is a serving pipeline?\n",
134 |       "A serving pipeline is a set of tools that help you to create,\n"
135 |      ]
136 |     }
137 |    ],
138 |    "source": [
139 |     "print(prompt_to_response(prompt=\"What is a serving pipeline?\"))"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": 4,
145 |    "id": "186841f5-c681-40bf-8467-68801cfca461",
146 |    "metadata": {},
147 |    "outputs": [
148 |     {
149 |      "name": "stdout",
150 |      "output_type": "stream",
151 |      "text": [
152 |       "What is MLops?\n",
153 |       "MLops is a set of practices that help organizations to build, deploy, and manage machine learning models at scale.\n",
154 |       "MLops is a set of practices that help organizations to build, deploy, and manage machine learning models\n"
155 |      ]
156 |     }
157 |    ],
158 |    "source": [
159 |     "print(prompt_to_response(prompt=\"What is MLops?\"))"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "markdown",
164 |    "id": "1203c3bc-b3a9-4b30-a7b3-c119a74e0e2d",
165 |    "metadata": {},
166 |    "source": [
167 |     "As expected, `falcon-7b` is not that sharp on MLOps questions, but that's about to change..."
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "markdown",
172 |    "id": "26437209-fa75-496f-8d12-19751ba88530",
173 |    "metadata": {},
174 |    "source": [
175 |     "___\n",
176 |     "<a id=\"automated-training-pipeline\"></a>\n",
177 |     "## 2. Automated Training Pipeline\n",
178 |     "\n",
179 |     "To get a `falcon-7b` that knows MLOps, we will fine tune it on [**Iguazio**'s MLOps blogs](https://www.iguazio.com/blog/). To do so, we will create a fully automated pipeline with the following steps:\n",
180 |     "\n",
181 |     "1. **Collect Data** - Collect all text from given html urls into `.txt` files, meaning we'll be getting all the MLOps blogs as text files.\n",
182 |     "2. **Preprocess Data** - Join the `.txt` files, reformatting the text into our prompt template: \"Subject - Content\". We made every header (`<h>` tags) a *subject* of a prompt, and the text (`<p>` tags) under it as its *content*.\n",
183 |     "3. **Train** - Fine-tune the LLM on the data. We'll run the training on **OpenMPI**, and we will use **DeepSpeed** for distributing the model and data between multiple workers, splitting the work between nodes and GPUs. **MLRun will auto-log the entire training process**.\n",
184 |     "4. **Evaluate** - Evaluate our model using the *Perplexity* metric.\n",
185 |     "\n",
186 |     "<img src=\"./images/training-pipeline.png\" style=\"width: 400px\"/>"
187 |    ]
188 |   },
189 |   {
190 |    "cell_type": "markdown",
191 |    "id": "6e22570a-3c0e-4278-8385-e0d321bb9067",
192 |    "metadata": {},
193 |    "source": [
194 |     "### 2.1. Define MLRun project and set all the MLRun functions\n",
195 |     "\n",
196 |     "Create or load an MLRun project that holds all your functions and configuration (see [project_setup.py](./src/project_setup.py))\n",
197 |     "\n",
198 |     "The project contains the following files where we'll set the functions from to build the workflow of the pipeline:\n",
199 |     "* [data_collection.py](./src/data_collection.py) - to create an MLRun function with the `collect_html_to_text_files` handler.\n",
200 |     "* [data_preprocess.py](./src/data_preprocess.py) - to create an MLRun function with the `prepare_dataset` handler.\n",
201 |     "* [training]() - to create an MLRun function with the `train` and `evaluate` handlers.\n",
202 |     "* [serving.py](./src/serving.py) - to create an MLRun function with all the serving graph steps (will be covered in section 3).\n",
203 |     "\n",
204 |     "In addition, the training pipeline is set to the project as well. It can be seen at [training_workflow.py](./src/training_workflow.py)\n",
205 |     "\n",
206 |     "The training and evaluation function we will use is [hugging_face_classifier_trainer](https://www.mlrun.org/hub/). It is taken from [**MLRun's Functions Hub**](https://docs.mlrun.org/en/stable/runtimes/load-from-hub.html) - a collection of ready to be imported functions for variety of use cases. We import the function during the project setup."
207 |    ]
208 |   },
209 |   {
210 |    "cell_type": "code",
211 |    "execution_count": 5,
212 |    "id": "d771e4ba-43a4-4bcf-8ae0-d35c0f80d259",
213 |    "metadata": {},
214 |    "outputs": [
215 |     {
216 |      "name": "stdout",
217 |      "output_type": "stream",
218 |      "text": [
219 |       "git://github.com/mlrun/demo-llm-tuning.git#main\n"
220 |      ]
221 |     }
222 |    ],
223 |    "source": [
224 |     "import mlrun\n",
225 |     "\n",
226 |     "project = mlrun.load_project(\n",
227 |     "    name=\"mlopspedia-bot\",\n",
228 |     "    context=\"./\",\n",
229 |     "    user_project=True,\n",
230 |     "    parameters={\n",
231 |     "        \"source\": \"git://github.com/mlrun/demo-llm-tuning.git#main\",\n",
232 |     "        \"default_image\": \"yonishelach/mlrun-llm\",\n",
233 |     "    })"
234 |    ]
235 |   },
236 |   {
237 |    "cell_type": "markdown",
238 |    "id": "a66deeb6-7472-4642-8a11-996422cd3091",
239 |    "metadata": {},
240 |    "source": [
241 |     "### 2.2. Run full LLM life-cycle workflow\n",
242 |     "\n",
243 |     "Run the training pipeline by using `project.run(workflow name, ...)`. The steps on the piepline inputs and outputs are as follows:\n",
244 |     "\n",
245 |     "1. url link -> `collect_html_to_text_files` -> zip containing all url text files.\n",
246 |     "2. zip containing all url text files -> `prepare_dataset` -> training set, evaluation set.\n",
247 |     "3. training set -> `train` -> model, metrics, plots\n",
248 |     "4. evaluation set, model -> `evaluate` -> metrics, plots"
249 |    ]
250 |   },
251 |   {
252 |    "cell_type": "code",
253 |    "execution_count": 6,
254 |    "id": "b1ea5ec6-cb78-44db-aac7-97e52ce591db",
255 |    "metadata": {},
256 |    "outputs": [
257 |     {
258 |      "data": {
259 |       "text/html": [
260 |        "<div>Pipeline running (id=2012a80c-500b-43fb-ad03-abffd6ee2a6b), <a href=\"https://dashboard.default-tenant.app.llm2.iguazio-cd0.com/mlprojects/mlopspedia-bot-yonis/jobs/monitor-workflows/workflow/2012a80c-500b-43fb-ad03-abffd6ee2a6b\" target=\"_blank\"><b>click here</b></a> to view the details in MLRun UI</div>"
261 |       ],
262 |       "text/plain": [
263 |        "<IPython.core.display.HTML object>"
264 |       ]
265 |      },
266 |      "metadata": {},
267 |      "output_type": "display_data"
268 |     },
269 |     {
270 |      "data": {
271 |       "image/svg+xml": [
272 |        "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
273 |        "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
274 |        " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
275 |        "<!-- Generated by graphviz version 2.43.0 (0)\n",
276 |        " -->\n",
277 |        "<!-- Title: kfp Pages: 1 -->\n",
278 |        "<svg width=\"186pt\" height=\"260pt\"\n",
279 |        " viewBox=\"0.00 0.00 186.08 260.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
280 |        "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 256)\">\n",
281 |        "<title>kfp</title>\n",
282 |        "<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-256 182.08,-256 182.08,4 -4,4\"/>\n",
283 |        "<!-- mlops&#45;bot&#45;master&#45;pipeline&#45;zsk5k&#45;1439426288 -->\n",
284 |        "<g id=\"node1\" class=\"node\">\n",
285 |        "<title>mlops&#45;bot&#45;master&#45;pipeline&#45;zsk5k&#45;1439426288</title>\n",
286 |        "<ellipse fill=\"green\" stroke=\"black\" cx=\"89.04\" cy=\"-18\" rx=\"50.09\" ry=\"18\"/>\n",
287 |        "<text text-anchor=\"middle\" x=\"89.04\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">evaluate</text>\n",
288 |        "</g>\n",
289 |        "<!-- mlops&#45;bot&#45;master&#45;pipeline&#45;zsk5k&#45;2897139595 -->\n",
290 |        "<g id=\"node2\" class=\"node\">\n",
291 |        "<title>mlops&#45;bot&#45;master&#45;pipeline&#45;zsk5k&#45;2897139595</title>\n",
292 |        "<ellipse fill=\"green\" stroke=\"black\" cx=\"89.04\" cy=\"-162\" rx=\"89.08\" ry=\"18\"/>\n",
293 |        "<text text-anchor=\"middle\" x=\"89.04\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">data&#45;preparation</text>\n",
294 |        "</g>\n",
295 |        "<!-- mlops&#45;bot&#45;master&#45;pipeline&#45;zsk5k&#45;2897139595&#45;&gt;mlops&#45;bot&#45;master&#45;pipeline&#45;zsk5k&#45;1439426288 -->\n",
296 |        "<g id=\"edge2\" class=\"edge\">\n",
297 |        "<title>mlops&#45;bot&#45;master&#45;pipeline&#45;zsk5k&#45;2897139595&#45;&gt;mlops&#45;bot&#45;master&#45;pipeline&#45;zsk5k&#45;1439426288</title>\n",
298 |        "<path fill=\"none\" stroke=\"black\" d=\"M84.58,-143.95C82.11,-133.63 79.29,-120.15 78.04,-108 76.41,-92.08 76.41,-87.92 78.04,-72 78.92,-63.46 80.57,-54.26 82.34,-45.96\"/>\n",
299 |        "<polygon fill=\"black\" stroke=\"black\" points=\"85.79,-46.57 84.58,-36.05 78.96,-45.03 85.79,-46.57\"/>\n",
300 |        "</g>\n",
301 |        "<!-- mlops&#45;bot&#45;master&#45;pipeline&#45;zsk5k&#45;930414823 -->\n",
302 |        "<g id=\"node3\" class=\"node\">\n",
303 |        "<title>mlops&#45;bot&#45;master&#45;pipeline&#45;zsk5k&#45;930414823</title>\n",
304 |        "<ellipse fill=\"green\" stroke=\"black\" cx=\"120.04\" cy=\"-90\" rx=\"33.29\" ry=\"18\"/>\n",
305 |        "<text text-anchor=\"middle\" x=\"120.04\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">train</text>\n",
306 |        "</g>\n",
307 |        "<!-- mlops&#45;bot&#45;master&#45;pipeline&#45;zsk5k&#45;2897139595&#45;&gt;mlops&#45;bot&#45;master&#45;pipeline&#45;zsk5k&#45;930414823 -->\n",
308 |        "<g id=\"edge1\" class=\"edge\">\n",
309 |        "<title>mlops&#45;bot&#45;master&#45;pipeline&#45;zsk5k&#45;2897139595&#45;&gt;mlops&#45;bot&#45;master&#45;pipeline&#45;zsk5k&#45;930414823</title>\n",
310 |        "<path fill=\"none\" stroke=\"black\" d=\"M96.55,-144.05C100.13,-135.97 104.49,-126.12 108.48,-117.11\"/>\n",
311 |        "<polygon fill=\"black\" stroke=\"black\" points=\"111.76,-118.35 112.61,-107.79 105.36,-115.52 111.76,-118.35\"/>\n",
312 |        "</g>\n",
313 |        "<!-- mlops&#45;bot&#45;master&#45;pipeline&#45;zsk5k&#45;930414823&#45;&gt;mlops&#45;bot&#45;master&#45;pipeline&#45;zsk5k&#45;1439426288 -->\n",
314 |        "<g id=\"edge4\" class=\"edge\">\n",
315 |        "<title>mlops&#45;bot&#45;master&#45;pipeline&#45;zsk5k&#45;930414823&#45;&gt;mlops&#45;bot&#45;master&#45;pipeline&#45;zsk5k&#45;1439426288</title>\n",
316 |        "<path fill=\"none\" stroke=\"black\" d=\"M112.7,-72.41C109.12,-64.34 104.73,-54.43 100.71,-45.35\"/>\n",
317 |        "<polygon fill=\"black\" stroke=\"black\" points=\"103.8,-43.68 96.55,-35.96 97.4,-46.52 103.8,-43.68\"/>\n",
318 |        "</g>\n",
319 |        "<!-- mlops&#45;bot&#45;master&#45;pipeline&#45;zsk5k&#45;915534038 -->\n",
320 |        "<g id=\"node4\" class=\"node\">\n",
321 |        "<title>mlops&#45;bot&#45;master&#45;pipeline&#45;zsk5k&#45;915534038</title>\n",
322 |        "<ellipse fill=\"green\" stroke=\"black\" cx=\"89.04\" cy=\"-234\" rx=\"78.79\" ry=\"18\"/>\n",
323 |        "<text text-anchor=\"middle\" x=\"89.04\" y=\"-230.3\" font-family=\"Times,serif\" font-size=\"14.00\">data&#45;collection</text>\n",
324 |        "</g>\n",
325 |        "<!-- mlops&#45;bot&#45;master&#45;pipeline&#45;zsk5k&#45;915534038&#45;&gt;mlops&#45;bot&#45;master&#45;pipeline&#45;zsk5k&#45;2897139595 -->\n",
326 |        "<g id=\"edge3\" class=\"edge\">\n",
327 |        "<title>mlops&#45;bot&#45;master&#45;pipeline&#45;zsk5k&#45;915534038&#45;&gt;mlops&#45;bot&#45;master&#45;pipeline&#45;zsk5k&#45;2897139595</title>\n",
328 |        "<path fill=\"none\" stroke=\"black\" d=\"M89.04,-215.7C89.04,-207.98 89.04,-198.71 89.04,-190.11\"/>\n",
329 |        "<polygon fill=\"black\" stroke=\"black\" points=\"92.54,-190.1 89.04,-180.1 85.54,-190.1 92.54,-190.1\"/>\n",
330 |        "</g>\n",
331 |        "</g>\n",
332 |        "</svg>\n"
333 |       ],
334 |       "text/plain": [
335 |        "<graphviz.graphs.Digraph at 0x7f1ba746ca00>"
336 |       ]
337 |      },
338 |      "metadata": {},
339 |      "output_type": "display_data"
340 |     },
341 |     {
342 |      "data": {
343 |       "text/html": [
344 |        "<h2>Run Results</h2><h3>[info] Workflow 2012a80c-500b-43fb-ad03-abffd6ee2a6b finished, state=Succeeded</h3><br>click the hyper links below to see detailed results<br><table border=\"1\" class=\"dataframe\">\n",
345 |        "  <thead>\n",
346 |        "    <tr style=\"text-align: right;\">\n",
347 |        "      <th>uid</th>\n",
348 |        "      <th>start</th>\n",
349 |        "      <th>state</th>\n",
350 |        "      <th>name</th>\n",
351 |        "      <th>parameters</th>\n",
352 |        "      <th>results</th>\n",
353 |        "    </tr>\n",
354 |        "  </thead>\n",
355 |        "  <tbody>\n",
356 |        "    <tr>\n",
357 |        "      <td><div title=\"fd71b79950314761876121289eef349d\"><a href=\"https://dashboard.default-tenant.app.llm2.iguazio-cd0.com/mlprojects/mlopspedia-bot-yonis/jobs/monitor/fd71b79950314761876121289eef349d/overview\" target=\"_blank\" >...9eef349d</a></div></td>\n",
358 |        "      <td>Jul 12 05:02:28</td>\n",
359 |        "      <td>completed</td>\n",
360 |        "      <td>evaluate</td>\n",
361 |        "      <td><div class=\"dictlist\">model_path=store://artifacts/mlopspedia-bot-yonis/falcon-7b-mlrun:2012a80c-500b-43fb-ad03-abffd6ee2a6b</div><div class=\"dictlist\">model_name=tiiuae/falcon-7b</div><div class=\"dictlist\">tokenizer_name=tiiuae/falcon-7b</div></td>\n",
362 |        "      <td><div class=\"dictlist\">perplexity=8.5703125</div></td>\n",
363 |        "    </tr>\n",
364 |        "    <tr>\n",
365 |        "      <td><div title=\"c14ad50afff5456d9d67a0a280920e39\"><a href=\"https://dashboard.default-tenant.app.llm2.iguazio-cd0.com/mlprojects/mlopspedia-bot-yonis/jobs/monitor/c14ad50afff5456d9d67a0a280920e39/overview\" target=\"_blank\" >...80920e39</a></div></td>\n",
366 |        "      <td>Jul 12 03:56:11</td>\n",
367 |        "      <td>completed</td>\n",
368 |        "      <td>train</td>\n",
369 |        "      <td><div class=\"dictlist\">model_name=falcon-7b-mlrun</div><div class=\"dictlist\">pretrained_tokenizer=tiiuae/falcon-7b</div><div class=\"dictlist\">pretrained_model=tiiuae/falcon-7b</div><div class=\"dictlist\">model_class=transformers.AutoModelForCausalLM</div><div class=\"dictlist\">tokenizer_class=transformers.AutoTokenizer</div><div class=\"dictlist\">TRAIN_num_train_epochs=5</div><div class=\"dictlist\">use_deepspeed=</div></td>\n",
370 |        "      <td><div class=\"dictlist\">loss=2.3346</div><div class=\"dictlist\">learning_rate=0.0</div><div class=\"dictlist\">train_runtime=3898.6792</div><div class=\"dictlist\">train_samples_per_second=0.737</div><div class=\"dictlist\">train_steps_per_second=0.046</div><div class=\"dictlist\">total_flos=2.9304526258176e+16</div></td>\n",
371 |        "    </tr>\n",
372 |        "    <tr>\n",
373 |        "      <td><div title=\"3d61bcbef459400c871bb9010ffbf5ab\"><a href=\"https://dashboard.default-tenant.app.llm2.iguazio-cd0.com/mlprojects/mlopspedia-bot-yonis/jobs/monitor/3d61bcbef459400c871bb9010ffbf5ab/overview\" target=\"_blank\" >...0ffbf5ab</a></div></td>\n",
374 |        "      <td>Jul 12 03:55:46</td>\n",
375 |        "      <td>completed</td>\n",
376 |        "      <td>data-preparation</td>\n",
377 |        "      <td></td>\n",
378 |        "      <td></td>\n",
379 |        "    </tr>\n",
380 |        "    <tr>\n",
381 |        "      <td><div title=\"57ad1dde5bd64fe391fff1137dea94d6\"><a href=\"https://dashboard.default-tenant.app.llm2.iguazio-cd0.com/mlprojects/mlopspedia-bot-yonis/jobs/monitor/57ad1dde5bd64fe391fff1137dea94d6/overview\" target=\"_blank\" >...7dea94d6</a></div></td>\n",
382 |        "      <td>Jul 12 03:53:50</td>\n",
383 |        "      <td>completed</td>\n",
384 |        "      <td>data-collection</td>\n",
385 |        "      <td><div class=\"dictlist\">urls_file=/User/demo-llm-tuning/data/html_urls.txt</div></td>\n",
386 |        "      <td></td>\n",
387 |        "    </tr>\n",
388 |        "  </tbody>\n",
389 |        "</table>"
390 |       ],
391 |       "text/plain": [
392 |        "<IPython.core.display.HTML object>"
393 |       ]
394 |      },
395 |      "metadata": {},
396 |      "output_type": "display_data"
397 |     }
398 |    ],
399 |    "source": [
400 |     "workflow_run = project.run(\n",
401 |     "    name=\"training_workflow\",\n",
402 |     "    arguments={\n",
403 |     "        \"html_links\": \"/User/demo-llm-tuning/data/html_urls.txt\",\n",
404 |     "        \"model_name\": \"falcon-7b-mlrun\",\n",
405 |     "        \"pretrained_tokenizer\": model_name,\n",
406 |     "        \"pretrained_model\": model_name,\n",
407 |     "        \"epochs\": 5,\n",
408 |     "    },\n",
409 |     "    watch=True,\n",
410 |     "    dirty=True,\n",
411 |     "    timeout=60 * 120,\n",
412 |     ")"
413 |    ]
414 |   },
415 |   {
416 |    "cell_type": "markdown",
417 |    "id": "a21444a9-b66b-4539-a1ea-13f745114fbb",
418 |    "metadata": {},
419 |    "source": [
420 |     "#### 2.2.1. Distributed Training\n",
421 |     "\n",
422 |     "In the following image you can see the 16 workers that trained the model as part of an **MPIJob** and **DeepSpeed**.\n",
423 |     "\n",
424 |     "<img src=\"./images/16-workers-training.png\" style=\"width: 1000px\"/>"
425 |    ]
426 |   },
427 |   {
428 |    "cell_type": "markdown",
429 |    "id": "1b92e42a-844a-40c4-a09f-dbb4bfd1e23c",
430 |    "metadata": {},
431 |    "source": [
432 |     "#### 2.2.2. UI Presentation\n",
433 |     "\n",
434 |     "Here we can see how the workflow looks on our UI, we can see the entire pipeline and the loss plot produced by the training step that is highlighted.\n",
435 |     "\n",
436 |     "<img src=\"./images/workflow-train.png\" style=\"width: 1000px\"/>"
437 |    ]
438 |   },
439 |   {
440 |    "cell_type": "markdown",
441 |    "id": "11910dbc-8c31-4efd-b092-cdb1a649c4f2",
442 |    "metadata": {},
443 |    "source": [
444 |     "___\n",
445 |     "<a id=\"application-serving-pipeline\"></a>\n",
446 |     "## 3. Application Serving Pipeline\n",
447 |     "\n",
448 |     "In this last part we'll serve our LLM using [MLRun Serving](https://docs.mlrun.org/en/stable/serving/serving-graph.html).\n",
449 |     "\n",
450 |     "MLRun serving can produce managed ML application pipelines using real-time auto-scaling [Nuclio](https://nuclio.io/) serverless functions. The application pipeline includes all the steps from accepting events or data, preparing the required model features, inferring results using one or more models, and driving actions.\n",
451 |     "\n",
452 |     "We'll build the following serving graph for chat application:\n",
453 |     "\n",
454 |     "* **Preprocess** (`preprocess`) - Fit the user prompt into out prompt structure (\"Subject - Content\") \n",
455 |     "* **LLM** (`LLMModelServer`) - To serve our trained model and perform inferences to generate answers.\n",
456 |     "* **Postprocess** (`postprocess`) - To see if our model generated text with confidence or not.\n",
457 |     "* **Toxicity Filter** (`ToxicityClassifierModelServer`) - To serve a Hugging Face Evaluate package model and perform inferences to catch toxic prompt and responses.\n",
458 |     "\n",
459 |     "<img src=\"./images/serving-graph.png\" style=\"width: 800px\"/>"
460 |    ]
461 |   },
462 |   {
463 |    "cell_type": "markdown",
464 |    "id": "9ecefcef-3a59-4d32-a046-f11e987d7df4",
465 |    "metadata": {},
466 |    "source": [
467 |     "### 3.1. Build our Serving Graph\n",
468 |     "\n",
469 |     "We'll first get the serving function with the code from our project (it was set in section 2.1.):"
470 |    ]
471 |   },
472 |   {
473 |    "cell_type": "code",
474 |    "execution_count": 7,
475 |    "id": "442e2d73-45fd-4264-92d1-9cfea8620066",
476 |    "metadata": {},
477 |    "outputs": [],
478 |    "source": [
479 |     "serving_function = project.get_function(\"serving\")"
480 |    ]
481 |   },
482 |   {
483 |    "cell_type": "code",
484 |    "execution_count": 8,
485 |    "id": "2fc82891-9f37-4c38-b3d7-84fdeb0abb25",
486 |    "metadata": {},
487 |    "outputs": [],
488 |    "source": [
489 |     "model_args = {\"load_in_8bit\": True, \"device_map\": \"cuda:0\", \"trust_remote_code\": True}"
490 |    ]
491 |   },
492 |   {
493 |    "cell_type": "markdown",
494 |    "id": "23cc8fcf-f9ff-4175-bdb2-25d0f1b437ef",
495 |    "metadata": {},
496 |    "source": [
497 |     "Now we'll build the serving graph:"
498 |    ]
499 |   },
500 |   {
501 |    "cell_type": "code",
502 |    "execution_count": 9,
503 |    "id": "08594367-5e87-4bf3-8598-d72a6759355b",
504 |    "metadata": {},
505 |    "outputs": [
506 |     {
507 |      "data": {
508 |       "image/svg+xml": [
509 |        "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
510 |        "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
511 |        " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
512 |        "<!-- Generated by graphviz version 2.43.0 (0)\n",
513 |        " -->\n",
514 |        "<!-- Title: mlrun&#45;flow Pages: 1 -->\n",
515 |        "<svg width=\"785pt\" height=\"44pt\"\n",
516 |        " viewBox=\"0.00 0.00 785.45 44.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
517 |        "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 40)\">\n",
518 |        "<title>mlrun&#45;flow</title>\n",
519 |        "<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-40 781.45,-40 781.45,4 -4,4\"/>\n",
520 |        "<!-- _start -->\n",
521 |        "<g id=\"node1\" class=\"node\">\n",
522 |        "<title>_start</title>\n",
523 |        "<polygon fill=\"lightgrey\" stroke=\"black\" points=\"38.55,-0.05 40.7,-0.15 42.83,-0.3 44.92,-0.49 46.98,-0.74 48.99,-1.03 50.95,-1.36 52.84,-1.75 54.66,-2.18 56.4,-2.65 58.06,-3.16 59.63,-3.71 61.11,-4.31 62.49,-4.94 63.76,-5.61 64.93,-6.31 65.99,-7.04 66.93,-7.8 67.77,-8.59 68.48,-9.41 69.09,-10.25 69.58,-11.11 69.95,-11.99 70.21,-12.89 70.36,-13.8 70.4,-14.72 70.33,-15.65 70.16,-16.59 69.89,-17.53 69.53,-18.47 69.07,-19.41 68.52,-20.35 67.89,-21.28 67.18,-22.2 66.4,-23.11 65.55,-24.01 64.63,-24.89 63.65,-25.75 62.62,-26.59 61.53,-27.41 60.4,-28.2 59.23,-28.96 58.02,-29.69 56.78,-30.39 55.5,-31.06 54.2,-31.69 52.88,-32.29 51.53,-32.84 50.17,-33.35 48.79,-33.82 47.4,-34.25 46,-34.64 44.59,-34.97 43.17,-35.26 41.75,-35.51 40.32,-35.7 38.89,-35.85 37.45,-35.95 36.02,-36 34.58,-36 33.15,-35.95 31.71,-35.85 30.28,-35.7 28.85,-35.51 27.43,-35.26 26.01,-34.97 24.6,-34.64 23.2,-34.25 21.81,-33.82 20.43,-33.35 19.07,-32.84 17.72,-32.29 16.4,-31.69 15.1,-31.06 13.82,-30.39 12.58,-29.69 11.37,-28.96 10.2,-28.2 9.07,-27.41 7.98,-26.59 6.95,-25.75 5.97,-24.89 5.05,-24.01 4.2,-23.11 3.42,-22.2 2.71,-21.28 2.08,-20.35 1.53,-19.41 1.07,-18.47 0.71,-17.53 0.44,-16.59 0.27,-15.65 0.2,-14.72 0.24,-13.8 0.39,-12.89 0.65,-11.99 1.02,-11.11 1.51,-10.25 2.11,-9.41 2.83,-8.59 3.66,-7.8 4.61,-7.04 5.67,-6.31 6.84,-5.61 8.11,-4.94 9.49,-4.31 10.97,-3.71 12.54,-3.16 14.2,-2.65 15.94,-2.18 17.76,-1.75 19.65,-1.36 21.61,-1.03 23.62,-0.74 25.68,-0.49 27.77,-0.3 29.9,-0.15 32.05,-0.05 34.22,0 36.38,0 38.55,-0.05\"/>\n",
524 |        "<text text-anchor=\"middle\" x=\"35.3\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">start</text>\n",
525 |        "</g>\n",
526 |        "<!-- preprocess -->\n",
527 |        "<g id=\"node2\" class=\"node\">\n",
528 |        "<title>preprocess</title>\n",
529 |        "<ellipse fill=\"none\" stroke=\"black\" cx=\"168.34\" cy=\"-18\" rx=\"61.99\" ry=\"18\"/>\n",
530 |        "<text text-anchor=\"middle\" x=\"168.34\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">preprocess</text>\n",
531 |        "</g>\n",
532 |        "<!-- _start&#45;&gt;preprocess -->\n",
533 |        "<g id=\"edge1\" class=\"edge\">\n",
534 |        "<title>_start&#45;&gt;preprocess</title>\n",
535 |        "<path fill=\"none\" stroke=\"black\" d=\"M70.05,-18C78.23,-18 87.29,-18 96.49,-18\"/>\n",
536 |        "<polygon fill=\"black\" stroke=\"black\" points=\"96.5,-21.5 106.5,-18 96.5,-14.5 96.5,-21.5\"/>\n",
537 |        "</g>\n",
538 |        "<!-- mlopspedia -->\n",
539 |        "<g id=\"node3\" class=\"node\">\n",
540 |        "<title>mlopspedia</title>\n",
541 |        "<ellipse fill=\"none\" stroke=\"black\" cx=\"329.78\" cy=\"-18\" rx=\"63.89\" ry=\"18\"/>\n",
542 |        "<text text-anchor=\"middle\" x=\"329.78\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">mlopspedia</text>\n",
543 |        "</g>\n",
544 |        "<!-- preprocess&#45;&gt;mlopspedia -->\n",
545 |        "<g id=\"edge2\" class=\"edge\">\n",
546 |        "<title>preprocess&#45;&gt;mlopspedia</title>\n",
547 |        "<path fill=\"none\" stroke=\"black\" d=\"M230.53,-18C238.77,-18 247.29,-18 255.71,-18\"/>\n",
548 |        "<polygon fill=\"black\" stroke=\"black\" points=\"255.91,-21.5 265.91,-18 255.91,-14.5 255.91,-21.5\"/>\n",
549 |        "</g>\n",
550 |        "<!-- postprocess -->\n",
551 |        "<g id=\"node4\" class=\"node\">\n",
552 |        "<title>postprocess</title>\n",
553 |        "<ellipse fill=\"none\" stroke=\"black\" cx=\"495.77\" cy=\"-18\" rx=\"66.09\" ry=\"18\"/>\n",
554 |        "<text text-anchor=\"middle\" x=\"495.77\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">postprocess</text>\n",
555 |        "</g>\n",
556 |        "<!-- mlopspedia&#45;&gt;postprocess -->\n",
557 |        "<g id=\"edge3\" class=\"edge\">\n",
558 |        "<title>mlopspedia&#45;&gt;postprocess</title>\n",
559 |        "<path fill=\"none\" stroke=\"black\" d=\"M393.72,-18C402,-18 410.56,-18 419.03,-18\"/>\n",
560 |        "<polygon fill=\"black\" stroke=\"black\" points=\"419.29,-21.5 429.29,-18 419.29,-14.5 419.29,-21.5\"/>\n",
561 |        "</g>\n",
562 |        "<!-- toxicity&#45;classifier -->\n",
563 |        "<g id=\"node5\" class=\"node\">\n",
564 |        "<title>toxicity&#45;classifier</title>\n",
565 |        "<ellipse fill=\"none\" stroke=\"black\" cx=\"687.76\" cy=\"-18\" rx=\"89.88\" ry=\"18\"/>\n",
566 |        "<text text-anchor=\"middle\" x=\"687.76\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">toxicity&#45;classifier</text>\n",
567 |        "</g>\n",
568 |        "<!-- postprocess&#45;&gt;toxicity&#45;classifier -->\n",
569 |        "<g id=\"edge4\" class=\"edge\">\n",
570 |        "<title>postprocess&#45;&gt;toxicity&#45;classifier</title>\n",
571 |        "<path fill=\"none\" stroke=\"black\" d=\"M562.15,-18C570.41,-18 578.99,-18 587.63,-18\"/>\n",
572 |        "<polygon fill=\"black\" stroke=\"black\" points=\"587.76,-21.5 597.76,-18 587.76,-14.5 587.76,-21.5\"/>\n",
573 |        "</g>\n",
574 |        "</g>\n",
575 |        "</svg>\n"
576 |       ],
577 |       "text/plain": [
578 |        "<graphviz.graphs.Digraph at 0x7f1ba7445970>"
579 |       ]
580 |      },
581 |      "execution_count": 9,
582 |      "metadata": {},
583 |      "output_type": "execute_result"
584 |     }
585 |    ],
586 |    "source": [
587 |     "# Set the topology and get the graph object:\n",
588 |     "graph = serving_function.set_topology(\"flow\", engine=\"async\")\n",
589 |     "\n",
590 |     "# Add the steps:\n",
591 |     "graph.to(handler=\"preprocess\", name=\"preprocess\") \\\n",
592 |     "     .to(\"LLMModelServer\",\n",
593 |     "         name=\"mlopspedia\",\n",
594 |     "         model_args=model_args,\n",
595 |     "         tokenizer_name=model_name,\n",
596 |     "         model_name=model_name,\n",
597 |     "         peft_model=project.get_artifact_uri(\"falcon-7b-mlrun\")) \\\n",
598 |     "     .to(handler=\"postprocess\", name=\"postprocess\") \\\n",
599 |     "     .to(\"ToxicityClassifierModelServer\",\n",
600 |     "         name=\"toxicity-classifier\",\n",
601 |     "         threshold=0.7).respond()\n",
602 |     "\n",
603 |     "# Plot to graph:\n",
604 |     "serving_function.plot(rankdir='LR')"
605 |    ]
606 |   },
607 |   {
608 |    "cell_type": "markdown",
609 |    "id": "426b91d1-649e-4ab0-8908-e8f2e2e54ceb",
610 |    "metadata": {},
611 |    "source": [
612 |     "Lastly, we wish to add a GPU and save the configured function in the project:"
613 |    ]
614 |   },
615 |   {
616 |    "cell_type": "code",
617 |    "execution_count": 10,
618 |    "id": "2efaff89-86de-4fa1-9bcb-cb97b0a34b7d",
619 |    "metadata": {},
620 |    "outputs": [
621 |     {
622 |      "data": {
623 |       "text/plain": [
624 |        "<mlrun.projects.project.MlrunProject at 0x7f1aa8d30d00>"
625 |       ]
626 |      },
627 |      "execution_count": 10,
628 |      "metadata": {},
629 |      "output_type": "execute_result"
630 |     }
631 |    ],
632 |    "source": [
633 |     "# Configure (add a GPU and increase readiness timeout):\n",
634 |     "serving_function.with_limits(gpus=1)\n",
635 |     "serving_function.spec.readiness_timeout = 3000\n",
636 |     "\n",
637 |     "# Save the function to the project:\n",
638 |     "project.set_function(serving_function, with_repo=True)\n",
639 |     "project.save()"
640 |    ]
641 |   },
642 |   {
643 |    "cell_type": "markdown",
644 |    "id": "b3637268-7349-4fdb-9baa-41ca0d41a94a",
645 |    "metadata": {},
646 |    "source": [
647 |     "### 3.2. Deploy and Test the Application\n",
648 |     "\n",
649 |     "We will call the `deploy_function` and wait:"
650 |    ]
651 |   },
652 |   {
653 |    "cell_type": "code",
654 |    "execution_count": 11,
655 |    "id": "dce5819e-aea0-48d2-9027-e85ce3b41aa2",
656 |    "metadata": {},
657 |    "outputs": [
658 |     {
659 |      "name": "stdout",
660 |      "output_type": "stream",
661 |      "text": [
662 |       "> 2023-07-12 05:03:41,703 [info] Starting remote function deploy\n",
663 |       "2023-07-12 05:03:42  (info) Deploying function\n",
664 |       "2023-07-12 05:03:42  (info) Building\n",
665 |       "2023-07-12 05:03:42  (info) Staging files and preparing base images\n",
666 |       "2023-07-12 05:03:42  (info) Building processor image\n",
667 |       "2023-07-12 05:26:38  (info) Build complete\n",
668 |       "2023-07-12 05:42:21  (info) Function deploy complete\n",
669 |       "> 2023-07-12 05:42:23,182 [info] successfully deployed function: {'internal_invocation_urls': ['nuclio-mlopspedia-bot-yonis-serving.default-tenant.svc.cluster.local:8080'], 'external_invocation_urls': ['mlopspedia-bot-yonis-serving-mlopspedia-bot-yonis.default-tenant.app.llm2.iguazio-cd0.com/']}\n"
670 |      ]
671 |     }
672 |    ],
673 |    "source": [
674 |     "# Deploy the serving function:\n",
675 |     "deployment = mlrun.deploy_function(\"serving\")"
676 |    ]
677 |   },
678 |   {
679 |    "cell_type": "markdown",
680 |    "id": "939ce236-4347-404e-9d61-03e6773fbb28",
681 |    "metadata": {},
682 |    "source": [
683 |     "Let's test the function manually on some prompts:"
684 |    ]
685 |   },
686 |   {
687 |    "cell_type": "code",
688 |    "execution_count": 12,
689 |    "id": "47a367e0-5b18-4032-b876-ff83bf5bf3a3",
690 |    "metadata": {},
691 |    "outputs": [],
692 |    "source": [
693 |     "generate_kwargs = {\"max_length\": 150, \"temperature\": 0.9, \"top_p\": 0.5, \"top_k\": 25, \"repetition_penalty\": 1.0}"
694 |    ]
695 |   },
696 |   {
697 |    "cell_type": "code",
698 |    "execution_count": 13,
699 |    "id": "4cc39d0d-c32e-43cd-974f-dda458e44d63",
700 |    "metadata": {},
701 |    "outputs": [
702 |     {
703 |      "name": "stdout",
704 |      "output_type": "stream",
705 |      "text": [
706 |       "> 2023-07-12 05:42:23,239 [info] invoking function: {'method': 'POST', 'path': 'http://nuclio-mlopspedia-bot-yonis-serving.default-tenant.svc.cluster.local:8080/predict'}\n",
707 |       "MLRun is a complete open source MLOps orchestration platform that provides a single platform for building, training, deploying and managing ML applications at scale. MLRun is built on top of Iguazio’s open source data science platform and provides a unified framework for running data science and ML applications.\n",
708 |       "MLRun provides:\n",
709 |       "\n",
710 |       "A single place to run and manage all ML workloads (from data science to production)\n",
711 |       "A unified framework for running data science and ML applications\n",
712 |       "A single place to run and manage all ML workloads (from data science to production)\n",
713 |       "A unified framework for running data science and ML applications\n",
714 |       "A unified framework for running data science and\n"
715 |      ]
716 |     }
717 |    ],
718 |    "source": [
719 |     "response = serving_function.invoke(path='/predict', body={\"prompt\": \"What is MLRun?\", **generate_kwargs})\n",
720 |     "print(response[\"outputs\"])"
721 |    ]
722 |   },
723 |   {
724 |    "cell_type": "code",
725 |    "execution_count": 14,
726 |    "id": "a2cc218f-e9f1-490a-ab03-b0656f2bc0c1",
727 |    "metadata": {},
728 |    "outputs": [
729 |     {
730 |      "name": "stdout",
731 |      "output_type": "stream",
732 |      "text": [
733 |       "> 2023-07-12 05:42:45,916 [info] invoking function: {'method': 'POST', 'path': 'http://nuclio-mlopspedia-bot-yonis-serving.default-tenant.svc.cluster.local:8080/predict'}\n",
734 |       "Machine learning is a subfield of artificial intelligence (AI) that focuses on algorithms that can learn from data and improve their performance over time. Machine learning algorithms can be used to build intelligent systems that can make decisions, learn from experience, and adapt to new situations.\n",
735 |       "Machine learning algorithms are used in many areas of our daily lives, such as:\n",
736 |       "\n",
737 |       "Automated driving\n",
738 |       "Speech recognition\n",
739 |       "Image recognition\n",
740 |       "Personalized recommendations\n",
741 |       "\n",
742 |       "Machine learning algorithms are used in the development of autonomous cars. The cars are able to navigate roads and react to situations in real time.\n",
743 |       "Speech recognition algorithms are used in voice assistants like Siri and Alexa. They can recognize your voice\n"
744 |      ]
745 |     }
746 |    ],
747 |    "source": [
748 |     "response = serving_function.invoke(path='/predict', body={\"prompt\": \"What is machine learning?\", **generate_kwargs})\n",
749 |     "print(response[\"outputs\"])"
750 |    ]
751 |   },
752 |   {
753 |    "cell_type": "code",
754 |    "execution_count": 15,
755 |    "id": "6aebc785-931c-4ea9-8cd1-ec11d8dc02b1",
756 |    "metadata": {},
757 |    "outputs": [
758 |     {
759 |      "name": "stdout",
760 |      "output_type": "stream",
761 |      "text": [
762 |       "> 2023-07-12 05:43:06,514 [info] invoking function: {'method': 'POST', 'path': 'http://nuclio-mlopspedia-bot-yonis-serving.default-tenant.svc.cluster.local:8080/predict'}\n",
763 |       "This bot do not respond to toxicity.\n"
764 |      ]
765 |     }
766 |    ],
767 |    "source": [
768 |     "response = serving_function.invoke(path='/predict', body={\"prompt\": \"You are stupid!\", **generate_kwargs})\n",
769 |     "print(response[\"outputs\"])"
770 |    ]
771 |   },
772 |   {
773 |    "cell_type": "markdown",
774 |    "id": "3caa7b5c-eed9-4fb2-b69d-4927a681f25c",
775 |    "metadata": {},
776 |    "source": [
777 |     "Now, we'll set up a Gradio application and launch it:"
778 |    ]
779 |   },
780 |   {
781 |    "cell_type": "code",
782 |    "execution_count": 16,
783 |    "id": "4055d2ab-cebc-4456-acb6-80627040416a",
784 |    "metadata": {
785 |     "tags": []
786 |    },
787 |    "outputs": [],
788 |    "source": [
789 |     "import json\n",
790 |     "\n",
791 |     "import gradio as gr\n",
792 |     "import requests\n",
793 |     "\n",
794 |     "# Get the serving url to send requests to:\n",
795 |     "serving_url = deployment.outputs[\"endpoint\"]\n",
796 |     "\n",
797 |     "\n",
798 |     "def generate(prompt, temperature, max_length, top_p, top_k, repetition_penalty):\n",
799 |     "    # Build the request for our serving graph:\n",
800 |     "    inputs = {\n",
801 |     "        \"prompt\": prompt,\n",
802 |     "        \"temperature\": temperature,\n",
803 |     "        \"max_length\": max_length,\n",
804 |     "        \"top_p\": top_p,\n",
805 |     "        \"top_k\": top_k,\n",
806 |     "        \"repetition_penalty\": repetition_penalty,\n",
807 |     "    }\n",
808 |     "\n",
809 |     "    # call the serving function with the request:\n",
810 |     "    resp = requests.post(serving_url, data=json.dumps(inputs).encode(\"utf-8\"))\n",
811 |     "\n",
812 |     "    # Return the response:\n",
813 |     "    return resp.json()[\"outputs\"]\n",
814 |     "\n",
815 |     "\n",
816 |     "# Set up a Gradio frontend application:\n",
817 |     "with gr.Blocks(analytics_enabled=False, theme=gr.themes.Soft()) as demo:\n",
818 |     "    gr.Markdown(\n",
819 |     "        \"\"\"# LLM Playground\n",
820 |     "Play with the `generate` configurations and see how they make the LLM's responses better or worse.\n",
821 |     "\"\"\"\n",
822 |     "    )\n",
823 |     "    with gr.Row():\n",
824 |     "        with gr.Column(scale=5):\n",
825 |     "            with gr.Row():\n",
826 |     "                chatbot = gr.Chatbot()\n",
827 |     "            with gr.Row():\n",
828 |     "                prompt = gr.Textbox(label=\"Subject to ask about:\", placeholder=\"Type a question and Enter\")\n",
829 |     "\n",
830 |     "        with gr.Column(scale=1):\n",
831 |     "            temperature = gr.Slider(minimum=0, maximum=1, value=0.9, label=\"Temperature\", info=\"Choose between 0 and 1\")\n",
832 |     "            max_length = gr.Slider(minimum=0, maximum=1500, value=150, label=\"Maximum length\", info=\"Choose between 0 and 1500\")\n",
833 |     "            top_p = gr.Slider(minimum=0, maximum=1, value=0.5, label=\"Top P\", info=\"Choose between 0 and 1\")\n",
834 |     "            top_k = gr.Slider(minimum=0, maximum=500, value=25, label=\"Top k\", info=\"Choose between 0 and 500\")\n",
835 |     "            repetition_penalty = gr.Slider(minimum=0, maximum=1, value=1, label=\"repetition penalty\", info=\"Choose between 0 and 1\")\n",
836 |     "            clear = gr.Button(\"Clear\")\n",
837 |     "\n",
838 |     "    def respond(prompt, chat_history, temperature, max_length, top_p, top_k, repetition_penalty):\n",
839 |     "        bot_message = generate(prompt, temperature, max_length, top_p, top_k, repetition_penalty)\n",
840 |     "        chat_history.append((prompt, bot_message))\n",
841 |     "\n",
842 |     "        return \"\", chat_history\n",
843 |     "\n",
844 |     "    prompt.submit(respond, [prompt, chatbot, temperature, max_length, top_p, top_k, repetition_penalty], [prompt, chatbot])\n",
845 |     "    clear.click(lambda: None, None, chatbot, queue=False)\n"
846 |    ]
847 |   },
848 |   {
849 |    "cell_type": "code",
850 |    "execution_count": 17,
851 |    "id": "0ef771d3-ecb1-4cde-a9bc-6ebf8b76d37e",
852 |    "metadata": {},
853 |    "outputs": [
854 |     {
855 |      "name": "stdout",
856 |      "output_type": "stream",
857 |      "text": [
858 |       "Running on local URL:  http://127.0.0.1:7860\n",
859 |       "Running on public URL: https://b47d16a4d0489c6dde.gradio.live\n",
860 |       "\n",
861 |       "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
862 |      ]
863 |     },
864 |     {
865 |      "data": {
866 |       "text/html": [
867 |        "<div><iframe src=\"https://b47d16a4d0489c6dde.gradio.live\" width=\"100%\" height=\"685\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
868 |       ],
869 |       "text/plain": [
870 |        "<IPython.core.display.HTML object>"
871 |       ]
872 |      },
873 |      "metadata": {},
874 |      "output_type": "display_data"
875 |     },
876 |     {
877 |      "data": {
878 |       "text/plain": []
879 |      },
880 |      "execution_count": 17,
881 |      "metadata": {},
882 |      "output_type": "execute_result"
883 |     }
884 |    ],
885 |    "source": [
886 |     "demo.launch(share=True, height=685)"
887 |    ]
888 |   },
889 |   {
890 |    "cell_type": "markdown",
891 |    "id": "ef6b3b68",
892 |    "metadata": {},
893 |    "source": [
894 |     "<img src=\"./images/gradio.png\" style=\"width: 1000px\"/>"
895 |    ]
896 |   },
897 |   {
898 |    "cell_type": "code",
899 |    "execution_count": null,
900 |    "id": "803b4824",
901 |    "metadata": {
902 |     "collapsed": false,
903 |     "jupyter": {
904 |      "outputs_hidden": false
905 |     }
906 |    },
907 |    "outputs": [],
908 |    "source": []
909 |   }
910 |  ],
911 |  "metadata": {
912 |   "kernelspec": {
913 |    "display_name": "mlrun-base",
914 |    "language": "python",
915 |    "name": "conda-env-mlrun-base-py"
916 |   },
917 |   "language_info": {
918 |    "codemirror_mode": {
919 |     "name": "ipython",
920 |     "version": 3
921 |    },
922 |    "file_extension": ".py",
923 |    "mimetype": "text/x-python",
924 |    "name": "python",
925 |    "nbconvert_exporter": "python",
926 |    "pygments_lexer": "ipython3",
927 |    "version": "3.9.16"
928 |   }
929 |  },
930 |  "nbformat": 4,
931 |  "nbformat_minor": 5
932 | }
933 | 


--------------------------------------------------------------------------------