├── .github └── workflows │ ├── codeql-analysis.yml │ ├── integration-tests.yml │ └── unit-tests.yml ├── .gitignore ├── .isort.cfg ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── bin ├── _brew_install ├── _build_library ├── _cleanup ├── _pyenv_config ├── _pyenv_install ├── _pyenv_uninstall ├── _pyenv_update ├── _release_prod └── _release_test ├── examples ├── Makefile-example ├── README.md ├── cli-examples │ ├── .gitignore │ ├── Makefile │ ├── model.py │ ├── requirements.txt │ └── run-all.sh └── examples-by-ml-library │ ├── .isort.cfg │ ├── Makefile │ ├── libraries │ ├── __init__.py │ ├── annoy_example.py │ ├── catboost_example.py │ ├── causalml_example.py │ ├── fastai_example.py │ ├── gensim_example.py │ ├── huggingface │ │ ├── __init__.py │ │ ├── distilbert.py │ │ ├── dpt.py │ │ ├── gpt2_pytorch.py │ │ ├── gpt2_tensorflow.py │ │ └── sam.py │ ├── keras_example.py │ ├── lightgbm_example.py │ ├── onnx_lightgbm_example.py │ ├── onnx_sklearn_example.py │ ├── prophet_example.py │ ├── pyspark_example.py │ ├── pytorch_example.py │ ├── pytorch_lightning_example.py │ ├── raw_file_example.py │ ├── shap_example.py │ ├── sklearn_example.py │ ├── sklearn_with_explainer_example.py │ ├── sklearn_with_extras_example.py │ ├── skorch_example.py │ ├── tensorflow_example.py │ ├── util │ │ ├── datasets.py │ │ └── domains.py │ ├── xgboost_booster_example.py │ ├── xgboost_example.py │ └── yolo_example.py │ ├── main.py │ ├── modelstores.py │ ├── requirements.txt │ └── run-all.sh ├── modelstore ├── __init__.py ├── __main__.py ├── ids │ ├── __init__.py │ └── model_ids.py ├── metadata │ ├── __init__.py │ ├── code │ │ ├── __init__.py │ │ ├── code.py │ │ ├── dependencies.py │ │ ├── revision.py │ │ └── runtime.py │ ├── dataset │ │ ├── __init__.py │ │ ├── dataset.py │ │ ├── features.py │ │ ├── labels.py │ │ └── types.py │ ├── metadata.py │ ├── model │ │ ├── __init__.py │ │ ├── model.py │ │ └── model_type.py │ ├── storage │ │ ├── __init__.py │ │ └── storage.py │ └── utils │ │ ├── __init__.py │ │ └── utils.py ├── model_store.py ├── models │ ├── CONTRIBUTING.md │ ├── __init__.py │ ├── annoy.py │ ├── catboost.py │ ├── causalml.py │ ├── common.py │ ├── fastai.py │ ├── gensim.py │ ├── lightgbm.py │ ├── managers.py │ ├── missing_manager.py │ ├── model_file.py │ ├── model_manager.py │ ├── multiple_models.py │ ├── onnx.py │ ├── prophet.py │ ├── pyspark.py │ ├── pytorch.py │ ├── pytorch_lightning.py │ ├── shap.py │ ├── sklearn.py │ ├── skorch.py │ ├── tensorflow.py │ ├── transformers.py │ ├── util.py │ └── xgboost.py ├── storage │ ├── CONTRIBUTING.md │ ├── __init__.py │ ├── aws.py │ ├── azure.py │ ├── blob_storage.py │ ├── gcloud.py │ ├── hdfs.py │ ├── local.py │ ├── minio.py │ ├── states │ │ ├── __init__.py │ │ └── model_states.py │ ├── storage.py │ └── util │ │ ├── __init__.py │ │ ├── environment.py │ │ ├── paths.py │ │ └── versions.py └── utils │ ├── __init__.py │ ├── cli.py │ ├── exceptions.py │ └── log.py ├── pytest.ini ├── requirements-dev0.txt ├── requirements-dev1.txt ├── requirements.txt ├── setup.py ├── tests ├── __init__.py ├── ids │ ├── __init__.py │ └── test_model_ids.py ├── metadata │ ├── __init__.py │ ├── code │ │ ├── __init__.py │ │ ├── test_code.py │ │ ├── test_dependencies.py │ │ ├── test_revision.py │ │ └── test_runtime.py │ ├── dataset │ │ ├── __init__.py │ │ ├── fixtures.py │ │ ├── test_dataset.py │ │ ├── test_features.py │ │ ├── test_labels.py │ │ └── test_types.py │ ├── model │ │ ├── __init__.py │ │ ├── test_model.py │ │ └── 
test_model_type.py │ ├── storage │ │ ├── __init__.py │ │ └── test_storage.py │ ├── test_metadata.py │ └── utils │ │ ├── __init__.py │ │ └── test_utils.py ├── models │ ├── __init__.py │ ├── test_annoy.py │ ├── test_catboost.py │ ├── test_causalml.py │ ├── test_common.py │ ├── test_fastai.py │ ├── test_gensim.py │ ├── test_lightgbm.py │ ├── test_managers.py │ ├── test_missing_manager.py │ ├── test_model_file.py │ ├── test_model_manager.py │ ├── test_multiple_models.py │ ├── test_onnx.py │ ├── test_prophet.py │ ├── test_pyspark.py │ ├── test_pytorch.py │ ├── test_pytorch_lightning.py │ ├── test_shap.py │ ├── test_sklearn.py │ ├── test_skorch.py │ ├── test_tensorflow.py │ ├── test_transformers.py │ ├── test_xgboost.py │ └── utils.py ├── storage │ ├── __init__.py │ ├── states │ │ ├── __init__.py │ │ └── test_model_states.py │ ├── test_aws.py │ ├── test_azure.py │ ├── test_blob_storage.py │ ├── test_blob_storage_artifacts.py │ ├── test_blob_storage_meta_data.py │ ├── test_blob_storage_states.py │ ├── test_gcloud.py │ ├── test_hdfs.py │ ├── test_local.py │ ├── test_minio.py │ ├── test_utils.py │ └── util │ │ ├── __init__.py │ │ └── test_paths.py ├── test_model_store.py ├── test_model_store_filesystem.py └── test_utils.py └── workflows ├── Makefile ├── actions ├── __init__.py ├── actions.py ├── cli.py ├── models.py └── storage.py ├── fixtures ├── __init__.py ├── extra.py ├── models.py └── modelstores.py ├── main.py ├── requirements.txt └── requirements ├── aws-s3.txt ├── azure-container.txt ├── filesystem.txt ├── google-cloud-storage.txt └── minio.txt /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ main ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ main ] 20 | 21 | jobs: 22 | analyze: 23 | name: Analyze 24 | runs-on: ubuntu-latest 25 | permissions: 26 | actions: read 27 | contents: read 28 | security-events: write 29 | 30 | strategy: 31 | fail-fast: false 32 | matrix: 33 | language: [ 'python' ] 34 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 35 | # Learn more about CodeQL language support at https://git.io/codeql-language-support 36 | 37 | steps: 38 | - name: Checkout repository 39 | uses: actions/checkout@v3 40 | 41 | # Initializes the CodeQL tools for scanning. 42 | - name: Initialize CodeQL 43 | uses: github/codeql-action/init@v1 44 | with: 45 | languages: ${{ matrix.language }} 46 | # If you wish to specify custom queries, you can do so here or in a config file. 47 | # By default, queries listed here will override any specified in a config file. 48 | # Prefix the list here with "+" to use these queries and those in the config file. 49 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 50 | 51 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 
52 | # If this step fails, then you should remove it and run the build manually (see below) 53 | - name: Autobuild 54 | uses: github/codeql-action/autobuild@v1 55 | 56 | # ℹ️ Command-line programs to run using the OS shell. 57 | # 📚 https://git.io/JvXDl 58 | 59 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 60 | # and modify them (or add more) to build your code if your project 61 | # uses a compiled language 62 | 63 | #- run: | 64 | # make bootstrap 65 | # make release 66 | 67 | - name: Perform CodeQL Analysis 68 | uses: github/codeql-action/analyze@v1 69 | -------------------------------------------------------------------------------- /.github/workflows/integration-tests.yml: -------------------------------------------------------------------------------- 1 | name: Integration tests 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | env: 10 | MODEL_STORE_AWS_BUCKET: ${{ secrets.MODEL_STORE_AWS_BUCKET }} 11 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 12 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 13 | MODEL_STORE_GCP_PROJECT: ${{ secrets.MODEL_STORE_GCP_PROJECT }} 14 | MODEL_STORE_GCP_BUCKET: ${{ secrets.MODEL_STORE_GCP_BUCKET }} 15 | MODEL_STORE_AZURE_CONTAINER: ${{ secrets.MODEL_STORE_AZURE_CONTAINER }} 16 | AZURE_ACCOUNT_NAME: ${{ secrets.AZURE_ACCOUNT_NAME }} 17 | AZURE_ACCESS_KEY: ${{ secrets.AZURE_ACCESS_KEY }} 18 | AZURE_STORAGE_CONNECTION_STRING: ${{ secrets.AZURE_STORAGE_CONNECTION_STRING }} 19 | MODEL_STORE_ROOT_PREFIX: "/home/runner" 20 | MODEL_STORE_MINIO_BUCKET: ${{ secrets.MODEL_STORE_AWS_BUCKET }} 21 | MINIO_ACCESS_KEY: ${{ secrets.AWS_ACCESS_KEY_ID }} 22 | MINIO_SECRET_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 23 | 24 | jobs: 25 | build: 26 | runs-on: ubuntu-latest 27 | permissions: 28 | id-token: write 29 | contents: read 30 | strategy: 31 | matrix: 32 | storage-type: ["filesystem", "aws-s3", "google-cloud-storage", "azure-container", "minio"] 33 | env: 34 | MODEL_STORE_STORAGE: ${{ matrix.storage-type }} 35 | steps: 36 | - name: 'Check out repo' 37 | uses: actions/checkout@v3 38 | - name: Set up Python 3.8 39 | uses: actions/setup-python@v4 40 | with: 41 | python-version: 3.8 42 | - name: Install dependencies 43 | run: | 44 | cd workflows/ 45 | make setup 46 | pip install -r requirements/${{ matrix.storage-type }}.txt 47 | pip install -e .. 
48 | - name: 'Authenticate to Google Cloud' 49 | if: ${{ env.MODEL_STORE_STORAGE == 'google-cloud-storage' }} 50 | uses: 'google-github-actions/auth@v1' 51 | with: 52 | create_credentials_file: true 53 | workload_identity_provider: ${{ secrets.GCP_WORKLOAD_ID_PROVIDER }} 54 | service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }} 55 | - name: Run integration script 56 | run: | 57 | cd workflows/ 58 | python main.py --modelstore-in ${{ matrix.storage-type }} 59 | -------------------------------------------------------------------------------- /.github/workflows/unit-tests.yml: -------------------------------------------------------------------------------- 1 | name: Unit tests 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | python-version: [3.8, 3.9] 15 | steps: 16 | - uses: actions/checkout@v3 17 | - name: Set up Python ${{ matrix.python-version }} 18 | uses: actions/setup-python@v4 19 | with: 20 | python-version: ${{ matrix.python-version }} 21 | - name: Free Disk Space (Ubuntu) 22 | uses: jlumbroso/free-disk-space@main 23 | with: 24 | # this might remove tools that are actually needed, 25 | # if set to "true" but frees about 6 GB 26 | tool-cache: false 27 | 28 | # all of these default to true, but feel free to set to 29 | # "false" if necessary for your workflow 30 | android: true 31 | dotnet: true 32 | haskell: true 33 | large-packages: true 34 | docker-images: true 35 | swap-storage: true 36 | - name: Install dependencies 37 | run: | 38 | python -m pip install --upgrade pip setuptools wheel 39 | pip install -r requirements.txt 40 | pip install -r requirements-dev0.txt 41 | pip install -r requirements-dev1.txt 42 | pip install -e . 
43 |     - name: Test with pytest
44 |       run: |
45 |         pytest -x
46 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | 
3 | .idea/
4 | 
5 | .python-version
6 | 
7 | .vscode/
8 | 
9 | dist/
10 | 
11 | *build/
12 | 
13 | *egg-info/
14 | 
15 | *.tar.gz
16 | 
17 | catboost_info/
18 | 
19 | *.pyc
20 | 
21 | *operatorai-model-store*
22 | 
23 | *.json
24 | 
25 | *.joblib
26 | 
27 | *.xgboost
28 | 
29 | modelstore.egg-info
30 | 
31 | *.pt
32 | 
--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | multi_line_output = 3
3 | 
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu
2 | WORKDIR /usr/src/app
3 | 
4 | ARG DEBIAN_FRONTEND=noninteractive
5 | RUN apt-get update && \
6 |     apt-get install -y build-essential && \
7 |     apt-get install -y git ninja-build ccache libopenblas-dev libopencv-dev cmake && \
8 |     apt-get install -y gcc mono-mcs g++ && \
9 |     apt-get install -y python3 python3-pip && \
10 |     apt-get install -y default-jdk && \
11 |     apt-get install -y libhdf5-dev && \
12 |     rm -rf /var/lib/apt/lists/*
13 | 
14 | RUN pip3 install --upgrade pip setuptools wheel
15 | 
16 | # Copy & install requirements
17 | COPY requirements-dev0.txt ./requirements-dev0.txt
18 | COPY requirements-dev1.txt ./requirements-dev1.txt
19 | COPY requirements.txt ./requirements.txt
20 | 
21 | RUN pip3 install -r requirements-dev0.txt
22 | RUN pip3 install -r requirements-dev1.txt
23 | RUN pip3 install -r requirements.txt
24 | 
25 | # Copy library source
26 | COPY modelstore ./modelstore
27 | COPY tests ./tests
28 | 
29 | # Run tests
30 | ENTRYPOINT ["python3", "-m", "pytest", "--exitfirst", "./tests"]
31 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | VIRTUALENV_NAME=$(shell pwd | rev | cut -d '/' -f 1 | rev)-dev
2 | 
3 | .PHONY: uninstall
4 | uninstall:
5 | 	@./bin/_pyenv_uninstall $(VIRTUALENV_NAME)
6 | 
7 | .PHONY: setup
8 | setup:
9 | 	@./bin/_brew_install
10 | 
11 | .PHONY: install
12 | install: uninstall
13 | 	@./bin/_pyenv_install $(VIRTUALENV_NAME)
14 | 
15 | .PHONY: update
16 | update:
17 | 	@./bin/_pyenv_update
18 | 
19 | .PHONY: build
20 | build:
21 | 	@./bin/_build_library
22 | 
23 | .PHONY: test
24 | test:
25 | 	@docker build . -t modelstore-dev
26 | 	@docker run -it --rm modelstore-dev
27 | 
28 | .PHONY: release-test
29 | release-test: build
30 | 	@./bin/_release_test
31 | 
32 | .PHONY: release-prod
33 | release-prod:
34 | 	@./bin/_release_prod
35 | 
36 | .PHONY: clean
37 | clean:
38 | 	@./bin/_cleanup
39 | 
--------------------------------------------------------------------------------
/bin/_brew_install:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 | 
4 | function install {
5 |     if brew ls --versions "$1" >/dev/null; then
6 |         HOMEBREW_NO_AUTO_UPDATE=1 brew upgrade "$1"
7 |     else
8 |         HOMEBREW_NO_AUTO_UPDATE=1 brew install "$1"
9 |     fi
10 | }
11 | 
12 | echo -e "\n 💬 Running brew update..."
13 | brew update
14 | 
15 | echo -e "\n 💬 Installing pyenv & pyenv-virtualenv..."
16 | install pyenv 17 | install pyenv-virtualenv 18 | 19 | # To get pystan to install correctly (required by prophet) 20 | # https://stackoverflow.com/questions/52814868/pystan-compileerror-command-gcc-failed-with-exit-status-1-macos 21 | echo -e "\n 💬 Installing gcc..." 22 | install gcc 23 | export CC=gcc-11 24 | export CXX=g++-11 25 | 26 | # To use xgboost models on mac 27 | # https://xgboost.readthedocs.io/en/latest/build.html#building-on-osx 28 | echo -e "\n 💬 Installing libomp..." 29 | install libomp 30 | 31 | # To use pyspark models on mac 32 | # To use hdfs storage on mac 33 | # echo -e "\n 💬 Installing java and hadoop..." 34 | # install java 35 | # install hadoop 36 | 37 | echo "\n ✅ Done." 38 | -------------------------------------------------------------------------------- /bin/_build_library: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | VIRTUALENV_NAME=$(pyenv local) 5 | 6 | echo "\n ⏱ Building library: $VIRTUALENV_NAME" 7 | 8 | rm -rf dist build modelstore.egg_info 9 | pip install --upgrade pip setuptools wheel 10 | 11 | python setup.py sdist bdist_wheel 12 | 13 | echo "\n ✅ Done: results are in the dist/ directory." 14 | -------------------------------------------------------------------------------- /bin/_cleanup: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo -e "\n 🧼 Removing pycache files" 3 | find . | grep -E "(__pycache__|\.pyc|\.pyo$)" | xargs rm -rf 4 | 5 | echo -e "\n 🧼 Removing build directories" 6 | rm -rf *.egg-info 7 | rm -rf build 8 | rm -rf dist 9 | 10 | echo -e "\n 🎉 Done." 11 | -------------------------------------------------------------------------------- /bin/_pyenv_config: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # export PYTHON_VERSION=3.7.15 3 | export PYTHON_VERSION=3.8.12 4 | # export PYTHON_VERSION=3.9.16 5 | 6 | export VIRTUALENV_NAME="$1-${PYTHON_VERSION//./-}" 7 | export REPO_ROOT=$(cd $(dirname $0)/.. && pwd) 8 | 9 | echo -e "\n 💬 Using a venv called: ${VIRTUALENV_NAME}" 10 | 11 | eval "$(pyenv init --path)" 12 | eval "$(pyenv virtualenv-init -)" 13 | -------------------------------------------------------------------------------- /bin/_pyenv_install: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | echo -e "\n 💬 Installing..." 5 | 6 | source $(dirname $0)/_pyenv_config "$@" 7 | 8 | if [[ $(pyenv versions | grep -L $PYTHON_VERSION) ]]; then 9 | echo -e "\n 💬 Installing Python $PYTHON_VERSION" 10 | pyenv install $PYTHON_VERSION 11 | fi 12 | 13 | echo -e "\n 💬 Creating a $PYTHON_VERSION environment: $VIRTUALENV_NAME" 14 | env PYTHON_CONFIGURE_OPTS="--enable-framework CC=clang" \ 15 | pyenv virtualenv \ 16 | --force $PYTHON_VERSION \ 17 | "$VIRTUALENV_NAME" 18 | 19 | echo -e "\n 💬 Setting local: $VIRTUALENV_NAME" 20 | pyenv local $VIRTUALENV_NAME 21 | 22 | echo -e "\n 💬 Upgrading pip" 23 | pip install --upgrade pip setuptools wheel 24 | 25 | for i in ./requirements*txt; do 26 | echo -e "\n\n 💬 Installing requirements in: $i" 27 | pip install -r $i 28 | done 29 | 30 | pip install -e $REPO_ROOT 31 | echo -e "\n ✅ Done." 32 | -------------------------------------------------------------------------------- /bin/_pyenv_uninstall: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | echo -e "\n 💬 Uninstalling..." 
5 | 6 | source $(dirname $0)/_pyenv_config "$@" 7 | 8 | if [[ -f ".python-version" ]]; then 9 | # Keep pyenv-virtualenvs for other versions of Python 10 | if [ "${VIRTUALENV_NAME}" == "$(cat .python-version)" ] ;then 11 | echo -e "\n ⏱ Force removing: $VIRTUALENV_NAME" 12 | pyenv uninstall -f $VIRTUALENV_NAME 13 | fi 14 | rm .python-version 15 | echo -e "\n ✅ Done." 16 | else 17 | echo -e "\n ✅ Nothing to do." 18 | fi 19 | 20 | -------------------------------------------------------------------------------- /bin/_pyenv_update: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | echo -e "\n 💬 Updating..." 5 | 6 | pip install --upgrade pip setuptools wheel 7 | for i in ./requirements*txt; do 8 | echo -e "\n\n 💬 Updating requirements in: $i" 9 | pip install --upgrade -r $i 10 | done 11 | 12 | pip install -e $REPO_ROOT 13 | echo -e "\n ✅ Done." 14 | -------------------------------------------------------------------------------- /bin/_release_prod: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | echo "\n ⏱ Uploading library to pypi..." 5 | 6 | pip install --upgrade twine 7 | 8 | twine check dist/* 9 | 10 | twine upload \ 11 | --username $TWINE_PROD_USERNAME \ 12 | --password $TWINE_PROD_PWD \ 13 | dist/* 14 | 15 | echo "\n 🎉 Done." 16 | -------------------------------------------------------------------------------- /bin/_release_test: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | echo "\n ⏱ Uploading library to testpypi..." 5 | 6 | pip install --upgrade twine 7 | 8 | twine check dist/* 9 | 10 | twine upload \ 11 | --username $TWINE_TEST_USERNAME \ 12 | --password $TWINE_TEST_PWD \ 13 | --repository testpypi dist/* 14 | 15 | echo "\n 🚢 Done." 
16 | 
--------------------------------------------------------------------------------
/examples/Makefile-example:
--------------------------------------------------------------------------------
1 | VIRTUALENV_NAME=modelstore.$(shell pwd | rev | cut -d '/' -f 1 | rev)
2 | REPO_ROOT=$(shell cd ../../ && pwd)
3 | 
4 | .PHONY: name pyenv pyenv-local pyenv-prod pyenv-test pyenv-uninstall refresh gcloud
5 | 
6 | name:
7 | 	@echo $(VIRTUALENV_NAME)
8 | 
9 | pyenv-uninstall:
10 | 	@$(REPO_ROOT)/bin/_pyenv_uninstall $(VIRTUALENV_NAME)
11 | 
12 | gcloud:
13 | 	@gcloud components update
14 | 	@gcloud auth application-default login
15 | 
16 | pyenv: pyenv-uninstall
17 | 	# @$(REPO_ROOT)/bin/_setup_brew
18 | 	@$(REPO_ROOT)/bin/_pyenv_install $(VIRTUALENV_NAME)
19 | 	pip install -r https://raw.githubusercontent.com/ultralytics/yolov5/master/requirements.txt
20 | 
21 | pyenv-local: pyenv
22 | 	pip uninstall -y modelstore
23 | 	pip install -e $(REPO_ROOT)
24 | 
25 | pyenv-test: pyenv
26 | 	pip uninstall -y modelstore
27 | 	pip install --no-cache-dir -i https://test.pypi.org/simple/ modelstore
28 | 
29 | pyenv-prod: pyenv
30 | 	pip uninstall -y modelstore
31 | 	pip install --no-cache-dir --upgrade modelstore
32 | 
33 | refresh:
34 | 	@echo "\n 🔵 Refreshing installation of modelstore"
35 | 	pip install --upgrade pip setuptools wheel
36 | 	pip uninstall -y modelstore
37 | 	pip install --no-cache-dir -e $(REPO_ROOT)
38 | 
--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
1 | # modelstore examples
2 | 
3 | This directory contains examples of training models and storing them into a model store over different types of storage.
4 | 
5 | The Python script in `examples-by-ml-library` iterates over all of the supported ML frameworks and all of the supported storage types. For each pair, it trains a model, uploads it to storage, and then downloads/loads it back.
6 | 
7 | The bash script in `cli-examples` has examples of how to run `python -m modelstore` commands.
8 | 
9 | ## Pre-requisites
10 | 
11 | As with the main library, these scripts have been developed using [pyenv](https://github.com/pyenv/pyenv) and [pyenv-virtualenv](https://github.com/pyenv/pyenv-virtualenv).
12 | 
13 | ## Set up - examples by ML library
14 | 
15 | Warning: the `examples-by-ml-library` virtual environment installs ALL of the machine learning frameworks that are supported by `modelstore`. In your own project, you will only need to install the machine learning frameworks that you need.
16 | 
17 | Start by `cd`'ing into the directory containing the example you want to run:
18 | 
19 | ```bash
20 | ❯ cd examples-by-ml-library/
21 | ```
22 | 
23 | And then you can use this `Makefile` command that creates a new virtual environment
24 | and installs all of the requirements:
25 | 
26 | ```bash
27 | ❯ make pyenv
28 | ```
29 | 
30 | ## Running all of the examples
31 | 
32 | After creating a virtual environment, you can run all of the examples using:
33 | 
34 | ```bash
35 | ❯ make run
36 | ```
37 | 
38 | This will run all of the examples - you can expect it to take some time!
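
Note: the examples pick up their storage configuration from environment variables. As a rough sketch (the variable names and storage-type labels below are taken from this repository's `.github/workflows/integration-tests.yml`; the values are placeholders, and the cloud backends will also need your usual provider credentials), you might export something like this before running the examples:

```bash
# Placeholder values - replace with your own buckets/projects/containers
export MODEL_STORE_ROOT_PREFIX="$HOME"                  # used for the "filesystem" storage type
export MODEL_STORE_AWS_BUCKET="my-s3-bucket"            # "aws-s3"
export MODEL_STORE_GCP_PROJECT="my-gcp-project"         # "google-cloud-storage"
export MODEL_STORE_GCP_BUCKET="my-gcs-bucket"
export MODEL_STORE_AZURE_CONTAINER="my-azure-container" # "azure-container"
export MODEL_STORE_MINIO_BUCKET="my-minio-bucket"       # "minio"
```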
39 | 
40 | ## Running a specific example
41 | 
42 | Start by `cd`'ing into the directory containing the example you want to run:
43 | 
44 | ```bash
45 | ❯ cd examples-by-ml-library/
46 | ```
47 | 
48 | After creating a virtual environment, you can run a specific example using:
49 | 
50 | ```bash
51 | ❯ python main.py --modelstore-in $backend --ml-framework $framework
52 | ```
53 | 
--------------------------------------------------------------------------------
/examples/cli-examples/.gitignore:
--------------------------------------------------------------------------------
1 | model.joblib
2 | 
3 | downloaded_model/
4 | 
--------------------------------------------------------------------------------
/examples/cli-examples/Makefile:
--------------------------------------------------------------------------------
1 | include ../Makefile-example
2 | 
3 | .PHONY: run
4 | run:
5 | 	@./run-all.sh
6 | 
--------------------------------------------------------------------------------
/examples/cli-examples/model.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Neal Lathia
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import joblib
16 | import numpy as np
17 | from sklearn.datasets import load_diabetes
18 | from sklearn.ensemble import GradientBoostingRegressor
19 | from sklearn.metrics import mean_squared_error
20 | from sklearn.model_selection import train_test_split
21 | 
22 | from modelstore.utils.cli import info
23 | 
24 | 
25 | def train_and_save():
26 |     diabetes = load_diabetes()
27 |     X_train, X_test, y_train, y_test = train_test_split(
28 |         diabetes.data, diabetes.target, test_size=0.1, random_state=13
29 |     )
30 | 
31 |     params = {
32 |         "n_estimators": 500,
33 |         "max_depth": 4,
34 |         "min_samples_split": 5,
35 |         "learning_rate": 0.01,
36 |         "loss": "ls",
37 |     }
38 |     model = GradientBoostingRegressor(**params)
39 |     model.fit(X_train, y_train)
40 | 
41 |     preds = model.predict(X_test)
42 |     rmse = np.sqrt(mean_squared_error(y_test, preds))
43 |     info(f"📈 Trained a model with RMSE={rmse}.")
44 | 
45 |     file_name = "model.joblib"
46 |     joblib.dump(model, file_name)
47 |     info(f"✅ Model saved to file={file_name}.")
48 | 
49 | 
50 | if __name__ == "__main__":
51 |     train_and_save()
52 | 
--------------------------------------------------------------------------------
/examples/cli-examples/requirements.txt:
--------------------------------------------------------------------------------
1 | scikit-learn
2 | 
--------------------------------------------------------------------------------
/examples/cli-examples/run-all.sh:
--------------------------------------------------------------------------------
1 | set -e
2 | 
3 | DOMAIN_NAME="cli-example"
4 | TARGET_DIR="downloaded_model"
5 | FILE_NAME="model.joblib"
6 | 
7 | echo "\n🔵 Training a model...\n"
8 | python model.py
9 | 
10 | echo "\n🔵 Uploading the model via the CLI...\n"
11 | MODEL_ID=$(python -m modelstore upload "$DOMAIN_NAME" "$FILE_NAME")
12 | 
13 | echo "\n🔵
Downloading model=$MODEL_ID via the CLI...\n" 14 | mkdir -p "$TARGET_DIR" 15 | python -m modelstore download "$DOMAIN_NAME" "$MODEL_ID" "$TARGET_DIR" 16 | 17 | echo "\n✅ Done! Cleaning up..." 18 | 19 | rm -rf "$TARGET_DIR" 20 | rm -rf "$FILE_NAME" 21 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | multi_line_output = 3 3 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/Makefile: -------------------------------------------------------------------------------- 1 | include ../Makefile-example 2 | 3 | .PHONY: run 4 | run: 5 | @./run-all.sh 6 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/examples/examples-by-ml-library/libraries/__init__.py -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/annoy_example.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import random 15 | 16 | from annoy import AnnoyIndex 17 | 18 | from modelstore.model_store import ModelStore 19 | 20 | _NUM_DIMENSIONS = 40 21 | _NUM_TREES = 10 22 | _METRIC = "angular" 23 | _DOMAIN_NAME = "example-annoy-index" 24 | 25 | 26 | def _train_example_model() -> AnnoyIndex: 27 | # Create an index 28 | print("🤖 Creating an Annoy index...") 29 | model = AnnoyIndex(_NUM_DIMENSIONS, _METRIC) 30 | for i in range(1000): 31 | vector = [random.gauss(0, 1) for z in range(_NUM_DIMENSIONS)] 32 | model.add_item(i, vector) 33 | model.build(_NUM_TREES) 34 | 35 | # Show some nearest neighbours 36 | results = model.get_nns_by_item(0, 10) 37 | print(f"🔍 Nearest neighbors = {results}.") 38 | return model 39 | 40 | 41 | def train_and_upload(modelstore: ModelStore) -> dict: 42 | # Train an Annoy index 43 | model = _train_example_model() 44 | 45 | # Upload the model to the model store 46 | print(f'⤴️ Uploading the Annoy model to the "{_DOMAIN_NAME}" domain.') 47 | meta_data = modelstore.upload( 48 | _DOMAIN_NAME, 49 | model=model, 50 | num_dimensions=_NUM_DIMENSIONS, 51 | metric=_METRIC, 52 | num_trees=_NUM_TREES, 53 | ) 54 | return meta_data 55 | 56 | 57 | def load_and_test(modelstore: ModelStore, model_domain: str, model_id: str): 58 | # Load the model back into memory! 
59 | print(f'⤵️ Loading the Annoy "{model_domain}" domain model={model_id}') 60 | model = modelstore.load(model_domain, model_id) 61 | 62 | # Find some nearest neighbours 63 | results = model.get_nns_by_item(0, 10) 64 | print(f"🔍 Nearest neighbors = {results}.") 65 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/catboost_example.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import catboost as ctb 16 | from libraries.util.datasets import load_regression_dataset 17 | from libraries.util.domains import DIABETES_DOMAIN 18 | from sklearn.metrics import mean_squared_error 19 | 20 | from modelstore.model_store import ModelStore 21 | 22 | 23 | def _train_example_model() -> ctb.CatBoostRegressor: 24 | # Load the data 25 | X_train, X_test, y_train, y_test = load_regression_dataset() 26 | 27 | # Train the model 28 | print("🤖 Training a CatBoostRegressor") 29 | model = ctb.CatBoostRegressor(allow_writing_files=False) 30 | model.fit(X_train, y_train) 31 | 32 | results = mean_squared_error(y_test, model.predict(X_test)) 33 | print(f"🔍 Fit model MSE={results}.") 34 | return model 35 | 36 | 37 | def train_and_upload(modelstore: ModelStore) -> dict: 38 | # Train a Catboost model 39 | model = _train_example_model() 40 | 41 | # Upload the model to the model store 42 | print(f'⤴️ Uploading the catboost model to the "{DIABETES_DOMAIN}" domain.') 43 | meta_data = modelstore.upload(DIABETES_DOMAIN, model=model) 44 | return meta_data 45 | 46 | 47 | def load_and_test(modelstore: ModelStore, model_domain: str, model_id: str): 48 | # Load the model back into memory! 49 | print(f'⤵️ Loading the catboost "{model_domain}" domain model={model_id}') 50 | model = modelstore.load(model_domain, model_id) 51 | 52 | # Run some example predictions 53 | _, X_test, _, y_test = load_regression_dataset() 54 | results = mean_squared_error(y_test, model.predict(X_test)) 55 | print(f"🔍 Loaded model MSE={results}.") 56 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/fastai_example.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from fastai.tabular.all import * 16 | from libraries.util.datasets import load_regression_dataframe 17 | from libraries.util.domains import DIABETES_DOMAIN 18 | 19 | from modelstore.model_store import ModelStore 20 | 21 | 22 | def _train_example_model() -> TabularLearner: 23 | # Load the data 24 | df = load_regression_dataframe() 25 | 26 | # Train the model 27 | print(f"🤖 Training a fastai tabular learner...") 28 | dl = TabularDataLoaders.from_df(df, y_names=["y"]) 29 | learner = tabular_learner(dl) 30 | learner.fit_one_cycle(n_epoch=1) 31 | return learner 32 | 33 | 34 | def train_and_upload(modelstore: ModelStore) -> dict: 35 | # Train a fast.ai model 36 | learner = _train_example_model() 37 | 38 | # Upload the model to the model store 39 | print(f'⤴️ Uploading the fastai model to the "{DIABETES_DOMAIN}" domain.') 40 | meta_data = modelstore.upload(DIABETES_DOMAIN, learner=learner) 41 | return meta_data 42 | 43 | 44 | def load_and_test(modelstore: ModelStore, model_domain: str, model_id: str): 45 | # Load the model back into memory! 46 | print(f'⤵️ Loading the fastai "{model_domain}" domain model={model_id}') 47 | model = modelstore.load(model_domain, model_id) 48 | # ... use for inference 49 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/gensim_example.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from gensim.models import word2vec 16 | from libraries.util.datasets import load_text_dataset 17 | from libraries.util.domains import NEWSGROUP_EMBEDDINGS_DOMAIN 18 | 19 | from modelstore.model_store import ModelStore 20 | 21 | 22 | def _train_example_model() -> word2vec.Word2Vec: 23 | # Load the data 24 | sentences = load_text_dataset() 25 | 26 | # Train a word2vec model 27 | print(f"🤖 Training a word2vec model...") 28 | model = word2vec.Word2Vec(sentences, min_count=2) 29 | 30 | most_similar = set([k[0] for k in model.wv.most_similar("cool", topn=5)]) 31 | print(f"🤖 Most similar to 'cool': {most_similar}") 32 | return model 33 | 34 | 35 | def train_and_upload(modelstore: ModelStore) -> dict: 36 | # Train a word2vec model 37 | model = _train_example_model() 38 | 39 | # Upload the model to the model store 40 | print( 41 | f"⤴️ Uploading the word2vec model to the {NEWSGROUP_EMBEDDINGS_DOMAIN} domain." 42 | ) 43 | meta_data = modelstore.upload(NEWSGROUP_EMBEDDINGS_DOMAIN, model=model) 44 | return meta_data 45 | 46 | 47 | def load_and_test(modelstore: ModelStore, model_domain: str, model_id: str): 48 | # Load the model back into memory! 
49 | print(f'⤵️ Loading the word2vec "{model_domain}" domain model={model_id}') 50 | model = modelstore.load(model_domain, model_id) 51 | 52 | # Find some nearest neighbours 53 | most_similar = set([k[0] for k in model.wv.most_similar("cool", topn=5)]) 54 | print(f"🤖 Most similar to 'cool': {most_similar}") 55 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/huggingface/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/examples/examples-by-ml-library/libraries/huggingface/__init__.py -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/huggingface/distilbert.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from transformers import ( 16 | AutoConfig, 17 | AutoModelForSequenceClassification, 18 | AutoTokenizer 19 | ) 20 | 21 | from modelstore.model_store import ModelStore 22 | 23 | _DOMAIN_NAME = "example-distilbert-model" 24 | 25 | 26 | def _train_example_model(): 27 | model_name = "distilbert-base-cased" 28 | config = AutoConfig.from_pretrained( 29 | model_name, 30 | num_labels=2, 31 | finetuning_task="mnli", 32 | ) 33 | tokenizer = AutoTokenizer.from_pretrained(model_name) 34 | model = AutoModelForSequenceClassification.from_pretrained( 35 | model_name, 36 | config=config, 37 | ) 38 | 39 | # Skipped for brevity! 40 | # trainer = Trainer( 41 | # model=model, 42 | # args=training_args, 43 | # train_dataset=train_dataset, 44 | # eval_dataset=eval_dataset, 45 | # compute_metrics=build_compute_metrics_fn(data_args.task_name), 46 | # ) 47 | # trainer.train() 48 | return model, tokenizer, config 49 | 50 | 51 | def train_and_upload(modelstore: ModelStore) -> dict: 52 | # Train a model 53 | model, tokenizer, config = _train_example_model() 54 | 55 | # Upload the model to the model store 56 | print(f'⤴️ Uploading the transformers model to the "{_DOMAIN_NAME}" domain.') 57 | meta_data = modelstore.upload( 58 | _DOMAIN_NAME, 59 | config=config, 60 | model=model, 61 | tokenizer=tokenizer, 62 | ) 63 | return meta_data 64 | 65 | 66 | def load_and_test(modelstore: ModelStore, model_domain: str, model_id: str): 67 | # Load the model back into memory! 68 | print(f'⤵️ Loading the transformers "{model_domain}" domain model={model_id}') 69 | model, tokenizer, config = modelstore.load(model_domain, model_id) 70 | 71 | # Run some example predictions 72 | # ... 
73 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/huggingface/dpt.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import Tuple 15 | 16 | from transformers import ( 17 | DPTForDepthEstimation, 18 | DPTImageProcessor, 19 | DPTPreTrainedModel 20 | ) 21 | 22 | from modelstore.model_store import ModelStore 23 | 24 | _DOMAIN_NAME = "example-dpt-model" 25 | 26 | 27 | def _load_dpt_model( 28 | source: str = "Intel/dpt-large", 29 | ) -> Tuple[DPTPreTrainedModel, DPTImageProcessor]: 30 | print(f"Loading a dpt anything model from:{source}.") 31 | processor = DPTImageProcessor.from_pretrained(source) 32 | model = DPTForDepthEstimation.from_pretrained(source) 33 | return model, processor 34 | 35 | 36 | def train_and_upload(modelstore: ModelStore) -> dict: 37 | # Train a model 38 | model, processor = _load_dpt_model() 39 | 40 | # Upload the model to the model store 41 | print(f'⤴️ Uploading the transformers model to the "{_DOMAIN_NAME}" domain.') 42 | meta_data = modelstore.upload( 43 | _DOMAIN_NAME, 44 | model=model, 45 | processor=processor, 46 | ) 47 | return meta_data 48 | 49 | 50 | def load_and_test(modelstore: ModelStore, model_domain: str, model_id: str): 51 | # Load the model back into memory! 52 | print(f'⤵️ Loading the transformers "{model_domain}" domain model={model_id}') 53 | model, processor, config = modelstore.load(model_domain, model_id) 54 | 55 | print(f"Loaded model={type(model)}") 56 | print(f"Loaded processor={type(processor)}") 57 | print(f"Loaded config={type(config)}") 58 | # Run some example predictions 59 | # ... 60 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/huggingface/gpt2_pytorch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from transformers import GPT2LMHeadModel, GPT2Tokenizer 16 | 17 | from modelstore.model_store import ModelStore 18 | 19 | _DOMAIN_NAME = "example-gpt2-model" 20 | 21 | 22 | def _run_prediction(model: GPT2LMHeadModel, tokenizer: GPT2Tokenizer): 23 | text = "What is MLOps, and why is it important?" 
24 | encoded_input = tokenizer(text, return_tensors="pt") 25 | output = model.generate(**encoded_input) 26 | decoded = tokenizer.decode(output[0]) 27 | print(f"🔍 Model output={decoded}.") 28 | 29 | 30 | def _train_example_model(): 31 | # Returns a PyTorch model 32 | tokenizer = GPT2Tokenizer.from_pretrained("gpt2") 33 | model = GPT2LMHeadModel.from_pretrained("gpt2") 34 | 35 | _run_prediction(model, tokenizer) 36 | return model, tokenizer 37 | 38 | 39 | def train_and_upload(modelstore: ModelStore) -> dict: 40 | # Train a model 41 | model, tokenizer = _train_example_model() 42 | 43 | # Upload the model to the model store 44 | print(f'⤴️ Uploading the transformers model to the "{_DOMAIN_NAME}" domain.') 45 | meta_data = modelstore.upload( 46 | _DOMAIN_NAME, 47 | model=model, 48 | tokenizer=tokenizer, 49 | ) 50 | return meta_data 51 | 52 | 53 | def load_and_test(modelstore: ModelStore, model_domain: str, model_id: str): 54 | # Load the model back into memory! 55 | print(f'⤵️ Loading the transformers "{model_domain}" domain model={model_id}') 56 | model, tokenizer, _ = modelstore.load(model_domain, model_id) 57 | _run_prediction(model, tokenizer) 58 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/huggingface/gpt2_tensorflow.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from transformers import GPT2Tokenizer, TFGPT2LMHeadModel 16 | 17 | from modelstore.model_store import ModelStore 18 | 19 | _DOMAIN_NAME = "example-gpt2-model" 20 | 21 | 22 | def _run_prediction(model: TFGPT2LMHeadModel, tokenizer: GPT2Tokenizer): 23 | text = "What is MLOps, and why is it important?" 24 | encoded_input = tokenizer(text, return_tensors="tf") 25 | output = model.generate(**encoded_input) 26 | decoded = tokenizer.decode(output[0]) 27 | print(f"🔍 Model output={decoded}.") 28 | 29 | 30 | def _train_example_model(): 31 | # Returns a Tensorflow model 32 | tokenizer = GPT2Tokenizer.from_pretrained("gpt2") 33 | model = TFGPT2LMHeadModel.from_pretrained("gpt2") 34 | 35 | _run_prediction(model, tokenizer) 36 | return model, tokenizer 37 | 38 | 39 | def train_and_upload(modelstore: ModelStore) -> dict: 40 | # Train a model 41 | model, tokenizer = _train_example_model() 42 | 43 | # Upload the model to the model store 44 | print(f'⤴️ Uploading the transformers model to the "{_DOMAIN_NAME}" domain.') 45 | meta_data = modelstore.upload( 46 | _DOMAIN_NAME, 47 | model=model, 48 | tokenizer=tokenizer, 49 | ) 50 | return meta_data 51 | 52 | 53 | def load_and_test(modelstore: ModelStore, model_domain: str, model_id: str): 54 | # Load the model back into memory! 
55 | print(f'⤵️ Loading the transformers "{model_domain}" domain model={model_id}') 56 | model, tokenizer, _ = modelstore.load(model_domain, model_id) 57 | _run_prediction(model, tokenizer) 58 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/huggingface/sam.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import Tuple 15 | 16 | from transformers import SamModel, SamProcessor 17 | 18 | from modelstore.model_store import ModelStore 19 | 20 | _DOMAIN_NAME = "example-sam-model" 21 | 22 | 23 | def _load_sam_model( 24 | source: str = "facebook/sam-vit-base", 25 | ) -> Tuple[SamModel, SamProcessor]: 26 | print(f"Loading a segment anything model from:{source}.") 27 | model = SamModel.from_pretrained(source) 28 | processor = SamProcessor.from_pretrained(source) 29 | return model, processor 30 | 31 | 32 | def train_and_upload(modelstore: ModelStore) -> dict: 33 | # Train a model 34 | model, processor = _load_sam_model() 35 | 36 | # Upload the model to the model store 37 | print(f'⤴️ Uploading the transformers model to the "{_DOMAIN_NAME}" domain.') 38 | meta_data = modelstore.upload( 39 | _DOMAIN_NAME, 40 | model=model, 41 | processor=processor, 42 | ) 43 | return meta_data 44 | 45 | 46 | def load_and_test(modelstore: ModelStore, model_domain: str, model_id: str): 47 | # Load the model back into memory! 48 | print(f'⤵️ Loading the transformers "{model_domain}" domain model={model_id}') 49 | model, processor, config = modelstore.load(model_domain, model_id) 50 | 51 | print(f"Loaded model={type(model)}") 52 | print(f"Loaded processor={type(processor)}") 53 | print(f"Loaded config={type(config)}") 54 | # Run some example predictions 55 | # ... 56 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/keras_example.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from libraries.util.datasets import load_regression_dataset 16 | from libraries.util.domains import DIABETES_DOMAIN 17 | from sklearn.metrics import mean_squared_error 18 | from tensorflow import keras 19 | 20 | from modelstore.model_store import ModelStore 21 | 22 | 23 | def _train_example_model() -> keras.Model: 24 | # Load the data 25 | X_train, X_test, y_train, y_test = load_regression_dataset() 26 | 27 | # Train a model 28 | print(f"🤖 Training a keras model...") 29 | inputs = keras.Input(shape=(10,)) 30 | outputs = keras.layers.Dense(1)(inputs) 31 | model = keras.Model(inputs, outputs) 32 | model.compile(optimizer="adam", loss="mean_squared_error") 33 | model.fit(X_train, y_train, epochs=10) 34 | 35 | results = mean_squared_error(y_test, model.predict(X_test)) 36 | print(f"🔍 Trained model MSE={results}.") 37 | return model 38 | 39 | 40 | def train_and_upload(modelstore: ModelStore) -> dict: 41 | # Train a word2vec model 42 | model = _train_example_model() 43 | 44 | # Upload the model to the model store 45 | print(f'⤴️ Uploading the keras model to the "{DIABETES_DOMAIN}" domain.') 46 | meta_data = modelstore.upload(DIABETES_DOMAIN, model=model) 47 | return meta_data 48 | 49 | 50 | def load_and_test(modelstore: ModelStore, model_domain: str, model_id: str): 51 | # Load the model back into memory! 52 | print(f'⤵️ Loading the keras "{model_domain}" domain model={model_id}') 53 | model = modelstore.load(model_domain, model_id) 54 | 55 | # Run some test predictions 56 | _, X_test, _, y_test = load_regression_dataset() 57 | results = mean_squared_error(y_test, model.predict(X_test)) 58 | print(f"🔍 Loaded model MSE={results}.") 59 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/lightgbm_example.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import lightgbm as lgb 16 | from libraries.util.datasets import load_regression_dataset 17 | from libraries.util.domains import DIABETES_DOMAIN 18 | from sklearn.metrics import mean_squared_error 19 | 20 | from modelstore.model_store import ModelStore 21 | 22 | 23 | def _train_example_model() -> lgb.Booster: 24 | # Load the data 25 | X_train, X_test, y_train, y_test = load_regression_dataset() 26 | 27 | # Train the model 28 | print("🤖 Training a light GBM model...") 29 | train_data = lgb.Dataset(X_train, label=y_train) 30 | validation_data = lgb.Dataset(X_test, y_test) 31 | num_round = 5 32 | param = {"num_leaves": 31, "objective": "binary"} 33 | model = lgb.train(param, train_data, num_round, valid_sets=[validation_data]) 34 | 35 | results = mean_squared_error(y_test, model.predict(X_test)) 36 | print(f"🔍 Trained model MSE={results}.") 37 | return model 38 | 39 | 40 | def train_and_upload(modelstore: ModelStore) -> dict: 41 | # Train a Light GBM model 42 | model = _train_example_model() 43 | 44 | # Upload the model to the model store 45 | print(f'⤴️ Uploading the light gbm model to the "{DIABETES_DOMAIN}" domain.') 46 | meta_data = modelstore.upload(DIABETES_DOMAIN, model=model) 47 | return meta_data 48 | 49 | 50 | def load_and_test(modelstore: ModelStore, model_domain: str, model_id: str): 51 | # Load the model back into memory! 52 | print(f'⤵️ Loading the light gbm "{model_domain}" domain model={model_id}') 53 | model = modelstore.load(model_domain, model_id) 54 | 55 | # Run some example predictions 56 | _, X_test, _, y_test = load_regression_dataset() 57 | results = mean_squared_error(y_test, model.predict(X_test)) 58 | print(f"🔍 Loaded model MSE={results}.") 59 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/onnx_sklearn_example.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import numpy as np 16 | import onnx 17 | from libraries.util.datasets import load_regression_dataset 18 | from libraries.util.domains import DIABETES_DOMAIN 19 | from onnxruntime import InferenceSession 20 | from skl2onnx import to_onnx 21 | from sklearn.ensemble import RandomForestRegressor 22 | from sklearn.metrics import mean_squared_error 23 | 24 | from modelstore.model_store import ModelStore 25 | 26 | 27 | def _train_example_model() -> onnx.ModelProto: 28 | X_train, X_test, y_train, y_test = load_regression_dataset() 29 | 30 | print(f"🔍 Training a random forest regressor") 31 | clf = RandomForestRegressor(random_state=12) 32 | clf.fit(X_train, y_train) 33 | 34 | print(f"🔍 Converting the model to onnx") 35 | model = to_onnx(clf, X_train[:1].astype(np.float32), target_opset=12) 36 | 37 | print(f"🔍 Loading the onnx model as an inference session") 38 | sess = InferenceSession(model.SerializeToString()) 39 | y_pred = sess.run(None, {"X": X_test.astype(np.float32)})[0] 40 | 41 | results = mean_squared_error(y_test, y_pred) 42 | print(f"🔍 Trained model MSE={results}.") 43 | return model 44 | 45 | 46 | def train_and_upload(modelstore: ModelStore) -> dict: 47 | # Train a scikit-learn model 48 | model = _train_example_model() 49 | 50 | # Upload the model to the model store 51 | print(f'⤴️ Uploading the onnx model to the "{DIABETES_DOMAIN}" domain.') 52 | meta_data = modelstore.upload(DIABETES_DOMAIN, model=model) 53 | return meta_data 54 | 55 | 56 | def load_and_test(modelstore: ModelStore, model_domain: str, model_id: str): 57 | # Load the model back into memory! 58 | print(f'⤵️ Loading the onnx "{model_domain}" domain model={model_id}') 59 | sess = modelstore.load(model_domain, model_id) 60 | 61 | # Run some example predictions 62 | _, X_test, _, y_test = load_regression_dataset() 63 | y_pred = sess.run(None, {"X": X_test.astype(np.float32)})[0] 64 | results = mean_squared_error(y_test, y_pred) 65 | print(f"🔍 Loaded model MSE={results}.") 66 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/prophet_example.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import random 16 | from datetime import datetime, timedelta 17 | 18 | import pandas as pd 19 | from prophet import Prophet 20 | 21 | from modelstore.model_store import ModelStore 22 | 23 | _DOMAIN_NAME = "example-prophet-forecast" 24 | 25 | 26 | def _train_example_model() -> Prophet: 27 | print("🤖 Creating fake time series data...") 28 | now = datetime.now() 29 | rows = [] 30 | for i in range(100): 31 | rows.append({"ds": now + timedelta(days=i), "y": random.gauss(0, 1)}) 32 | df = pd.DataFrame(rows) 33 | 34 | model = Prophet() 35 | model.fit(df) 36 | 37 | # Show some predictions 38 | future = model.make_future_dataframe(periods=5) 39 | print(f"🔍 Predictions = {future.tail().to_dict(orient='records')}.") 40 | return model 41 | 42 | 43 | def train_and_upload(modelstore: ModelStore) -> dict: 44 | # Train a Prophet model 45 | model = _train_example_model() 46 | 47 | # Upload the model to the model store 48 | print(f'⤴️ Uploading the Prophet model to the "{_DOMAIN_NAME}" domain.') 49 | meta_data = modelstore.upload( 50 | _DOMAIN_NAME, 51 | model=model, 52 | ) 53 | return meta_data 54 | 55 | 56 | def load_and_test(modelstore: ModelStore, model_domain: str, model_id: str): 57 | # Load the model back into memory! 58 | print(f'⤵️ Loading the Prophet "{model_domain}" domain model={model_id}') 59 | model = modelstore.load(model_domain, model_id) 60 | 61 | # Show some predictions 62 | future = model.make_future_dataframe(periods=5) 63 | print(f"🔍 Predictions = {future.tail().to_dict(orient='records')}.") 64 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/pyspark_example.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | from libraries.util.datasets import load_regression_dataframe 16 | from libraries.util.domains import DIABETES_DOMAIN 17 | from pyspark.ml import Pipeline, PipelineModel 18 | from pyspark.ml.feature import VectorAssembler 19 | from pyspark.ml.regression import RandomForestRegressor 20 | from pyspark.sql import SparkSession, SQLContext 21 | from sklearn.metrics import mean_squared_error 22 | 23 | from modelstore.model_store import ModelStore 24 | 25 | 26 | def _spark_dataset(sqlContext): 27 | df = load_regression_dataframe() 28 | features = [c for c in df.columns if c != "y"] 29 | spark_df = sqlContext.createDataFrame(df) 30 | assembler = VectorAssembler(inputCols=features, outputCol="x") 31 | return assembler.transform(spark_df).drop(*features) 32 | 33 | 34 | def _train_example_model() -> PipelineModel: 35 | sc = SparkSession.builder.getOrCreate() 36 | sqlContext = SQLContext(sc) 37 | 38 | # Load the data into Spark 39 | spark_df = _spark_dataset(sqlContext) 40 | 41 | # Train a pipeline 42 | rf = RandomForestRegressor(labelCol="y", featuresCol="x", numTrees=5) 43 | pipeline = Pipeline(stages=[rf]) 44 | model = pipeline.fit(spark_df) 45 | 46 | predictions = model.transform(spark_df).toPandas() 47 | y_pred = predictions["prediction"] 48 | y_test = predictions["y"] 49 | results = mean_squared_error(y_test, y_pred) 50 | print(f"🔍 Trained model MSE={results}.") 51 | return model 52 | 53 | 54 | def train_and_upload(modelstore: ModelStore) -> dict: 55 | # Train a model 56 | model = _train_example_model() 57 | 58 | # Upload the model to the model store 59 | print(f'⤴️ Uploading the pyspark model to the "{DIABETES_DOMAIN}" domain.') 60 | return modelstore.upload(DIABETES_DOMAIN, model=model) 61 | 62 | 63 | def load_and_test(modelstore: ModelStore, model_domain: str, model_id: str): 64 | # Create a context 65 | sc = SparkSession.builder.getOrCreate() 66 | sqlContext = SQLContext(sc) 67 | 68 | # Load the model back into memory! 69 | print(f'⤵️ Loading the pyspark "{model_domain}" domain model={model_id}') 70 | model = modelstore.load(model_domain, model_id) 71 | 72 | # Load the data into Spark 73 | spark_df = _spark_dataset(sqlContext) 74 | 75 | # Run some example predictions 76 | predictions = model.transform(spark_df).toPandas() 77 | y_pred = predictions["prediction"] 78 | y_test = predictions["y"] 79 | results = mean_squared_error(y_test, y_pred) 80 | print(f"🔍 Loaded model MSE={results}.") 81 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/pytorch_example.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
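# The PyTorch example uploads the model together with its optimizer (see
# `train_and_upload` below); `modelstore.load` returns the restored module,
# which is switched to eval mode before predicting. A small inference sketch;
# wrapping the forward pass in `torch.no_grad()` skips building the autograd
# graph, which is all that is needed at prediction time:

import torch

def _predict_numpy(model, inputs):
    # `inputs` is a float tensor shaped like the training features
    model.eval()
    with torch.no_grad():
        return model(inputs).numpy()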
14 | 15 | import torch 16 | from libraries.util.datasets import load_regression_dataset 17 | from libraries.util.domains import DIABETES_DOMAIN 18 | from sklearn.metrics import mean_squared_error 19 | from torch import nn 20 | 21 | from modelstore.model_store import ModelStore 22 | 23 | 24 | # pylint: disable=missing-class-docstring 25 | class ExampleNet(nn.Module): 26 | def __init__(self): 27 | super(ExampleNet, self).__init__() 28 | self.linear = nn.Linear(10, 1) 29 | 30 | def forward(self, x): 31 | return self.linear(x) 32 | 33 | 34 | def _train_example_model() -> ExampleNet: 35 | # Load the data 36 | X_train, X_test, y_train, y_test = load_regression_dataset(as_numpy=True) 37 | 38 | # Train the model 39 | model = ExampleNet() 40 | criterion = torch.nn.MSELoss() 41 | optimizer = torch.optim.Adam(model.parameters()) 42 | 43 | for epoch in range(5): 44 | print(f"🤖 Training epoch: {epoch}...") 45 | optimizer.zero_grad() 46 | outputs = model(X_train) 47 | loss = criterion(outputs, y_train) 48 | loss.backward() 49 | optimizer.step() 50 | 51 | results = mean_squared_error(y_test, model(X_test).detach().numpy()) 52 | print(f"🔍 Fit model MSE={results}.") 53 | return model, optimizer 54 | 55 | 56 | def train_and_upload(modelstore: ModelStore) -> dict: 57 | # Train a PyTorch model 58 | model, optimizer = _train_example_model() 59 | 60 | # Upload the model to the model store 61 | print(f'⤴️ Uploading the pytorch model to the "{DIABETES_DOMAIN}" domain.') 62 | meta_data = modelstore.upload(DIABETES_DOMAIN, model=model, optimizer=optimizer) 63 | return meta_data 64 | 65 | 66 | def load_and_test(modelstore: ModelStore, model_domain: str, model_id: str): 67 | # Load the model back into memory! 68 | print(f'⤵️ Loading the pytorch "{model_domain}" domain model={model_id}') 69 | model = modelstore.load(model_domain, model_id) 70 | model.eval() 71 | 72 | _, X_test, _, y_test = load_regression_dataset(as_numpy=True) 73 | results = mean_squared_error(y_test, model(X_test).detach().numpy()) 74 | print(f"🔍 Loaded model MSE={results}.") 75 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/raw_file_example.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
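# Models uploaded as raw files cannot be loaded back into memory with
# `modelstore.load` (see `load_and_test` below). A minimal sketch of fetching
# the stored archive with `download` instead, mirroring what the CLI in
# `modelstore/__main__.py` does (the target directory here is hypothetical):

import os

def _download_archive(modelstore, model_domain, model_id):
    target_dir = os.path.join("downloads", model_domain, model_id)
    os.makedirs(target_dir, exist_ok=True)
    # Returns the path to the downloaded model archive
    return modelstore.download(target_dir, model_domain, model_id)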
14 | 15 | import json 16 | import os 17 | import tempfile 18 | 19 | from modelstore.model_store import ModelStore 20 | 21 | _DOMAIN_NAME = "example-model-file" 22 | 23 | 24 | def _train_and_save_example_model(tmp_dir: str) -> str: 25 | # Create a file with a "model" -- in this case it is a json file, 26 | # but modelstore can handle any file type 27 | model_path = os.path.join(tmp_dir, "model.json") 28 | with open(model_path, "w") as out: 29 | out.write(json.dumps({"weights": [0.1, 0.2, 0.3]})) 30 | return model_path 31 | 32 | 33 | def train_and_upload(modelstore: ModelStore) -> dict: 34 | # Train a "model" and save it into a temp directory 35 | with tempfile.TemporaryDirectory() as tmp_dir: 36 | model_path = _train_and_save_example_model(tmp_dir) 37 | 38 | # Upload the model to the model store 39 | print(f'⤴️ Uploading the saved model to the "{_DOMAIN_NAME}" domain.') 40 | meta_data = modelstore.upload(_DOMAIN_NAME, model=model_path) 41 | return meta_data 42 | 43 | 44 | def load_and_test(modelstore: ModelStore, model_domain: str, model_id: str): 45 | # Loading the model back into memory is not supported 46 | # for models that have been saved to disk manually 47 | pass 48 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/shap_example.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import shap 16 | from libraries.util.datasets import load_regression_dataset 17 | from libraries.util.domains import DIABETES_DOMAIN 18 | from sklearn.ensemble import GradientBoostingRegressor 19 | from sklearn.metrics import mean_squared_error 20 | from sklearn.pipeline import Pipeline 21 | 22 | from modelstore.model_store import ModelStore 23 | 24 | EXPLAINER_DOMAIN = f"{DIABETES_DOMAIN}-explainer" 25 | 26 | 27 | def _train_example_model() -> Pipeline: 28 | X_train, X_test, y_train, y_test = load_regression_dataset() 29 | 30 | # Train a model using an sklearn pipeline 31 | params = { 32 | "n_estimators": 250, 33 | "max_depth": 4, 34 | "min_samples_split": 5, 35 | "learning_rate": 0.01, 36 | "loss": "ls", 37 | } 38 | model = GradientBoostingRegressor(**params) 39 | model.fit(X_train, y_train) 40 | results = mean_squared_error(y_test, model.predict(X_test)) 41 | print(f"🔍 Trained model MSE={results}.") 42 | 43 | explainer = shap.TreeExplainer(model) 44 | 45 | # Example only 46 | shap_values = explainer.shap_values(X_test)[0] 47 | print(f"🔍 Shap values={shap_values[:10]}.") 48 | 49 | return explainer 50 | 51 | 52 | def train_and_upload(modelstore: ModelStore) -> dict: 53 | # Train a model and return the explainer 54 | explainer = _train_example_model() 55 | 56 | # Upload the explainer to the model store 57 | print(f'⤴️ Uploading the explainer to the "{EXPLAINER_DOMAIN}" domain.') 58 | meta_data = modelstore.upload(EXPLAINER_DOMAIN, explainer=explainer) 59 | return meta_data 60 | 61 | 62 | def load_and_test(modelstore: ModelStore, model_domain: str, model_id: str): 63 | # Load the explainer back into memory! 64 | print(f'⤵️ Loading the explainer "{model_domain}" domain model={model_id}') 65 | explainer = modelstore.load(model_domain, model_id) 66 | 67 | # Run some example predictions 68 | _, X_test, _, _ = load_regression_dataset() 69 | shap_values = explainer.shap_values(X_test)[0] 70 | print(f"🔍 Shap values={shap_values[:10]}.") 71 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/sklearn_example.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
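# `modelstore.upload` returns a meta data dictionary; the `model_id` inside it
# is what `load_and_test` later needs to retrieve the model. A small sketch of
# pulling out the key fields; the structure mirrors the `Model` dataclass in
# `modelstore/metadata/model/model.py`:

def _describe_upload(meta_data: dict) -> str:
    model_id = meta_data["model"]["model_id"]
    domain = meta_data["model"]["domain"]
    return f"domain={domain} model_id={model_id}"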
14 | 15 | from libraries.util.datasets import load_regression_dataset 16 | from libraries.util.domains import DIABETES_DOMAIN 17 | from sklearn.ensemble import GradientBoostingRegressor 18 | from sklearn.metrics import mean_squared_error 19 | from sklearn.pipeline import Pipeline 20 | from sklearn.preprocessing import StandardScaler 21 | 22 | from modelstore.model_store import ModelStore 23 | 24 | 25 | def _train_example_model() -> Pipeline: 26 | X_train, X_test, y_train, y_test = load_regression_dataset() 27 | 28 | # Train a model using an sklearn pipeline 29 | params = { 30 | "n_estimators": 250, 31 | "max_depth": 4, 32 | "min_samples_split": 5, 33 | "learning_rate": 0.01, 34 | "loss": "squared_error", 35 | } 36 | pipeline = Pipeline( 37 | [ 38 | ("scaler", StandardScaler()), 39 | ("regressor", GradientBoostingRegressor(**params)), 40 | ] 41 | ) 42 | pipeline.fit(X_train, y_train) 43 | results = mean_squared_error(y_test, pipeline.predict(X_test)) 44 | print(f"🔍 Trained model MSE={results}.") 45 | return pipeline 46 | 47 | 48 | def train_and_upload(modelstore: ModelStore) -> dict: 49 | # Train a scikit-learn model 50 | model = _train_example_model() 51 | 52 | # Upload the model to the model store 53 | print(f'⤴️ Uploading the sklearn model to the "{DIABETES_DOMAIN}" domain.') 54 | meta_data = modelstore.upload(DIABETES_DOMAIN, model=model) 55 | return meta_data 56 | 57 | 58 | def load_and_test(modelstore: ModelStore, model_domain: str, model_id: str): 59 | # Load the model back into memory! 60 | print(f'⤵️ Loading the sklearn "{model_domain}" domain model={model_id}') 61 | model = modelstore.load(model_domain, model_id) 62 | 63 | # Run some example predictions 64 | _, X_test, _, y_test = load_regression_dataset() 65 | results = mean_squared_error(y_test, model.predict(X_test)) 66 | print(f"🔍 Loaded model MSE={results}.") 67 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/sklearn_with_explainer_example.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
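# When several artefacts are uploaded together (here a scikit-learn model plus
# its shap explainer), `modelstore.load` returns a dictionary keyed by library
# name rather than a single object; see `load_and_test` below. A small sketch
# of unpacking it:

def _unpack_models(models: dict):
    clf = models["sklearn"]
    explainer = models["shap"]
    return clf, explainer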
14 | 15 | import shap 16 | from libraries.util.datasets import load_regression_dataset 17 | from libraries.util.domains import DIABETES_DOMAIN 18 | from sklearn.ensemble import GradientBoostingRegressor 19 | from sklearn.metrics import mean_squared_error 20 | from sklearn.pipeline import Pipeline 21 | 22 | from modelstore.model_store import ModelStore 23 | 24 | 25 | def _train_example_model() -> Pipeline: 26 | X_train, X_test, y_train, y_test = load_regression_dataset() 27 | 28 | # Train a model using an sklearn pipeline 29 | params = { 30 | "n_estimators": 250, 31 | "max_depth": 4, 32 | "min_samples_split": 5, 33 | "learning_rate": 0.01, 34 | "loss": "squared_error", 35 | } 36 | model = GradientBoostingRegressor(**params) 37 | model.fit(X_train, y_train) 38 | results = mean_squared_error(y_test, model.predict(X_test)) 39 | print(f"🔍 Trained model MSE={results}.") 40 | 41 | explainer = shap.TreeExplainer(model) 42 | 43 | # Example only 44 | shap_values = explainer.shap_values(X_test)[0] 45 | print(f"🔍 Shap values={shap_values[:10]}.") 46 | 47 | return model, explainer 48 | 49 | 50 | def train_and_upload(modelstore: ModelStore) -> dict: 51 | # Train a scikit-learn model and an explainer 52 | model, explainer = _train_example_model() 53 | 54 | # Upload the model to the model store 55 | print(f'⤴️ Uploading the sklearn model to the "{DIABETES_DOMAIN}" domain.') 56 | meta_data = modelstore.upload(DIABETES_DOMAIN, model=model, explainer=explainer) 57 | return meta_data 58 | 59 | 60 | def load_and_test(modelstore: ModelStore, model_domain: str, model_id: str): 61 | # Load the model back into memory! 62 | print(f'⤵️ Loading sklearn/shap modelsL domain="{model_domain}" model={model_id}') 63 | models = modelstore.load(model_domain, model_id) 64 | clf = models["sklearn"] 65 | shp = models["shap"] 66 | 67 | # Run some example predictions 68 | _, X_test, _, y_test = load_regression_dataset() 69 | results = mean_squared_error(y_test, clf.predict(X_test)) 70 | print(f"🔍 Loaded model MSE={results}.") 71 | 72 | # Run some example explanations 73 | shap_values = shp.shap_values(X_test)[0] 74 | print(f"🔍 Shap values={shap_values[:10]}.") 75 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/sklearn_with_extras_example.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import os 16 | import tempfile 17 | from typing import Tuple 18 | 19 | import numpy 20 | from libraries.util.datasets import load_regression_dataset 21 | from libraries.util.domains import DIABETES_DOMAIN 22 | from sklearn.ensemble import GradientBoostingRegressor 23 | from sklearn.metrics import mean_squared_error 24 | from sklearn.pipeline import Pipeline 25 | 26 | from modelstore.model_store import ModelStore 27 | 28 | 29 | def _train_example_model(tmp_dir: str) -> Tuple[Pipeline, str]: 30 | X_train, X_test, y_train, y_test = load_regression_dataset() 31 | 32 | # Train a model using an sklearn pipeline 33 | params = { 34 | "n_estimators": 250, 35 | "max_depth": 4, 36 | "min_samples_split": 5, 37 | "learning_rate": 0.01, 38 | "loss": "squared_error", 39 | } 40 | model = GradientBoostingRegressor(**params) 41 | model.fit(X_train, y_train) 42 | 43 | predictions = model.predict(X_test) 44 | results = mean_squared_error(y_test, predictions) 45 | print(f"🔍 Trained model MSE={results}.") 46 | 47 | file_path = os.path.join(tmp_dir, "predictions.csv") 48 | numpy.savetxt(file_path, predictions, delimiter=",") 49 | 50 | return model, file_path 51 | 52 | 53 | def train_and_upload(modelstore: ModelStore) -> dict: 54 | # Train a scikit-learn model and create an extra file (with predictions) 55 | # in a temporary directory 56 | with tempfile.TemporaryDirectory() as tmp_dir: 57 | model, file_path = _train_example_model(tmp_dir) 58 | 59 | # Upload the model to the model store, with an extra file 60 | print(f'⤴️ Uploading the sklearn model to the "{DIABETES_DOMAIN}" domain.') 61 | meta_data = modelstore.upload(DIABETES_DOMAIN, model=model, extras=file_path) 62 | return meta_data 63 | 64 | 65 | def load_and_test(modelstore: ModelStore, model_domain: str, model_id: str): 66 | # Load the model back into memory! 67 | print(f'⤵️ Loading the sklearn model: domain="{model_domain}" model={model_id}') 68 | clf = modelstore.load(model_domain, model_id) 69 | 70 | # Run some example predictions 71 | _, X_test, _, y_test = load_regression_dataset() 72 | results = mean_squared_error(y_test, clf.predict(X_test)) 73 | print(f"🔍 Loaded model MSE={results}.") 74 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/skorch_example.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | from libraries.util.datasets import load_regression_dataset 16 | from libraries.util.domains import DIABETES_DOMAIN 17 | from sklearn.metrics import mean_squared_error 18 | from skorch.regressor import NeuralNetRegressor 19 | from torch import nn 20 | 21 | from modelstore.model_store import ModelStore 22 | 23 | 24 | class ExampleModule(nn.Module): 25 | def __init__(self, num_units=1): 26 | super(ExampleModule, self).__init__() 27 | self.linear = nn.Linear(10, num_units) 28 | 29 | def forward(self, X, **kwargs): 30 | return self.linear(X) 31 | 32 | 33 | def _train_example_model() -> NeuralNetRegressor: 34 | # Load the data 35 | X_train, X_test, y_train, y_test = load_regression_dataset(as_numpy=True) 36 | 37 | # Train a model 38 | net = NeuralNetRegressor( 39 | ExampleModule, 40 | max_epochs=1, 41 | lr=0.1, 42 | # Shuffle training data on each epoch 43 | iterator_train__shuffle=True, 44 | ) 45 | net.fit(X_train, y_train) 46 | 47 | results = mean_squared_error(y_test, net.predict(X_test)) 48 | print(f"🔍 Trained model MSE={results}.") 49 | return net 50 | 51 | 52 | def train_and_upload(modelstore: ModelStore) -> dict: 53 | # Train a skorch model 54 | model = _train_example_model() 55 | 56 | # Upload the model to the model store 57 | print(f'⤴️ Uploading the skorch model to the "{DIABETES_DOMAIN}" domain.') 58 | meta_data = modelstore.upload(DIABETES_DOMAIN, model=model) 59 | return meta_data 60 | 61 | 62 | def load_and_test(modelstore: ModelStore, model_domain: str, model_id: str): 63 | # Load the model back into memory! 64 | print(f'⤵️ Loading the skorch "{model_domain}" domain model={model_id}') 65 | model = modelstore.load(model_domain, model_id) 66 | 67 | # Run some example predictions 68 | _, X_test, _, y_test = load_regression_dataset(as_numpy=True) 69 | results = mean_squared_error(y_test, model.predict(X_test)) 70 | print(f"🔍 Loaded model MSE={results}.") 71 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/tensorflow_example.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import tensorflow as tf 16 | from libraries.util.datasets import load_regression_dataset 17 | from libraries.util.domains import DIABETES_DOMAIN 18 | from sklearn.metrics import mean_squared_error 19 | 20 | from modelstore.model_store import ModelStore 21 | 22 | 23 | def _train_example_model() -> tf.keras.models.Sequential: 24 | # Load the data 25 | X_train, X_test, y_train, y_test = load_regression_dataset() 26 | 27 | # Train a model 28 | model = tf.keras.models.Sequential( 29 | [ 30 | tf.keras.layers.Dense(5, activation="relu", input_shape=(10,)), 31 | tf.keras.layers.Dropout(0.2), 32 | tf.keras.layers.Dense(1), 33 | ] 34 | ) 35 | model.compile(optimizer="adam", loss="mean_squared_error") 36 | model.fit(X_train, y_train, epochs=10) 37 | 38 | results = mean_squared_error(y_test, model.predict(X_test)) 39 | print(f"🔍 Trained model MSE={results}.") 40 | return model 41 | 42 | 43 | def train_and_upload(modelstore: ModelStore) -> dict: 44 | # Train a model 45 | model = _train_example_model() 46 | 47 | # Upload the model to the model store 48 | print(f'⤴️ Uploading the tensorflow model to the "{DIABETES_DOMAIN}" domain.') 49 | meta_data = modelstore.upload(DIABETES_DOMAIN, model=model) 50 | return meta_data 51 | 52 | 53 | def load_and_test(modelstore: ModelStore, model_domain: str, model_id: str): 54 | # Load the model back into memory! 55 | print(f'⤵️ Loading the tensorflow "{model_domain}" domain model={model_id}') 56 | model = modelstore.load(model_domain, model_id) 57 | 58 | # Run some test predictions 59 | _, X_test, _, y_test = load_regression_dataset() 60 | results = mean_squared_error(y_test, model.predict(X_test)) 61 | print(f"🔍 Loaded model MSE={results}.") 62 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/util/domains.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | DIABETES_DOMAIN = "diabetes-boosting-demo" 16 | NEWSGROUP_EMBEDDINGS_DOMAIN = "newsgroups-embeddings" 17 | BREAST_CANCER_DOMAIN = "breast-cancer-demo" 18 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/xgboost_booster_example.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import xgboost as xgb 16 | from libraries.util.datasets import load_regression_dataset 17 | from libraries.util.domains import DIABETES_DOMAIN 18 | from sklearn.metrics import mean_squared_error 19 | 20 | from modelstore.model_store import ModelStore 21 | 22 | 23 | def _train_example_model() -> xgb.Booster: 24 | # Load the data 25 | X_train, X_test, y_train, y_test = load_regression_dataset() 26 | 27 | # Train a model 28 | model = xgb.XGBRegressor( 29 | objective="reg:squarederror", 30 | colsample_bytree=0.3, 31 | learning_rate=0.1, 32 | max_depth=5, 33 | alpha=10, 34 | n_estimators=10, 35 | ) 36 | model.fit(X_train, y_train) 37 | booster = model.get_booster() 38 | 39 | results = mean_squared_error(y_test, booster.predict(xgb.DMatrix(X_test))) 40 | print(f"🔍 Trained model MSE={results}.") 41 | return booster 42 | 43 | 44 | def train_and_upload(modelstore: ModelStore) -> dict: 45 | # Train a model 46 | model = _train_example_model() 47 | 48 | # Upload the model to the model store 49 | print(f'⤴️ Uploading the xgboost booster to the "{DIABETES_DOMAIN}" domain.') 50 | meta_data = modelstore.upload(DIABETES_DOMAIN, model=model) 51 | return meta_data 52 | 53 | 54 | def load_and_test(modelstore: ModelStore, model_domain: str, model_id: str): 55 | # Load the model back into memory! 56 | print(f'⤵️ Loading the xgboost booster "{model_domain}" domain model={model_id}') 57 | booster = modelstore.load(model_domain, model_id) 58 | 59 | # Run some example predictions 60 | _, X_test, _, y_test = load_regression_dataset() 61 | results = mean_squared_error(y_test, booster.predict(xgb.DMatrix(X_test))) 62 | print(f"🔍 Loaded model MSE={results}.") 63 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/xgboost_example.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
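# Two xgboost flavours are covered: `xgboost_booster_example.py` uploads the
# low-level `xgb.Booster` (whose `predict` expects an `xgb.DMatrix`), while
# this file uploads the sklearn-style `xgb.XGBRegressor`, which predicts on
# the feature matrix directly. A one-line sketch of moving between the two:

def _to_booster(model):
    # An XGBRegressor wraps a Booster; the booster example uploads this directly
    return model.get_booster()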
14 | 15 | import xgboost as xgb 16 | from libraries.util.datasets import load_regression_dataset 17 | from libraries.util.domains import DIABETES_DOMAIN 18 | from sklearn.metrics import mean_squared_error 19 | 20 | from modelstore.model_store import ModelStore 21 | 22 | 23 | def _train_example_model() -> xgb.XGBRegressor: 24 | # Load the data 25 | X_train, X_test, y_train, y_test = load_regression_dataset() 26 | 27 | # Train a model 28 | model = xgb.XGBRegressor( 29 | objective="reg:squarederror", 30 | colsample_bytree=0.3, 31 | learning_rate=0.1, 32 | max_depth=5, 33 | alpha=10, 34 | n_estimators=10, 35 | ) 36 | model.fit(X_train, y_train) 37 | 38 | results = mean_squared_error(y_test, model.predict(X_test)) 39 | print(f"🔍 Trained model MSE={results}.") 40 | return model 41 | 42 | 43 | def train_and_upload(modelstore: ModelStore) -> dict: 44 | # Train a model 45 | model = _train_example_model() 46 | 47 | # Upload the model to the model store 48 | print(f'⤴️ Uploading the xgboost model to the "{DIABETES_DOMAIN}" domain.') 49 | meta_data = modelstore.upload(DIABETES_DOMAIN, model=model) 50 | return meta_data 51 | 52 | 53 | def load_and_test(modelstore: ModelStore, model_domain: str, model_id: str): 54 | # Load the model back into memory! 55 | print(f'⤵️ Loading the xgboost "{model_domain}" domain model={model_id}') 56 | model = modelstore.load(model_domain, model_id) 57 | 58 | # Run some example predictions 59 | _, X_test, _, y_test = load_regression_dataset() 60 | results = mean_squared_error(y_test, model.predict(X_test)) 61 | print(f"🔍 Loaded model MSE={results}.") 62 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/libraries/yolo_example.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import torch 16 | 17 | from modelstore.model_store import ModelStore 18 | 19 | _YOLO_DOMAIN = "yolov5" 20 | 21 | 22 | def _predict(model): 23 | model.eval() 24 | img = "https://ultralytics.com/images/zidane.jpg" 25 | results = model(img) 26 | print(f"🔍 Prediction result: \n{results.pandas().xyxy[0]}.") 27 | 28 | 29 | def train_and_upload(modelstore: ModelStore) -> dict: 30 | # Load the yolov5 model 31 | model = torch.hub.load("ultralytics/yolov5", "yolov5s") 32 | _predict(model) 33 | 34 | # Upload the model to the model store 35 | print(f'⤴️ Uploading the yolo model to the "{_YOLO_DOMAIN}" domain.') 36 | meta_data = modelstore.upload(_YOLO_DOMAIN, model=model) 37 | return meta_data 38 | 39 | 40 | def load_and_test(modelstore: ModelStore, model_domain: str, model_id: str): 41 | # Load the model back into memory! 
42 | print(f'⤵️ Loading the yolo "{model_domain}" domain model={model_id}') 43 | model = modelstore.load(model_domain, model_id) 44 | _predict(model) 45 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/requirements.txt: -------------------------------------------------------------------------------- 1 | # Demo requirements 2 | # Versions unpinned to make it easier to test everything with 3 | # the latest version 4 | black 5 | isort 6 | click 7 | 8 | # Storage requirements 9 | azure-core 10 | azure-storage-blob 11 | boto3 12 | google-cloud-storage 13 | minio 14 | 15 | # Data / dependencies for ML libraries 16 | numpy==1.23.5 17 | numba>=0.55.1 18 | Cython>=0.29.28 19 | python-Levenshtein>=0.12.2 20 | 21 | # Prophet 22 | pystan>=2.19.1.1 # required to be installed before prophet 23 | 24 | # Machine learning libraries 25 | annoy 26 | catboost 27 | causalml 28 | fastai 29 | gensim 30 | lightgbm<4.0.0 # ImportError: cannot import name 'FEATURE_IMPORTANCE_TYPE_MAPPER' from 'lightgbm.basic' 31 | onnx 32 | onnxruntime 33 | onnxmltools 34 | prophet 35 | pyspark 36 | pytorch-lightning 37 | scikit-learn 38 | scipy==1.10.1 # More recent versions were not compatible with Gensim releases https://github.com/piskvorky/gensim/issues/3525 39 | shap 40 | skl2onnx 41 | skorch 42 | tensorflow; sys_platform != 'darwin' 43 | tensorflow-macos; sys_platform == 'darwin' 44 | tf-keras 45 | transformers 46 | torch 47 | torchvision 48 | xgboost 49 | -------------------------------------------------------------------------------- /examples/examples-by-ml-library/run-all.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | backends=( filesystem aws-s3 google-cloud-storage azure-container minio ) 3 | frameworks=( annoy catboost causalml fastai file gensim keras lightgbm \ 4 | onnx-sklearn onnx-lightgbm prophet pyspark pytorch pytorch-lightning \ 5 | sklearn sklearn-with-explainer sklearn-with-extras skorch xgboost xgboost-booster \ 6 | tensorflow hf-distilbert hf-gpt2-pt hf-gpt2-tf segment-anything yolov5 ) 7 | 8 | for framework in "${frameworks[@]}" 9 | do 10 | for backend in "${backends[@]}" 11 | do 12 | echo -e "\n 🔵 Running the $framework example in a $backend modelstore." 13 | python main.py --modelstore-in $backend --ml-framework $framework 14 | echo -e "\n ✅ Finished running the $framework example in $backend." 15 | done 16 | done 17 | -------------------------------------------------------------------------------- /modelstore/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | from pkg_resources import DistributionNotFound, get_distribution 15 | 16 | # pylint: disable=unused-import 17 | from modelstore.model_store import ModelStore 18 | 19 | try: 20 | __version__ = get_distribution("modelstore").version 21 | except DistributionNotFound: 22 | __version__ = "unavailable" 23 | -------------------------------------------------------------------------------- /modelstore/__main__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import os 15 | 16 | import click 17 | 18 | from modelstore.utils import cli as modelstorecli 19 | 20 | 21 | @click.group() 22 | def download_model(): 23 | pass 24 | 25 | 26 | @click.group() 27 | def upload_model(): 28 | pass 29 | 30 | 31 | @download_model.command() 32 | @click.argument("domain", type=str, required=True) 33 | @click.argument("model_id", type=str, required=True) 34 | @click.argument("parent_dir", type=str, required=False, default=None) 35 | def download(domain: str, model_id: str, parent_dir: str): 36 | """Download a model from the modelstore. Usage:\n 37 | ❯ python -m modelstore download 38 | """ 39 | try: 40 | target_dir = ( 41 | os.path.join(parent_dir, domain, model_id) 42 | if parent_dir is not None 43 | else os.path.join(domain, model_id) 44 | ) 45 | os.makedirs(target_dir, exist_ok=True) 46 | 47 | model_store = modelstorecli.model_store_from_env() 48 | archive_path = model_store.download(target_dir, domain, model_id) 49 | modelstorecli.success(f"✅ Downloaded: {domain}={model_id} to {archive_path}") 50 | except Exception: 51 | modelstorecli.failure("❌ Failed to download model:") 52 | raise 53 | 54 | 55 | @upload_model.command() 56 | @click.argument("domain", type=str, required=True) 57 | @click.argument("model", type=click.Path(exists=True)) 58 | def upload(domain: str, model: str): 59 | """Upload a model to the modelstore. 
Usage:\n 60 | ❯ python -m modelstore upload /path/to/file 61 | """ 62 | try: 63 | model_store = modelstorecli.model_store_from_env() 64 | meta_data = model_store.upload(domain, model=model) 65 | model_id = meta_data["model"]["model_id"] 66 | modelstorecli.success(f"✅ Uploaded: {domain}={model_id}") 67 | print(model_id) 68 | except SystemExit: 69 | # Failed to instantiate a model store from environment variables 70 | pass 71 | except Exception: 72 | modelstorecli.failure("❌ Failed to upload model:") 73 | raise 74 | 75 | 76 | cli = click.CommandCollection(sources=[download_model, upload_model]) 77 | 78 | if __name__ == "__main__": 79 | cli() 80 | -------------------------------------------------------------------------------- /modelstore/ids/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/modelstore/ids/__init__.py -------------------------------------------------------------------------------- /modelstore/ids/model_ids.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
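# Model ids are plain strings: `new()` generates a uuid4-based id and
# `validate()` rejects ids containing spaces or characters that are reserved
# on some storage backends and operating systems. A small usage sketch (the
# custom id below is purely illustrative):

def _example_usage() -> str:
    generated_id = new()
    assert validate(generated_id)          # generated ids are always valid
    assert not validate("my model?")       # a space and "?" are both rejected
    return generated_id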
14 | import re 15 | import uuid 16 | 17 | from modelstore.utils.log import logger 18 | 19 | # Avoids characters that can't be used on Windows 20 | # https://github.com/operatorai/modelstore/issues/140 21 | _RESERVED_CHARACTERS = [ 22 | "<", 23 | ">", 24 | ":", 25 | '"', 26 | "/", 27 | "\\", 28 | "|", 29 | "?", 30 | "*", 31 | "#", 32 | "^", 33 | "`", 34 | "%", 35 | "~", 36 | "{", 37 | "}", 38 | "[", 39 | "]", 40 | ] 41 | 42 | 43 | def new() -> str: 44 | """Currently returns a uuid4 ID; in the future 45 | we can support different ID types & lengths 46 | """ 47 | return str(uuid.uuid4()) 48 | 49 | 50 | def validate(model_id: str) -> bool: 51 | """Model ids need to comply with various 52 | conditions so that we can use their ID when storing 53 | models into the different storage layers 54 | """ 55 | if re.search(" +", model_id) is not None: 56 | logger.info("Model id contains one or more spaces") 57 | return False 58 | 59 | matches = [x for x in _RESERVED_CHARACTERS if x in model_id] 60 | if len(matches) == 0: 61 | return True 62 | logger.info("Model id contains reserved characters: %s", matches) 63 | return False 64 | -------------------------------------------------------------------------------- /modelstore/metadata/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/modelstore/metadata/__init__.py -------------------------------------------------------------------------------- /modelstore/metadata/code/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/modelstore/metadata/code/__init__.py -------------------------------------------------------------------------------- /modelstore/metadata/code/code.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
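# `Code.generate` snapshots the environment that produced a model: the python
# runtime, user, a creation timestamp, the versions of a given list of
# dependencies, and (when available) git state. A small sketch; the module
# names passed in are illustrative:

def _example_code_meta_data() -> "Code":
    # Versions are resolved via metadata.code.dependencies; packages that are
    # not installed are dropped by remove_nones()
    return Code.generate(deps_list=["numpy", "sklearn"])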
14 | from dataclasses import dataclass, field 15 | from datetime import datetime 16 | from typing import Optional 17 | 18 | from dataclasses_json import dataclass_json 19 | from dataclasses_json.cfg import config 20 | 21 | from modelstore.metadata.code import dependencies, revision, runtime 22 | from modelstore.metadata.utils.utils import exclude_field, remove_nones 23 | 24 | 25 | @dataclass_json 26 | @dataclass 27 | class Code: 28 | 29 | """Code contains fields that are captured about 30 | the code/runtime when a model is saved""" 31 | 32 | runtime: str 33 | user: str 34 | created: str 35 | dependencies: dict 36 | git: Optional[dict] = field(default=None, metadata=config(exclude=exclude_field)) 37 | 38 | @classmethod 39 | def generate(cls, deps_list: list, created: datetime = None) -> "Code": 40 | """Generates the meta data for the code being run to create the model""" 41 | versioned_deps = dependencies.get_dependency_versions(deps_list) 42 | if created is None: 43 | # created can be overridden in unit tests where we need to 44 | # control time stamps of mock model objects 45 | created = datetime.now() 46 | return Code( 47 | runtime=runtime.get_python_version(), 48 | user=runtime.get_user(), 49 | created=created.strftime("%Y/%m/%d/%H:%M:%S"), 50 | dependencies=remove_nones(versioned_deps), 51 | git=revision.git_meta(), 52 | ) 53 | -------------------------------------------------------------------------------- /modelstore/metadata/code/dependencies.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
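# `get_dependency_versions` maps module names to their installed versions,
# returning None for anything that cannot be imported and special-casing
# pickle, which exposes `format_version` instead of `__version__`. A small
# sketch; the module names and versions shown are illustrative:

def _example_versions() -> dict:
    # e.g. {"numpy": "1.23.5", "pickle": "4.0", "not-a-module": None}
    return get_dependency_versions(["numpy", "pickle", "not-a-module"])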
14 | import importlib 15 | import sys 16 | 17 | import pkg_resources 18 | 19 | from modelstore.utils.log import logger 20 | 21 | # pylint: disable=broad-except 22 | 23 | 24 | def _get_version(modname: str) -> str: 25 | try: 26 | if modname == "pickle": 27 | # pylint: disable=import-outside-toplevel 28 | import pickle 29 | 30 | return pickle.format_version 31 | if modname in sys.modules: 32 | mod = sys.modules[modname] 33 | else: 34 | logger.debug("Trying to import: %s", modname) 35 | mod = importlib.import_module(modname) 36 | return mod.__version__ 37 | except AttributeError: 38 | try: 39 | #  Annoy does not have a __version__ 40 | return pkg_resources.get_distribution(modname).version 41 | except Exception: 42 | logger.debug("Unable to get %s's version", modname) 43 | return None 44 | except ImportError: 45 | logger.debug("%s is not installed.", modname) 46 | return None 47 | except Exception: 48 | logger.error("Error importing: %s.", modname) 49 | return None 50 | 51 | 52 | def get_dependency_versions(modnames: list) -> dict: 53 | """ 54 | This function re-implements the functionality of the 'private' `_get_deps_info()` 55 | function in sklearn: 56 | 57 | https://github.com/scikit-learn/scikit-learn/blob/a0a76fcfbe1e19c8f9e422b41260471f05d8f560/sklearn/utils/_show_versions.py#L35 58 | """ # noqa 59 | return {modname: _get_version(modname) for modname in modnames} 60 | 61 | 62 | def module_exists(modname: str) -> bool: 63 | """Returns True if a module has been installed""" 64 | return _get_version(modname) is not None 65 | -------------------------------------------------------------------------------- /modelstore/metadata/code/revision.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from modelstore.utils.log import logger 15 | 16 | try: 17 | import git 18 | 19 | GIT_EXISTS = True 20 | except ImportError: 21 | logger.info("Warning: no git installation. 
Will not collect git meta data.") 22 | GIT_EXISTS = False 23 | 24 | 25 | def _repo_name(repo: "git.Repo") -> str: 26 | if not GIT_EXISTS: 27 | return "" 28 | # pylint: disable=broad-except 29 | try: 30 | repo_url = repo.remotes.origin.url 31 | return repo_url.split(".git")[0].split("/")[-1] 32 | except Exception as exc: 33 | logger.debug("error extracting git repo: %s", str(exc)) 34 | return "" 35 | 36 | 37 | def git_meta() -> dict: 38 | """Returns meta data about the current git repo""" 39 | if not GIT_EXISTS: 40 | return {} 41 | # pylint: disable=broad-except 42 | try: 43 | repo = git.Repo(search_parent_directories=True) 44 | return { 45 | "repository": _repo_name(repo), 46 | "sha": repo.head.object.hexsha, 47 | "local_changes": repo.is_dirty(), 48 | "branch": repo.active_branch.name, 49 | } 50 | except Exception as exc: 51 | logger.debug("error generating git meta-data: %s", str(exc)) 52 | return None 53 | -------------------------------------------------------------------------------- /modelstore/metadata/code/runtime.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import getpass 15 | import sys 16 | 17 | 18 | def get_python_version() -> str: 19 | """Returns the current python version""" 20 | vers = sys.version_info 21 | version = ".".join(str(x) for x in [vers.major, vers.minor, vers.micro]) 22 | return f"python:{version}" 23 | 24 | 25 | def get_user() -> str: 26 | """Returns the current user""" 27 | return getpass.getuser() 28 | -------------------------------------------------------------------------------- /modelstore/metadata/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/modelstore/metadata/dataset/__init__.py -------------------------------------------------------------------------------- /modelstore/metadata/dataset/dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
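# `Dataset.generate` summarises the training data captured when a model is
# saved: features and labels are described by their shape and, for 1-d label
# arrays, by value counts. A small sketch with numpy inputs:

import numpy as np

def _example_dataset_meta_data() -> "Dataset":
    features = np.random.rand(100, 10)
    labels = np.random.randint(0, 2, size=100)
    # features.shape == [100, 10]; labels get a shape plus value counts
    return Dataset.generate(features=features, labels=labels)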
14 | from dataclasses import dataclass, field 15 | from typing import Any, Optional 16 | 17 | from dataclasses_json import dataclass_json 18 | from dataclasses_json.cfg import config 19 | 20 | from modelstore.metadata.dataset.features import Features 21 | from modelstore.metadata.dataset.labels import Labels 22 | from modelstore.metadata.utils.utils import exclude_field 23 | 24 | 25 | @dataclass_json 26 | @dataclass 27 | class Dataset: 28 | 29 | """Dataset contains fields that are captured about 30 | the training dataset when the model is saved""" 31 | 32 | features: Optional[Features] = field( 33 | default=None, metadata=config(exclude=exclude_field) 34 | ) 35 | labels: Optional[Labels] = field( 36 | default=None, metadata=config(exclude=exclude_field) 37 | ) 38 | 39 | @classmethod 40 | def generate(cls, features: Any = None, labels: Any = None) -> "Dataset": 41 | """Returns summary stats about a dataset""" 42 | features = Features.generate(features) 43 | labels = Labels.generate(labels) 44 | if features is None and labels is None: 45 | return None 46 | return Dataset(features=features, labels=labels) 47 | -------------------------------------------------------------------------------- /modelstore/metadata/dataset/features.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from dataclasses import dataclass, field 15 | from typing import Any, List, Optional 16 | 17 | from dataclasses_json import dataclass_json 18 | from dataclasses_json.cfg import config 19 | 20 | from modelstore.metadata.dataset.types import ( 21 | is_numpy_array, 22 | is_pandas_dataframe 23 | ) 24 | from modelstore.metadata.utils.utils import exclude_field 25 | from modelstore.utils.log import logger 26 | 27 | 28 | @dataclass_json 29 | @dataclass 30 | class Features: 31 | 32 | """Features contains fields that are captured about 33 | the training dataset's features when the model is saved""" 34 | 35 | shape: Optional[List[int]] = field( 36 | default=None, metadata=config(exclude=exclude_field) 37 | ) 38 | 39 | @classmethod 40 | def generate(cls, values: Any = None) -> "Features": 41 | """Returns summary stats about a set of features""" 42 | if values is None: 43 | return None 44 | if is_numpy_array(values): 45 | return Features( 46 | shape=list(values.shape), 47 | ) 48 | if is_pandas_dataframe(values): 49 | return Features( 50 | shape=list(values.shape), 51 | ) 52 | logger.debug("Trying to describe unknown type: %s", type(values)) 53 | return None 54 | -------------------------------------------------------------------------------- /modelstore/metadata/dataset/labels.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from dataclasses import dataclass, field 15 | from typing import Any, List, Optional 16 | 17 | import numpy as np 18 | from dataclasses_json import dataclass_json 19 | from dataclasses_json.cfg import config 20 | 21 | from modelstore.metadata.dataset.types import ( 22 | is_numpy_array, 23 | is_pandas_dataframe, 24 | is_pandas_series 25 | ) 26 | from modelstore.metadata.utils.utils import exclude_field 27 | from modelstore.utils.log import logger 28 | 29 | 30 | @dataclass_json 31 | @dataclass 32 | class Labels: 33 | 34 | """Labels contains fields that are captured about 35 | the training dataset's labels when the model is saved""" 36 | 37 | shape: Optional[List[int]] = field( 38 | default=None, metadata=config(exclude=exclude_field) 39 | ) 40 | values: Optional[dict] = field(default=None, metadata=config(exclude=exclude_field)) 41 | 42 | @classmethod 43 | def generate(cls, values: Any = None) -> "Labels": 44 | """Returns summary stats about a set of labels""" 45 | if values is None: 46 | return None 47 | if is_numpy_array(values): 48 | if values.ndim == 1: 49 | # Array has one dimension (e.g., labels); return its 50 | # its shape and value counts 51 | unique, counts = np.unique(values, return_counts=True) 52 | return Labels( 53 | shape=list(values.shape), values=dict(zip(unique, counts)) 54 | ) 55 | # Array is multi-dimensional, only return its shape 56 | return Labels( 57 | shape=list(values.shape), 58 | values=None, 59 | ) 60 | if is_pandas_dataframe(values): 61 | # Data frame can have multiple dimensions; only 62 | # return its shape 63 | return Labels( 64 | shape=list(values.shape), 65 | values=None, 66 | ) 67 | if is_pandas_series(values): 68 | # Data series has one dimension (e.g., labels); return 69 | # its shape and value counts 70 | return Labels( 71 | shape=list(values.shape), 72 | values=values.value_counts().to_dict(), 73 | ) 74 | logger.debug("Trying to describe unknown type: %s", type(values)) 75 | return None 76 | -------------------------------------------------------------------------------- /modelstore/metadata/dataset/types.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
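# These helpers let the metadata generators accept numpy arrays and, when
# pandas is installed, DataFrames and Series, without making pandas a hard
# dependency of modelstore. A small sketch of the checks:

import numpy as np

def _example_type_checks():
    values = np.zeros((3, 2))
    assert is_numpy_array(values)
    assert not is_pandas_dataframe(values)
    assert not is_pandas_series(values)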
14 | import numpy as np 15 | 16 | try: 17 | # numpy is a required dependency for modelstore, 18 | # but pandas is not 19 | import pandas as pd 20 | 21 | PANDAS_EXISTS = True 22 | except ImportError: 23 | PANDAS_EXISTS = False 24 | 25 | 26 | def is_numpy_array(values) -> bool: 27 | """Whether values is a numpy array""" 28 | return isinstance(values, np.ndarray) 29 | 30 | 31 | def is_pandas_dataframe(values) -> bool: 32 | """Whether values is a pandas data frame""" 33 | if PANDAS_EXISTS: 34 | return isinstance(values, pd.DataFrame) 35 | return False 36 | 37 | 38 | def is_pandas_series(values) -> bool: 39 | """Whether values is a pandas series""" 40 | if PANDAS_EXISTS: 41 | return isinstance(values, pd.Series) 42 | return False 43 | -------------------------------------------------------------------------------- /modelstore/metadata/metadata.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from dataclasses import dataclass, field 15 | from typing import Optional 16 | 17 | from dataclasses_json import dataclass_json 18 | from dataclasses_json.cfg import config 19 | 20 | import modelstore 21 | from modelstore.metadata.code.code import Code 22 | from modelstore.metadata.model.model import Dataset, Model, ModelType 23 | from modelstore.metadata.storage.storage import Storage 24 | from modelstore.metadata.utils.utils import ( 25 | exclude_field, 26 | validate_json_serializable 27 | ) 28 | 29 | 30 | @dataclass_json 31 | @dataclass 32 | class Summary: 33 | 34 | """Summary holds all of the fields that are captured 35 | when a model is saved""" 36 | 37 | model: Model 38 | storage: Storage 39 | modelstore: str # Version of modelstore 40 | code: Optional[Code] = field(default=None, metadata=config(exclude=exclude_field)) 41 | extra: Optional[dict] = field(default=None, metadata=config(exclude=exclude_field)) 42 | 43 | @classmethod 44 | def generate( 45 | cls, 46 | code_meta_data: Code, 47 | model_meta_data: Model, 48 | storage_meta_data: Storage, 49 | extra_metadata: dict = None, 50 | ) -> "Summary": 51 | """Generates all of the meta data for a model 52 | and adds the modelstore version""" 53 | validate_json_serializable("extra_metadata", extra_metadata) 54 | return Summary( 55 | model=model_meta_data, 56 | storage=storage_meta_data, 57 | modelstore=modelstore.__version__, 58 | code=code_meta_data, 59 | extra=extra_metadata, 60 | ) 61 | 62 | def dumps(self, target_file: str): 63 | """Dumps the data class as JSON into target_file""" 64 | # pylint: disable=no-member 65 | # pylint: disable=unspecified-encoding 66 | with open(target_file, "w") as out: 67 | out.write(self.to_json()) 68 | 69 | @classmethod 70 | def loads(cls, source_file: str) -> "Summary": 71 | """Loads the data class from a JSON source_file""" 72 | # pylint: disable=no-member 73 | # pylint: disable=unspecified-encoding 74 | with open(source_file, "r") as lines: 75 | content = 
lines.read() 76 | return Summary.from_json(content) 77 | 78 | def model_type(self) -> ModelType: 79 | """Returns the model type""" 80 | return self.model.model_type 81 | 82 | def dataset(self) -> Dataset: 83 | """Returns meta data about the training data""" 84 | return self.model.data 85 | -------------------------------------------------------------------------------- /modelstore/metadata/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/modelstore/metadata/model/__init__.py -------------------------------------------------------------------------------- /modelstore/metadata/model/model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from dataclasses import dataclass, field 15 | from typing import Optional 16 | 17 | from dataclasses_json import dataclass_json 18 | from dataclasses_json.cfg import config 19 | 20 | from modelstore.metadata.dataset.dataset import Dataset 21 | from modelstore.metadata.model.model_type import ModelType 22 | from modelstore.metadata.utils.utils import exclude_field 23 | 24 | 25 | @dataclass_json 26 | @dataclass 27 | class Model: 28 | 29 | """Model contains fields that are captured about 30 | the model when it is saved""" 31 | 32 | domain: str 33 | model_id: str 34 | model_type: ModelType 35 | parameters: Optional[dict] = field( 36 | default=None, metadata=config(exclude=exclude_field) 37 | ) 38 | data: Optional[Dataset] = field( 39 | default=None, metadata=config(exclude=exclude_field) 40 | ) 41 | 42 | @classmethod 43 | def generate( 44 | cls, 45 | domain: str, 46 | model_id: str, 47 | model_type: ModelType, 48 | parameters: dict = None, 49 | data: Dataset = None, 50 | ) -> "Model": 51 | """Generates the meta data for the model that is being saved""" 52 | return Model( 53 | domain=domain, 54 | model_id=model_id, 55 | model_type=model_type, 56 | parameters=parameters, 57 | data=data, 58 | ) 59 | -------------------------------------------------------------------------------- /modelstore/metadata/model/model_type.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
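A rough sketch of how the Model and ModelType classes above fit together when meta data is generated; the domain, model id, library name and parameters are all placeholders, and the sketch assumes numpy is installed:

import numpy as np

from modelstore.metadata.dataset.dataset import Dataset
from modelstore.metadata.model.model import Model
from modelstore.metadata.model.model_type import ModelType

X_train = np.random.rand(100, 5)        # placeholder training features
y_train = np.random.randint(0, 2, 100)  # placeholder training labels

model_meta = Model.generate(
    domain="example-domain",   # placeholder domain name
    model_id="abcd-1234",      # placeholder model id
    model_type=ModelType.generate(library="sklearn", class_name="RandomForestClassifier"),
    parameters={"n_estimators": 100},
    data=Dataset.generate(features=X_train, labels=y_train),
)
# the optional parameters/data fields are omitted from the JSON when left as None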
14 | import os 15 | from dataclasses import dataclass, field 16 | from typing import Dict, List, Optional 17 | 18 | from dataclasses_json import dataclass_json 19 | from dataclasses_json.cfg import config 20 | 21 | from modelstore.metadata.utils.utils import exclude_field 22 | 23 | _MODEL_TYPE_FILE = "model-info.json" 24 | 25 | 26 | @dataclass_json 27 | @dataclass 28 | class ModelType: 29 | 30 | """ModelType contains fields that are captured about 31 | the model type when it is saved""" 32 | 33 | library: str 34 | type: Optional[str] = field(default=None, metadata=config(exclude=exclude_field)) 35 | 36 | # When saving multiple models together, the models' 37 | # types are specified in this list 38 | models: Optional[List["ModelType"]] = field( 39 | default=None, metadata=config(exclude=exclude_field) 40 | ) 41 | 42 | @classmethod 43 | def generate( 44 | cls, library: str, class_name: str = None, models: List[Dict] = None 45 | ) -> "ModelType": 46 | """Generates the meta data for the type of model 47 | that is being saved""" 48 | return ModelType( 49 | library=library, 50 | type=class_name, 51 | models=models, 52 | ) 53 | 54 | def dumps(self, target_dir: str) -> str: 55 | """Dumps the data class as JSON into a file 56 | and returns the path to the file""" 57 | # pylint: disable=no-member 58 | # pylint: disable=unspecified-encoding 59 | target_file = os.path.join(target_dir, _MODEL_TYPE_FILE) 60 | with open(target_file, "w") as out: 61 | out.write(self.to_json()) 62 | return target_file 63 | -------------------------------------------------------------------------------- /modelstore/metadata/storage/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/modelstore/metadata/storage/__init__.py -------------------------------------------------------------------------------- /modelstore/metadata/storage/storage.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from dataclasses import dataclass, field 15 | from typing import Optional 16 | 17 | from dataclasses_json import dataclass_json 18 | from dataclasses_json.cfg import config 19 | 20 | from modelstore.metadata.utils.utils import exclude_field 21 | 22 | 23 | @dataclass_json 24 | @dataclass 25 | class Storage: 26 | 27 | """Storage contains fields that are captured about 28 | where the model is saved""" 29 | 30 | # Constant to describe the storage type 31 | type: str 32 | 33 | # Path-like storage (e.g. 
local) 34 | root: Optional[str] = field(default=None, metadata=config(exclude=exclude_field)) 35 | path: Optional[str] = field(default=None, metadata=config(exclude=exclude_field)) 36 | 37 | # Container-like storage 38 | bucket: Optional[str] = field(default=None, metadata=config(exclude=exclude_field)) 39 | prefix: Optional[str] = field(default=None, metadata=config(exclude=exclude_field)) 40 | 41 | # Retained for backwards compatibility (Azure) 42 | container: Optional[str] = field( 43 | default=None, metadata=config(exclude=exclude_field) 44 | ) 45 | 46 | @classmethod 47 | def from_path(cls, storage_type: str, root: str, path: str) -> "Storage": 48 | """Generates the meta data about where the model 49 | is going to be saved when it is saved in path-like storage""" 50 | return Storage( 51 | type=storage_type, 52 | root=root, 53 | path=path, 54 | ) 55 | 56 | @classmethod 57 | def from_bucket(cls, storage_type: str, bucket: str, prefix: str) -> "Storage": 58 | """Generates the meta data about where the model 59 | is going to be saved when it is saved in container storage""" 60 | return Storage( 61 | type=storage_type, 62 | bucket=bucket, 63 | prefix=prefix, 64 | ) 65 | 66 | @classmethod 67 | def from_container( 68 | cls, storage_type: str, container: str, prefix: str 69 | ) -> "Storage": 70 | """Generates the meta data about where the model 71 | is going to be saved when it is saved in an Azure container""" 72 | return Storage( 73 | type=storage_type, 74 | container=container, 75 | prefix=prefix, 76 | ) 77 | -------------------------------------------------------------------------------- /modelstore/metadata/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/modelstore/metadata/utils/__init__.py -------------------------------------------------------------------------------- /modelstore/metadata/utils/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
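A small sketch of the three constructors above; the storage type strings, paths and bucket names are placeholders rather than the exact constants used by the real storage backends. Fields left as None are dropped from the JSON via the exclude_field config:

from modelstore.metadata.storage.storage import Storage

local = Storage.from_path(storage_type="local", root="/tmp/modelstore", path="example-domain/model-id")
s3 = Storage.from_bucket(storage_type="s3", bucket="example-bucket", prefix="example-domain/model-id")
blob = Storage.from_container(storage_type="azure", container="example-container", prefix="example-domain/model-id")

# e.g. the path-like variant serialises without bucket/prefix/container keys
print(local.to_json())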
14 | import json 15 | from typing import Any 16 | 17 | 18 | def remove_nones(values: dict) -> dict: 19 | """Removes any entries in a dictionary that have None values""" 20 | return {k: v for k, v in values.items() if v is not None} 21 | 22 | 23 | def exclude_field(value: Any) -> bool: 24 | """Whether to exclude a field from being included in the JSON 25 | meta data""" 26 | return value is None 27 | 28 | 29 | def validate_json_serializable(name: str, value: dict): 30 | """Validates that `value` is a JSON serializable dictionary""" 31 | if value is None: 32 | # None fields will not be dumped from dataclasses 33 | return 34 | if not isinstance(value, dict): 35 | raise TypeError(f"{name} is not a dictionary") 36 | try: 37 | # @Future: check if `value` has fields that can be auto-converted 38 | # to make it JSON serializable (e.g., np.array to list) 39 | json.dumps(value) 40 | except Exception as exc: 41 | raise TypeError(f"{name} must be json serializable") from exc 42 | -------------------------------------------------------------------------------- /modelstore/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/modelstore/models/__init__.py -------------------------------------------------------------------------------- /modelstore/models/annoy.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
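A brief sketch of the helpers above; the dictionaries are arbitrary examples, and numpy is only used to show a value that is not JSON serializable:

import numpy as np

from modelstore.metadata.utils.utils import remove_nones, validate_json_serializable

print(remove_nones({"a": 1, "b": None}))  # {'a': 1}

validate_json_serializable("extra_metadata", {"run_id": 42})  # passes silently

try:
    # numpy arrays are not JSON serializable, so this raises a TypeError
    validate_json_serializable("extra_metadata", {"weights": np.zeros(3)})
except TypeError as error:
    print(error)  # extra_metadata must be json serializable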
14 | import os 15 | from functools import partial 16 | from typing import Any 17 | 18 | from modelstore.metadata import metadata 19 | from modelstore.models.model_manager import ModelManager 20 | from modelstore.storage.storage import CloudStorage 21 | from modelstore.utils.log import logger 22 | 23 | MODEL_FILE = "model.ann" 24 | 25 | 26 | class AnnoyManager(ModelManager): 27 | 28 | """ 29 | Model persistence for Annoy models: 30 | https://github.com/spotify/annoy 31 | """ 32 | 33 | NAME = "annoy" 34 | 35 | def __init__(self, storage: CloudStorage = None): 36 | super().__init__(self.NAME, storage) 37 | 38 | def required_dependencies(self) -> list: 39 | return ["annoy"] 40 | 41 | def _required_kwargs(self): 42 | return ["model", "metric", "num_trees"] 43 | 44 | def matches_with(self, **kwargs) -> bool: 45 | # pylint: disable=import-outside-toplevel 46 | from annoy import AnnoyIndex 47 | 48 | return isinstance(kwargs.get("model"), AnnoyIndex) 49 | 50 | def _get_functions(self, **kwargs) -> list: 51 | if not self.matches_with(**kwargs): 52 | raise TypeError("Model is not an AnnoyIndex!") 53 | 54 | return [ 55 | partial( 56 | save_model, 57 | model=kwargs["model"], 58 | ), 59 | ] 60 | 61 | def get_params(self, **kwargs) -> dict: 62 | return { 63 | "num_dimensions": kwargs["model"].f, 64 | "num_trees": kwargs["num_trees"], 65 | "metric": kwargs["metric"], 66 | } 67 | 68 | def load(self, model_path: str, meta_data: metadata.Summary) -> Any: 69 | super().load(model_path, meta_data) 70 | 71 | # pylint: disable=import-outside-toplevel 72 | from annoy import AnnoyIndex 73 | 74 | # Extract index size & metric from the meta_data 75 | params = meta_data.model.parameters 76 | num_dimensions = int(params["num_dimensions"]) 77 | metric = params["metric"] 78 | 79 | model = AnnoyIndex(num_dimensions, metric) 80 | model.load(_model_file_path(model_path)) 81 | return model 82 | 83 | 84 | def _model_file_path(tmp_dir: str) -> str: 85 | return os.path.join(tmp_dir, MODEL_FILE) 86 | 87 | 88 | def save_model(tmp_dir: str, model: "annoy.AnnoyIndex") -> str: 89 | """Saves an annoy index to file""" 90 | file_path = _model_file_path(tmp_dir) 91 | logger.debug("Saving annoy model to %s", file_path) 92 | model.save(file_path) 93 | return file_path 94 | -------------------------------------------------------------------------------- /modelstore/models/causalml.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
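Loosely, the kwargs that AnnoyManager (above) expects look like this; the index parameters are placeholders and the sketch assumes annoy is installed:

import random

from annoy import AnnoyIndex

from modelstore.models.annoy import AnnoyManager

num_dimensions, metric, num_trees = 40, "angular", 10  # placeholder index parameters
index = AnnoyIndex(num_dimensions, metric)
for i in range(100):
    index.add_item(i, [random.random() for _ in range(num_dimensions)])
index.build(num_trees)

manager = AnnoyManager()
# matches_with inspects the "model" kwarg; metric and num_trees are passed
# explicitly because get_params records them alongside the index dimensions
print(manager.matches_with(model=index))
print(manager.get_params(model=index, metric=metric, num_trees=num_trees))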
14 | import os 15 | from functools import partial 16 | from typing import Any 17 | 18 | from modelstore.metadata import metadata 19 | from modelstore.models.common import load_joblib, save_joblib 20 | from modelstore.models.model_manager import ModelManager 21 | from modelstore.storage.storage import CloudStorage 22 | 23 | MODEL_FILE = "model.joblib" 24 | 25 | 26 | class CausalMLManager(ModelManager): 27 | 28 | """ 29 | Model persistence for Causal ML models: 30 | https://causalml.readthedocs.io/en/latest/index.html 31 | """ 32 | 33 | NAME = "causalml" 34 | 35 | def __init__(self, storage: CloudStorage = None): 36 | super().__init__(self.NAME, storage) 37 | 38 | def required_dependencies(self) -> list: 39 | return ["causalml"] 40 | 41 | def optional_dependencies(self) -> list: 42 | deps = super().optional_dependencies() 43 | return deps + ["Cython", "joblib"] 44 | 45 | def _required_kwargs(self): 46 | return ["model"] 47 | 48 | def matches_with(self, **kwargs) -> bool: 49 | # pylint: disable=import-outside-toplevel 50 | import causalml 51 | 52 | return any( 53 | [ 54 | isinstance(kwargs.get("model"), causalml.inference.meta.base.BaseLearner), 55 | isinstance(kwargs.get("model"), causalml.propensity.PropensityModel), 56 | ] 57 | ) 58 | 59 | def _get_functions(self, **kwargs) -> list: 60 | if not self.matches_with(**kwargs): 61 | raise TypeError("This model is not a Causal ML model!") 62 | 63 | return [partial(save_joblib, model=kwargs["model"], file_name=MODEL_FILE)] 64 | 65 | def load(self, model_path: str, meta_data: metadata.Summary) -> Any: 66 | super().load(model_path, meta_data) 67 | 68 | # @Future: check if loading into same version of joblib 69 | # as was used for saving 70 | file_name = os.path.join(model_path, MODEL_FILE) 71 | return load_joblib(file_name) 72 | -------------------------------------------------------------------------------- /modelstore/models/common.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
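CausalMLManager above delegates serialisation to the joblib helpers it imports from modelstore.models.common (shown next); a minimal round-trip sketch, using a plain dict as a stand-in for a fitted causalml learner:

import tempfile

from modelstore.models.common import load_joblib, save_joblib

with tempfile.TemporaryDirectory() as tmp_dir:
    # any picklable object works here; a real upload would pass the fitted model
    saved_path = save_joblib(tmp_dir, model={"coef": [0.1, 0.2]}, file_name="model.joblib")
    print(load_joblib(saved_path))  # {'coef': [0.1, 0.2]}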
14 | import json 15 | import os 16 | from typing import Any 17 | 18 | import joblib 19 | 20 | 21 | def save_json(tmp_dir: str, file_name: str, data: dict) -> str: 22 | target = os.path.join(tmp_dir, file_name) 23 | with open(target, "w") as out: 24 | out.write(json.dumps(data)) 25 | return target 26 | 27 | 28 | def save_joblib(tmp_dir: str, model: Any, file_name: str) -> str: 29 | model_path = os.path.join(tmp_dir, file_name) 30 | joblib.dump(model, model_path) 31 | return model_path 32 | 33 | 34 | def load_joblib(model_path: str) -> Any: 35 | return joblib.load(model_path) 36 | -------------------------------------------------------------------------------- /modelstore/models/missing_manager.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import Any, Optional 15 | 16 | from modelstore.metadata import metadata 17 | from modelstore.models.model_manager import ModelManager 18 | from modelstore.storage.storage import CloudStorage 19 | from modelstore.utils.log import logger 20 | 21 | 22 | class MissingDepManager(ModelManager): 23 | 24 | """ 25 | MissingDepManager is used when a dependency is not 26 | installed; it overrides the ModelManager functionality 27 | and gives the user informative error messages 28 | """ 29 | 30 | NAME = "missing" 31 | 32 | def __init__(self, library: str, storage: CloudStorage = None): 33 | super().__init__(self.NAME, storage) 34 | self.library = library 35 | 36 | def required_dependencies(self) -> list: 37 | return [] 38 | 39 | def matches_with(self, **kwargs) -> bool: 40 | return False 41 | 42 | def _get_functions(self, **kwargs) -> list: 43 | return [] 44 | 45 | def get_params(self, **kwargs) -> dict: 46 | return None 47 | 48 | def _required_kwargs(self) -> list: 49 | return [] 50 | 51 | def model_info(self, **kwargs) -> Optional[metadata.ModelType]: 52 | return None 53 | 54 | def model_data(self, **kwargs) -> Optional[metadata.Dataset]: 55 | return None 56 | 57 | def upload(self, domain: str, model_id: str, **kwargs) -> str: 58 | logger.error("Error: %s is not installed", self.library) 59 | logger.error("Please install it and try again") 60 | raise ModuleNotFoundError(f"{self.library} is not installed") 61 | 62 | def load(self, model_path: str, meta_data: metadata.Summary) -> Any: 63 | logger.error("Error: %s is not installed", self.library) 64 | logger.error("Please install it and try again") 65 | raise ModuleNotFoundError(f"{self.library} is not installed") 66 | -------------------------------------------------------------------------------- /modelstore/models/multiple_models.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import Any, List 15 | 16 | from modelstore.metadata import metadata 17 | from modelstore.models.managers import get_manager 18 | from modelstore.models.model_manager import ModelManager 19 | from modelstore.storage.storage import CloudStorage 20 | 21 | 22 | class MultipleModelsManager(ModelManager): 23 | 24 | """ 25 | Persistence for multiple models 26 | E.g. pairs of (model, explainer) that need to be saved together 27 | """ 28 | 29 | NAME = "multiple-models" 30 | 31 | def __init__( 32 | self, 33 | managers: List[ModelManager], 34 | storage: CloudStorage = None, 35 | ): 36 | super().__init__(self.NAME, storage) 37 | self.managers = managers 38 | 39 | def _required_kwargs(self) -> list: 40 | requirements = [] 41 | for manager in self.managers: 42 | # pylint: disable=protected-access 43 | requirements += manager._required_kwargs() 44 | return list(set(requirements)) 45 | 46 | def required_dependencies(self) -> list: 47 | dependencies = [] 48 | for manager in self.managers: 49 | dependencies += manager.required_dependencies() 50 | return list(set(dependencies)) 51 | 52 | def matches_with(self, **kwargs) -> bool: 53 | for manager in self.managers: 54 | if not manager.matches_with(**kwargs): 55 | return False 56 | return True 57 | 58 | def _get_functions(self, **kwargs) -> list: 59 | functions = [] 60 | for manager in self.managers: 61 | # pylint: disable=protected-access 62 | functions += manager._get_functions(**kwargs) 63 | return functions 64 | 65 | def model_info(self, **kwargs) -> metadata.ModelType: 66 | """Returns meta-data about the model's type""" 67 | return metadata.ModelType.generate( 68 | library=self.ml_library, 69 | models=[m.model_info(**kwargs) for m in self.managers], 70 | ) 71 | 72 | def get_params(self, **kwargs) -> dict: 73 | return { 74 | # pylint: disable=protected-access 75 | manager.ml_library: manager.get_params(**kwargs) 76 | for manager in self.managers 77 | } 78 | 79 | def load(self, model_path: str, meta_data: metadata.Summary) -> Any: 80 | models = {} 81 | for model in meta_data.model_type().models: 82 | manager = get_manager(model.library, self.storage) 83 | models[model.library] = manager.load(model_path, meta_data) 84 | return models 85 | -------------------------------------------------------------------------------- /modelstore/models/onnx.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
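A rough sketch of pairing two managers, e.g. a scikit-learn model with its shap explainer; it assumes scikit-learn and shap are installed, and that the scikit-learn manager is registered with get_manager under the name "sklearn" (ShapManager.NAME, below, is "shap"):

import numpy as np
import shap
from sklearn.ensemble import RandomForestClassifier

from modelstore.models.managers import get_manager
from modelstore.models.multiple_models import MultipleModelsManager

X = np.random.rand(50, 4)  # placeholder training data
y = np.array([0, 1] * 25)
clf = RandomForestClassifier(n_estimators=5).fit(X, y)
explainer = shap.Explainer(clf)

paired = MultipleModelsManager([get_manager("sklearn", None), get_manager("shap", None)])
# matches_with only returns True when every wrapped manager finds its kwarg
print(paired.matches_with(model=clf, explainer=explainer))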
14 | import os 15 | from functools import partial 16 | from typing import Any 17 | 18 | from modelstore.metadata import metadata 19 | from modelstore.models.model_manager import ModelManager 20 | from modelstore.storage.storage import CloudStorage 21 | from modelstore.utils.log import logger 22 | 23 | MODEL_FILE = "model.onnx" 24 | 25 | 26 | class OnnxManager(ModelManager): 27 | 28 | """ 29 | Model persistence for ONNX models: 30 | https://github.com/onnx/onnx/blob/master/docs/PythonAPIOverview.md 31 | """ 32 | 33 | NAME = "onnx" 34 | 35 | def __init__(self, storage: CloudStorage = None): 36 | super().__init__(self.NAME, storage) 37 | 38 | def required_dependencies(self) -> list: 39 | return ["onnx"] 40 | 41 | def optional_dependencies(self) -> list: 42 | return super().optional_dependencies() + [ 43 | "skl2onnx", 44 | "onnxmltools", 45 | "onnxruntime", 46 | "onnxconverter-common", 47 | ] 48 | 49 | def _required_kwargs(self): 50 | return ["model"] 51 | 52 | def matches_with(self, **kwargs) -> bool: 53 | # pylint: disable=import-outside-toplevel 54 | from onnx import ModelProto 55 | 56 | return isinstance(kwargs.get("model"), ModelProto) 57 | 58 | def _get_functions(self, **kwargs) -> list: 59 | if not self.matches_with(**kwargs): 60 | raise TypeError("Model is not an onnx.ModelProto!") 61 | 62 | return [ 63 | partial( 64 | save_model, 65 | model=kwargs["model"], 66 | ), 67 | ] 68 | 69 | def load(self, model_path: str, meta_data: metadata.Summary) -> Any: 70 | super().load(model_path, meta_data) 71 | 72 | # pylint: disable=import-outside-toplevel 73 | import onnxruntime as rt 74 | 75 | model_path = _model_file_path(model_path) 76 | with open(model_path, "rb") as lines: 77 | model = lines.read() 78 | return rt.InferenceSession(model) 79 | 80 | 81 | def _model_file_path(tmp_dir: str) -> str: 82 | return os.path.join(tmp_dir, MODEL_FILE) 83 | 84 | 85 | def save_model(tmp_dir: str, model: "onnx.ModelProto") -> str: 86 | """Saves the onnx model in tmp_dir""" 87 | file_path = _model_file_path(tmp_dir) 88 | logger.debug("Saving onnx model to %s", file_path) 89 | with open(file_path, "wb") as model_file: 90 | model_file.write(model.SerializeToString()) 91 | return file_path 92 | -------------------------------------------------------------------------------- /modelstore/models/shap.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
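A sketch of the save path above: a ModelProto is written to model.onnx and later read back through onnxruntime. Here a tiny scikit-learn model is converted with skl2onnx purely to obtain a ModelProto; the data and names are illustrative:

import tempfile

import numpy as np
import onnxruntime as rt
from skl2onnx import to_onnx
from sklearn.linear_model import LogisticRegression

from modelstore.models.onnx import OnnxManager, save_model

X = np.random.rand(20, 3).astype(np.float32)
y = np.array([0, 1] * 10)
onnx_model = to_onnx(LogisticRegression().fit(X, y), X[:1])

print(OnnxManager().matches_with(model=onnx_model))  # True for onnx.ModelProto instances

with tempfile.TemporaryDirectory() as tmp_dir:
    file_path = save_model(tmp_dir, model=onnx_model)
    session = rt.InferenceSession(file_path)  # ready for session.run(...)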
14 | import os 15 | from functools import partial 16 | 17 | from modelstore.metadata import metadata 18 | from modelstore.metadata.model.model_type import ModelType 19 | from modelstore.models.common import load_joblib, save_joblib 20 | from modelstore.models.model_manager import ModelManager 21 | from modelstore.storage.storage import CloudStorage 22 | 23 | EXPLAINER_FILE = "explainer.joblib" 24 | 25 | 26 | class ShapManager(ModelManager): 27 | 28 | """ 29 | Model persistence for shap expainers 30 | """ 31 | 32 | NAME = "shap" 33 | 34 | def __init__(self, storage: CloudStorage = None): 35 | super().__init__(self.NAME, storage) 36 | 37 | def required_dependencies(self) -> list: 38 | return ["shap", "joblib"] 39 | 40 | def _required_kwargs(self): 41 | return ["explainer"] 42 | 43 | def model_info(self, **kwargs) -> ModelType: 44 | """Returns meta-data about the explainer type""" 45 | return ModelType.generate( 46 | library=self.ml_library, 47 | class_name=type(kwargs["explainer"]).__name__, 48 | ) 49 | 50 | def matches_with(self, **kwargs) -> bool: 51 | # pylint: disable=import-outside-toplevel 52 | from shap import Explainer 53 | 54 | return isinstance(kwargs.get("explainer"), Explainer) 55 | 56 | def _get_functions(self, **kwargs) -> list: 57 | if not self.matches_with(**kwargs): 58 | raise TypeError("Explainer is not a shap.Explainer!") 59 | 60 | return [ 61 | partial(save_joblib, model=kwargs["explainer"], file_name=EXPLAINER_FILE), 62 | ] 63 | 64 | def load(self, model_path: str, meta_data: metadata.Summary) -> "shap.Explainer": 65 | super().load(model_path, meta_data) 66 | 67 | explainer_path = _explainer_file_path(model_path) 68 | return load_joblib(explainer_path) 69 | 70 | 71 | def _explainer_file_path(tmp_dir: str) -> str: 72 | return os.path.join(tmp_dir, EXPLAINER_FILE) 73 | -------------------------------------------------------------------------------- /modelstore/models/util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
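A short sketch of what ShapManager captures; it assumes scikit-learn and a recent shap release (where the explainers derive from shap.Explainer), and the fitted model is a placeholder:

import numpy as np
import shap
from sklearn.ensemble import RandomForestRegressor

from modelstore.models.shap import ShapManager

X, y = np.random.rand(50, 4), np.random.rand(50)
explainer = shap.Explainer(RandomForestRegressor(n_estimators=5).fit(X, y))

manager = ShapManager()
print(manager.matches_with(explainer=explainer))  # True for shap.Explainer instances
print(manager.model_info(explainer=explainer))    # records library="shap" plus the explainer's class name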
14 | 15 | 16 | def convert_tensors(model_params): 17 | """Converts torch.Tensor types to numpy types""" 18 | import torch 19 | 20 | if isinstance(model_params, torch.Tensor): 21 | if hasattr(model_params, "detach"): 22 | model_params = model_params.detach() 23 | return model_params.cpu().numpy() 24 | if isinstance(model_params, list): 25 | return [convert_tensors(c) for c in model_params] 26 | if isinstance(model_params, dict): 27 | return {k: convert_tensors(v) for k, v in model_params.items()} 28 | 29 | return model_params 30 | 31 | 32 | def convert_numpy(model_params): 33 | """Converts numpy types to json serializable types""" 34 | import numpy as np 35 | 36 | if isinstance(model_params, np.ndarray): 37 | return model_params.tolist() 38 | 39 | if isinstance(model_params, list): 40 | return [convert_numpy(c) for c in model_params] 41 | if isinstance(model_params, dict): 42 | return {k: convert_numpy(v) for k, v in model_params.items()} 43 | return model_params 44 | -------------------------------------------------------------------------------- /modelstore/storage/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/modelstore/storage/__init__.py -------------------------------------------------------------------------------- /modelstore/storage/states/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/modelstore/storage/states/__init__.py -------------------------------------------------------------------------------- /modelstore/storage/states/model_states.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
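A short sketch of the two converters above, which together make nested parameter structures JSON-friendly; it assumes torch and numpy are installed:

import json

import numpy as np
import torch

from modelstore.models.util import convert_numpy, convert_tensors

params = {
    "weights": torch.ones(2, 2),         # torch.Tensor -> numpy array
    "layers": [np.array([1, 2, 3]), 4],  # numpy array -> plain list
}
as_numpy = convert_tensors(params)
as_builtin = convert_numpy(as_numpy)
print(json.dumps(as_builtin))  # {"weights": [[1.0, 1.0], [1.0, 1.0]], "layers": [[1, 2, 3], 4]}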
14 | import os 15 | from enum import Enum 16 | 17 | 18 | class ReservedModelStates(Enum): 19 | 20 | """ReservedModelStates are states that are 21 | created & managed by the modelstore library, 22 | so users cannot create a state with this name 23 | """ 24 | 25 | DELETED: str = "modelstore-deleted" 26 | 27 | 28 | def is_valid_state_name(state_name: str) -> bool: 29 | """Whether a state name is valid for usage""" 30 | if any(state_name == x for x in [None, ""]): 31 | return False 32 | if len(state_name) < 3: 33 | return False 34 | if os.path.split(state_name)[1] != state_name: 35 | return False 36 | if is_reserved_state(state_name): 37 | return False 38 | return True 39 | 40 | 41 | def is_reserved_state(state_name: str) -> bool: 42 | """Whether a state name is a reserved state""" 43 | reserved_state_names = set(x.value for x in ReservedModelStates) 44 | return state_name in reserved_state_names 45 | -------------------------------------------------------------------------------- /modelstore/storage/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/modelstore/storage/util/__init__.py -------------------------------------------------------------------------------- /modelstore/storage/util/environment.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import os 15 | from typing import Optional 16 | 17 | 18 | def get_value(arg: str, env_key: str, allow_missing: bool = False) -> Optional[str]: 19 | """Modelstore storage can optionally be instantiated using 20 | environment variables. This function is used to decide whether to 21 | - pull a variable from the user's environment; 22 | - return the one that was passed in; 23 | - return None 24 | """ 25 | if arg is not None: 26 | # arg has been passed in as non-None, so return it 27 | return arg 28 | if env_key not in os.environ and allow_missing: 29 | # The environment key doesn't exist for a variable that 30 | # is allowed to be missing, so return None 31 | return None 32 | # Return the environment variable; this will KeyError if it 33 | # is missing 34 | return os.environ[env_key] 35 | -------------------------------------------------------------------------------- /modelstore/storage/util/versions.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from datetime import datetime 15 | 16 | 17 | def sort_by_version(meta_data: dict): 18 | """Extracts the version from a model's meta data""" 19 | if "code" in meta_data: 20 | return datetime.strptime(meta_data["code"]["created"], "%Y/%m/%d/%H:%M:%S") 21 | if "meta" in meta_data: 22 | return datetime.strptime(meta_data["meta"]["created"], "%Y/%m/%d/%H:%M:%S") 23 | return 1 24 | 25 | 26 | def sorted_by_created(versions: list): 27 | """Sorts a list of models by version""" 28 | return sorted( 29 | versions, 30 | key=sort_by_version, 31 | reverse=True, 32 | ) 33 | -------------------------------------------------------------------------------- /modelstore/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/modelstore/utils/__init__.py -------------------------------------------------------------------------------- /modelstore/utils/exceptions.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
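A quick sketch of the sorting helpers above; the timestamps are made up but follow the %Y/%m/%d/%H:%M:%S format they are parsed with, and the newest model comes first:

from modelstore.storage.util.versions import sorted_by_created

versions = [
    {"code": {"created": "2022/01/01/09:00:00"}},
    {"code": {"created": "2023/06/15/12:30:00"}},
]
print(sorted_by_created(versions)[0]["code"]["created"])  # 2023/06/15/12:30:00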
14 | 15 | 16 | class FilePullFailedException(Exception): 17 | """Raised when modelstore was unable to download a file""" 18 | 19 | def __init__(self, base_exception: Exception): 20 | super().__init__() 21 | self.base_exception = base_exception 22 | 23 | 24 | class ModelDeletedException(Exception): 25 | """Raised when a modelstore user tries to download a model 26 | that has been deleted.""" 27 | 28 | def __init__(self, domain: str, model_id: str): 29 | super().__init__(f"model='{model_id}' has been deleted from domain='{domain}'") 30 | 31 | 32 | class ModelNotFoundException(Exception): 33 | """Raised when a modelstore user tries to download a model 34 | that does not exist in the given domain.""" 35 | 36 | def __init__(self, domain: str, model_id: str): 37 | super().__init__(f"model='{model_id}' does not exist in domain='{domain}'.") 38 | 39 | 40 | class DomainNotFoundException(Exception): 41 | """Raised when a modelstore user tries to retrieve/amend a domain 42 | that does not exist.""" 43 | 44 | def __init__(self, domain: str): 45 | super().__init__(f"The domain='{domain}' does not exist.") 46 | 47 | 48 | class ModelExistsException(Exception): 49 | """Raised when a modelstore user tries to upload a model that already exists""" 50 | 51 | def __init__(self, domain: str, model_id: str): 52 | super().__init__(f"model='{model_id}' already exists in this domain={domain}.") 53 | -------------------------------------------------------------------------------- /modelstore/utils/log.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
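Loosely how the lookup exceptions above surface to callers; the domain and model id are placeholders:

from modelstore.utils.exceptions import ModelNotFoundException

try:
    # stands in for a lookup inside model_store.py that finds nothing
    raise ModelNotFoundException(domain="example-domain", model_id="abcd-1234")
except ModelNotFoundException as error:
    print(error)  # model='abcd-1234' does not exist in domain='example-domain'.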
14 | import logging 15 | import sys 16 | 17 | def get_logger(): 18 | """Builds the modelstore logger""" 19 | log = logging.getLogger(name="modelstore") 20 | formatter = logging.Formatter("%(asctime)s - %(message)s") 21 | handler = logging.StreamHandler(sys.stderr) 22 | handler.setFormatter(formatter) 23 | log.setLevel(logging.INFO) 24 | log.addHandler(handler) 25 | return log 26 | 27 | 28 | logger = get_logger() 29 | 30 | 31 | def debug_mode(on: bool): 32 | global logger 33 | logger = get_logger() 34 | if not on: 35 | return 36 | logger.setLevel(logging.DEBUG) 37 | for handler in logger.handlers: 38 | handler.setLevel(logging.DEBUG) 39 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = -p no:warnings 3 | filterwarnings = 4 | ignore:.*the imp module:DeprecationWarning 5 | ignore:.*.pytest.collect 6 | ignore:.*Using or importing the ABCs:DeprecationWarning 7 | ignore:.*numpy.ufunc size changed 8 | ignore:.*The dataloader 9 | ignore:.*overload of nonzero 10 | ignore:.*scipy.sparse 11 | ignore:.*torch.distributed.reduce_op is deprecated 12 | -------------------------------------------------------------------------------- /requirements-dev0.txt: -------------------------------------------------------------------------------- 1 | # Code & testing 2 | black>=22.12.0 3 | flake8>=5.0.4 4 | isort==5.11.3 # Note: this version is asserted in unit tests 5 | moto>=4.0.11 6 | pylint>=2.15.8 7 | pytest>=7.2.0 8 | mock>=4.0.3 9 | rope>=1.6.0 10 | twine>=4.0.2 11 | 12 | # Data / dependencies for ML libraries 13 | numba>=0.58.1 14 | numpy==1.23.5 15 | Cython>=3.0.8 16 | python-Levenshtein>=0.24.0 17 | pandas>=1.3.5; python_version < '3.8' 18 | pandas>=1.4.1; python_version > '3.7' 19 | scipy==1.10.1 # More recent versions were not compatible with Gensim releases https://github.com/piskvorky/gensim/issues/3525 20 | 21 | # ML Dependencies 22 | # pydoop<=2.0.0; sys_platform == 'darwin' 23 | -------------------------------------------------------------------------------- /requirements-dev1.txt: -------------------------------------------------------------------------------- 1 | # Dependencies for storage libraries 2 | protobuf>=3.19.5 # https://github.com/protocolbuffers/protobuf/issues/10051 3 | 4 | # Storage 5 | azure-core 6 | azure-storage-blob 7 | boto3 8 | google-cloud-storage 9 | minio 10 | 11 | # Machine Learning 12 | annoy 13 | catboost 14 | causalml 15 | fastai # Note: 1.0.61 has different import paths! 
16 | gensim 17 | Keras-Preprocessing 18 | lightgbm 19 | onnx 20 | onnxruntime 21 | prophet 22 | pyspark 23 | pytorch-lightning 24 | scikit-learn 25 | shap 26 | skl2onnx 27 | skorch 28 | tensorflow; sys_platform != 'darwin' 29 | tensorflow-macos; sys_platform == 'darwin' 30 | tf-keras 31 | torch 32 | torchvision 33 | transformers 34 | xgboost 35 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | dataclasses==0.8; python_version < '3.7' 2 | dataclasses-json>=0.5.7 3 | gitpython>=3.1.11 4 | joblib>=1.0.0 5 | requests>=2.23.0 6 | tqdm>=4.54.1 7 | click>=7.0 8 | numpy 9 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=missing-module-docstring 2 | from setuptools import find_packages, setup 3 | 4 | # pylint: disable=unspecified-encoding 5 | with open("requirements.txt", "r") as lines: 6 | requirements = lines.read().splitlines() 7 | 8 | 9 | setup( 10 | name="modelstore", 11 | version="0.0.81", 12 | packages=find_packages(exclude=["tests", "examples", "docs", "workflows"]), 13 | include_package_data=True, 14 | description="modelstore is a library for versioning, exporting, storing, and loading machine learning models", 15 | long_description="Please refer to: https://modelstore.readthedocs.io/en/latest/", 16 | long_description_content_type="text/markdown", 17 | url="https://github.com/operatorai/modelstore", 18 | author="Neal Lathia", 19 | classifiers=[ 20 | "Programming Language :: Python :: 3 :: Only", 21 | "Programming Language :: Python :: 3.7", 22 | "Programming Language :: Python :: 3.8", 23 | "Programming Language :: Python :: 3.9", 24 | "License :: OSI Approved :: Apache Software License", 25 | ], 26 | license="Please refer to the readme", 27 | python_requires=">=3.6", 28 | install_requires=requirements, 29 | entry_points={ 30 | 'console_scripts': ['modelstore=modelstore.__main__:cli'] 31 | }, 32 | ) 33 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/tests/__init__.py -------------------------------------------------------------------------------- /tests/ids/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/tests/ids/__init__.py -------------------------------------------------------------------------------- /tests/ids/test_model_ids.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import uuid 16 | 17 | import pytest 18 | 19 | from modelstore.ids import model_ids 20 | 21 | # pylint: disable=protected-access 22 | # pylint: disable=missing-function-docstring 23 | 24 | 25 | def test_new() -> str: 26 | model_id = model_ids.new() 27 | assert isinstance(model_id, str) 28 | assert len(model_id) == len(str(uuid.uuid4())) 29 | 30 | 31 | @pytest.mark.parametrize( 32 | "model_id,is_valid", 33 | [ 34 | ("a-model-id", True), 35 | ("a model id", False), 36 | ], 37 | ) 38 | def test_validate_no_spaces(model_id: str, is_valid: bool): 39 | assert model_ids.validate(model_id) == is_valid 40 | 41 | 42 | def test_validate_no_special_characters(): 43 | for character in model_ids._RESERVED_CHARACTERS: 44 | model_id = f"an-invalid-{character}-model-id" 45 | assert not model_ids.validate(model_id) 46 | -------------------------------------------------------------------------------- /tests/metadata/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/tests/metadata/__init__.py -------------------------------------------------------------------------------- /tests/metadata/code/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/tests/metadata/code/__init__.py -------------------------------------------------------------------------------- /tests/metadata/code/test_code.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | from datetime import datetime 15 | 16 | import pytest 17 | from mock import patch 18 | 19 | from modelstore.metadata import metadata 20 | 21 | # pylint: disable=missing-function-docstring 22 | # pylint: disable=redefined-outer-name 23 | 24 | 25 | @pytest.fixture 26 | def now(): 27 | return datetime.now() 28 | 29 | 30 | @pytest.fixture 31 | def code_meta_data(now): 32 | return metadata.Code( 33 | runtime="python:1.2.3", 34 | user="username", 35 | created=now.strftime("%Y/%m/%d/%H:%M:%S"), 36 | dependencies={}, 37 | git={"repository": "test"}, 38 | ) 39 | 40 | 41 | @patch("modelstore.metadata.code.code.revision") 42 | @patch("modelstore.metadata.code.code.runtime") 43 | def test_generate(mock_runtime, mock_revision, code_meta_data, now): 44 | mock_runtime.get_user.return_value = "username" 45 | mock_runtime.get_python_version.return_value = "python:1.2.3" 46 | mock_revision.git_meta.return_value = {"repository": "test"} 47 | result = metadata.Code.generate([], created=now) 48 | assert code_meta_data == result 49 | 50 | 51 | def test_encode_and_decode(code_meta_data): 52 | # pylint: disable=no-member 53 | json_result = code_meta_data.to_json() 54 | result = metadata.Code.from_json(json_result) 55 | assert result == code_meta_data 56 | -------------------------------------------------------------------------------- /tests/metadata/code/test_dependencies.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import sys 15 | 16 | import pytest 17 | 18 | from modelstore.metadata.code import dependencies 19 | 20 | # pylint: disable=protected-access 21 | # pylint: disable=missing-function-docstring 22 | 23 | 24 | def test_get_version(): 25 | assert dependencies._get_version("a-missing-dependency") is None 26 | assert dependencies._get_version("pytest") == pytest.__version__ 27 | if "isort" in sys.modules: 28 | # Force import 29 | del sys.modules["isort"] 30 | assert dependencies._get_version("isort") == "5.11.3" 31 | 32 | 33 | def test_get_dependency_versions(): 34 | test_deps = [ 35 | "annoy", 36 | "pytest", 37 | "pylint", 38 | "black", 39 | "flake8", 40 | "isort", 41 | "a-missing-dependency", 42 | "pickle", 43 | ] 44 | result = dependencies.get_dependency_versions(test_deps) 45 | assert list(result.keys()) == test_deps 46 | 47 | 48 | def test_module_exists(): 49 | assert dependencies.module_exists("pytest") is True 50 | assert dependencies.module_exists("a-missing-mod") is False 51 | -------------------------------------------------------------------------------- /tests/metadata/code/test_revision.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import os 15 | import sys 16 | import subprocess 17 | 18 | import git 19 | import pytest 20 | 21 | from modelstore.metadata.code import revision 22 | 23 | # pylint: disable=missing-function-docstring 24 | 25 | 26 | @pytest.mark.skipif(sys.platform!="darwin", reason="skipping in ubuntu") 27 | def test_repo_name(): 28 | # pylint: disable=protected-access 29 | repo = git.Repo(search_parent_directories=True) 30 | repo_name = revision._repo_name(repo) 31 | if repo_name == "": 32 | # Not a git repo 33 | return 34 | assert repo_name == "modelstore" 35 | 36 | 37 | def test_fail_gracefully(): 38 | # Assumes that there is no git repo at / 39 | current_wd = os.getcwd() 40 | os.chdir("/") 41 | assert revision.git_meta() is None 42 | os.chdir(current_wd) 43 | 44 | 45 | @pytest.mark.skip(reason="This test is flaky when run via Github actions") 46 | def test_git_meta(): 47 | try: 48 | res = subprocess.check_output("git log . | head -n 1", shell=True) 49 | exp = res.decode("utf-8").strip().split(" ")[1] 50 | # pylint: disable=bare-except 51 | except: 52 | # Repo is not a git repo 53 | return 54 | 55 | meta = revision.git_meta() 56 | assert meta is not None 57 | assert meta["repository"] == "modelstore" 58 | if meta["local_changes"] is False: 59 | assert meta["sha"] == exp 60 | -------------------------------------------------------------------------------- /tests/metadata/code/test_runtime.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
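A sketch of what revision.git_meta (exercised by the tests above) reports when called from inside a git checkout; the printed values are illustrative:

from modelstore.metadata.code import revision

meta = revision.git_meta()  # None when the working directory is not a git repository
if meta is not None:
    print(meta["repository"])     # e.g. "modelstore"
    print(meta["sha"])            # the latest commit hash
    print(meta["local_changes"])  # whether the checkout has uncommitted changes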
14 | import sys 15 | 16 | from mock import patch 17 | 18 | from modelstore.metadata.code import runtime 19 | 20 | # pylint: disable=missing-function-docstring 21 | 22 | 23 | def test_get_python_version(): 24 | vers = sys.version_info 25 | expected = ".".join(str(x) for x in [vers.major, vers.minor, vers.micro]) 26 | assert runtime.get_python_version() == f"python:{expected}" 27 | 28 | 29 | @patch("modelstore.metadata.code.runtime.getpass") 30 | def test_get_user(mock_getpass): 31 | mock_getpass.getuser.return_value = "username" 32 | assert runtime.get_user() == "username" 33 | -------------------------------------------------------------------------------- /tests/metadata/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/tests/metadata/dataset/__init__.py -------------------------------------------------------------------------------- /tests/metadata/dataset/fixtures.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import numpy as np 15 | import pandas as pd 16 | import pytest 17 | 18 | # pylint: disable=redefined-outer-name 19 | # pylint: disable=missing-function-docstring 20 | 21 | 22 | @pytest.fixture 23 | def np_2d_array(): 24 | return np.random.rand(10, 50) 25 | 26 | 27 | @pytest.fixture 28 | def np_1d_array(): 29 | return np.array([1, 2, 1, 2, 1]) 30 | 31 | 32 | @pytest.fixture 33 | def pd_dataframe(): 34 | rows = [] 35 | for _ in range(10): 36 | rows.append({f"col_{j}": j for j in range(50)}) 37 | return pd.DataFrame(rows) 38 | 39 | 40 | @pytest.fixture 41 | def pd_series(): 42 | return pd.Series([1, 2, 1, 2, 1]) 43 | -------------------------------------------------------------------------------- /tests/metadata/dataset/test_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | from modelstore.metadata.dataset.dataset import Dataset, Features, Labels 15 | # pylint: disable=unused-import 16 | from tests.metadata.dataset.fixtures import ( 17 | np_1d_array, 18 | np_2d_array, 19 | pd_dataframe, 20 | pd_series 21 | ) 22 | 23 | # pylint: disable=redefined-outer-name 24 | # pylint: disable=missing-function-docstring 25 | 26 | 27 | def test_describe_nothing(): 28 | res = Dataset.generate() 29 | assert res is None 30 | 31 | 32 | def test_describe_numpy(np_2d_array, np_1d_array): 33 | exp = Dataset( 34 | features=Features(shape=[10, 50]), 35 | labels=Labels(shape=[5], values={1: 3, 2: 2}), 36 | ) 37 | res = Dataset.generate(np_2d_array, np_1d_array) 38 | assert exp == res 39 | 40 | 41 | def test_describe_dataframe(pd_dataframe, pd_series): 42 | exp = Dataset( 43 | features=Features(shape=[10, 50]), 44 | labels=Labels(shape=[5], values={1: 3, 2: 2}), 45 | ) 46 | res = Dataset.generate(pd_dataframe, pd_series) 47 | assert exp == res 48 | -------------------------------------------------------------------------------- /tests/metadata/dataset/test_features.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from modelstore.metadata.dataset.features import Features 15 | # pylint: disable=unused-import 16 | from tests.metadata.dataset.fixtures import ( 17 | np_1d_array, 18 | np_2d_array, 19 | pd_dataframe 20 | ) 21 | 22 | # pylint: disable=redefined-outer-name 23 | # pylint: disable=missing-function-docstring 24 | 25 | 26 | def test_describe_nothing(): 27 | res = Features.generate() 28 | assert res is None 29 | 30 | 31 | def test_describe_numpy_2d(np_2d_array): 32 | exp = Features(shape=[10, 50]) 33 | res = Features.generate(np_2d_array) 34 | assert exp == res 35 | 36 | 37 | def test_describe_numpy_1d(np_1d_array): 38 | exp = Features(shape=[5]) 39 | res = Features.generate(np_1d_array) 40 | assert exp == res 41 | 42 | 43 | def test_describe_dataframe(pd_dataframe): 44 | exp = Features(shape=[10, 50]) 45 | res = Features.generate(pd_dataframe) 46 | assert exp == res 47 | -------------------------------------------------------------------------------- /tests/metadata/dataset/test_labels.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | from modelstore.metadata.dataset.labels import Labels 15 | # pylint: disable=unused-import 16 | from tests.metadata.dataset.fixtures import ( 17 | np_1d_array, 18 | np_2d_array, 19 | pd_dataframe, 20 | pd_series 21 | ) 22 | 23 | # pylint: disable=redefined-outer-name 24 | # pylint: disable=missing-function-docstring 25 | 26 | 27 | def test_describe_nothing(): 28 | res = Labels.generate() 29 | assert res is None 30 | 31 | 32 | def test_describe_numpy_2d(np_2d_array): 33 | exp = Labels(shape=[10, 50], values=None) 34 | res = Labels.generate(np_2d_array) 35 | assert exp == res 36 | 37 | 38 | def test_describe_numpy_1d(np_1d_array): 39 | exp = Labels(shape=[5], values={1: 3, 2: 2}) 40 | res = Labels.generate(np_1d_array) 41 | assert exp == res 42 | 43 | 44 | def test_describe_dataframe(pd_dataframe): 45 | exp = Labels(shape=[10, 50], values=None) 46 | res = Labels.generate(pd_dataframe) 47 | assert exp == res 48 | 49 | 50 | def test_describe_series(pd_series): 51 | exp = Labels(shape=[5], values={1: 3, 2: 2}) 52 | res = Labels.generate(pd_series) 53 | assert exp == res 54 | -------------------------------------------------------------------------------- /tests/metadata/dataset/test_types.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | from modelstore.metadata.dataset.types import ( 15 | is_numpy_array, 16 | is_pandas_dataframe, 17 | is_pandas_series 18 | ) 19 | # pylint: disable=unused-import 20 | from tests.metadata.dataset.fixtures import ( 21 | np_1d_array, 22 | np_2d_array, 23 | pd_dataframe, 24 | pd_series 25 | ) 26 | 27 | # pylint: disable=redefined-outer-name 28 | # pylint: disable=missing-function-docstring 29 | 30 | 31 | def test_is_numpy_array(np_1d_array, np_2d_array, pd_dataframe, pd_series): 32 | assert is_numpy_array(np_1d_array) 33 | assert is_numpy_array(np_2d_array) 34 | assert not is_numpy_array(pd_dataframe) 35 | assert not is_numpy_array(pd_series) 36 | 37 | 38 | def test_is_pandas_dataframe(np_1d_array, np_2d_array, pd_dataframe, pd_series): 39 | assert not is_pandas_dataframe(np_1d_array) 40 | assert not is_pandas_dataframe(np_2d_array) 41 | assert is_pandas_dataframe(pd_dataframe) 42 | assert not is_pandas_dataframe(pd_series) 43 | 44 | 45 | def test_is_pandas_series(np_1d_array, np_2d_array, pd_dataframe, pd_series): 46 | assert not is_pandas_series(np_1d_array) 47 | assert not is_pandas_series(np_2d_array) 48 | assert not is_pandas_series(pd_dataframe) 49 | assert is_pandas_series(pd_series) 50 | -------------------------------------------------------------------------------- /tests/metadata/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/tests/metadata/model/__init__.py -------------------------------------------------------------------------------- /tests/metadata/model/test_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import pytest 15 | 16 | from modelstore.metadata import metadata 17 | 18 | # pylint: disable=missing-function-docstring 19 | # pylint: disable=redefined-outer-name 20 | 21 | 22 | @pytest.fixture 23 | def model_meta_data(): 24 | return metadata.Model( 25 | domain="domain", 26 | model_id="model_id", 27 | model_type=metadata.ModelType.generate("library", "class-name"), 28 | parameters=None, 29 | data=None, 30 | ) 31 | 32 | 33 | def test_generate(model_meta_data): 34 | result = metadata.Model.generate( 35 | "domain", 36 | "model_id", 37 | metadata.ModelType.generate("library", "class-name"), 38 | ) 39 | assert model_meta_data == result 40 | 41 | 42 | def test_encode_and_decode(model_meta_data): 43 | # pylint: disable=no-member 44 | json_result = model_meta_data.to_json() 45 | result = metadata.Model.from_json(json_result) 46 | assert result == model_meta_data 47 | -------------------------------------------------------------------------------- /tests/metadata/model/test_model_type.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import pytest 15 | 16 | from modelstore.metadata import metadata 17 | 18 | # pylint: disable=missing-function-docstring 19 | # pylint: disable=redefined-outer-name 20 | 21 | 22 | @pytest.fixture 23 | def model_type_meta_data(): 24 | return metadata.ModelType( 25 | library="a-library", 26 | type="a-class-name", 27 | models=None, 28 | ) 29 | 30 | 31 | @pytest.fixture 32 | def nested_model_type_meta_data(): 33 | return metadata.ModelType( 34 | library="multiple-models", 35 | type=None, 36 | models=[ 37 | metadata.ModelType.generate("sklearn", "RandomForestClassifier"), 38 | metadata.ModelType.generate("shap", "TreeExplainer"), 39 | ], 40 | ) 41 | 42 | 43 | def test_generate(model_type_meta_data): 44 | result = metadata.ModelType.generate( 45 | library="a-library", 46 | class_name="a-class-name", 47 | ) 48 | assert model_type_meta_data == result 49 | 50 | 51 | def test_encode_and_decode(model_type_meta_data): 52 | # pylint: disable=no-member 53 | json_result = model_type_meta_data.to_json() 54 | result = metadata.ModelType.from_json(json_result) 55 | assert result == model_type_meta_data 56 | 57 | 58 | def test_generate_multiple_models(nested_model_type_meta_data): 59 | result = metadata.ModelType.generate( 60 | "multiple-models", 61 | models=[ 62 | metadata.ModelType.generate("sklearn", "RandomForestClassifier"), 63 | metadata.ModelType.generate("shap", "TreeExplainer"), 64 | ], 65 | ) 66 | assert nested_model_type_meta_data == result 67 | 68 | 69 | def test_encode_and_decode_nested(nested_model_type_meta_data): 70 | # pylint: disable=no-member 71 | json_result = nested_model_type_meta_data.to_json() 72 | result = metadata.ModelType.from_json(json_result) 73 | assert result == nested_model_type_meta_data 74 | -------------------------------------------------------------------------------- 
/tests/metadata/storage/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/tests/metadata/storage/__init__.py -------------------------------------------------------------------------------- /tests/metadata/storage/test_storage.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import json 15 | 16 | from modelstore.metadata import metadata 17 | 18 | # pylint: disable=missing-function-docstring 19 | # pylint: disable=redefined-outer-name 20 | # pylint: disable=no-member 21 | 22 | 23 | def test_generate_from_path(): 24 | expected = metadata.Storage( 25 | type="file_system", 26 | root="root", 27 | path="/path/to/files", 28 | bucket=None, 29 | container=None, 30 | prefix=None, 31 | ) 32 | result = metadata.Storage.from_path( 33 | "file_system", 34 | "root", 35 | "/path/to/files", 36 | ) 37 | assert expected == result 38 | 39 | result_dict = json.loads(result.to_json()) 40 | assert "container" not in result_dict 41 | assert result_dict["type"] == "file_system" 42 | 43 | loaded = metadata.Storage.from_json(result.to_json()) 44 | assert loaded == expected 45 | 46 | 47 | def test_generate_from_container(): 48 | expected = metadata.Storage( 49 | type="container-system", 50 | path=None, 51 | bucket=None, 52 | container="container-name", 53 | prefix="/path/to/files", 54 | ) 55 | result = metadata.Storage.from_container( 56 | "container-system", "container-name", "/path/to/files" 57 | ) 58 | assert expected == result 59 | 60 | 61 | def test_generate_from_bucket(): 62 | expected = metadata.Storage( 63 | type="bucket-system", 64 | path=None, 65 | bucket="bucket-name", 66 | container=None, 67 | prefix="/path/to/files", 68 | ) 69 | result = metadata.Storage.from_bucket( 70 | "bucket-system", "bucket-name", "/path/to/files" 71 | ) 72 | assert expected == result 73 | -------------------------------------------------------------------------------- /tests/metadata/test_metadata.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import os 15 | from datetime import datetime 16 | 17 | import pytest 18 | 19 | import modelstore 20 | from modelstore.metadata import metadata 21 | 22 | # pylint: disable=redefined-outer-name 23 | # pylint: disable=protected-access 24 | # pylint: disable=missing-function-docstring 25 | # pylint: disable=no-member 26 | 27 | 28 | @pytest.fixture 29 | def extra_meta_data() -> dict: 30 | return { 31 | "field-1": "value", 32 | "field-2": ["list", "of", "values"], 33 | } 34 | 35 | 36 | @pytest.fixture 37 | def meta_data(extra_meta_data): 38 | return metadata.Summary( 39 | code=metadata.Code( 40 | runtime="python:1.2.3", 41 | user="username", 42 | created=datetime.now().strftime("%Y/%m/%d/%H:%M:%S"), 43 | dependencies={}, 44 | git={"repository": "test"}, 45 | ), 46 | model=metadata.Model.generate( 47 | domain="domain", 48 | model_id="model-id", 49 | model_type=metadata.ModelType.generate( 50 | "library", 51 | "class-name", 52 | ), 53 | ), 54 | storage=metadata.Storage.from_path( 55 | "example-storage-type", 56 | "root-directory", 57 | "path/to/files", 58 | ), 59 | modelstore=modelstore.__version__, 60 | extra=extra_meta_data, 61 | ) 62 | 63 | 64 | def test_generate(meta_data, extra_meta_data): 65 | result = metadata.Summary.generate( 66 | code_meta_data=meta_data.code, 67 | model_meta_data=meta_data.model, 68 | storage_meta_data=meta_data.storage, 69 | extra_metadata=extra_meta_data, 70 | ) 71 | assert result == meta_data 72 | 73 | encoded = result.to_json() 74 | decoded = metadata.Summary.from_json(encoded) 75 | assert decoded == meta_data 76 | assert decoded.code == meta_data.code 77 | assert decoded.model == meta_data.model 78 | assert decoded.storage == meta_data.storage 79 | 80 | 81 | def test_dump_and_load(meta_data, tmp_path): 82 | target_file = os.path.join(tmp_path, "meta.json") 83 | assert not os.path.exists(target_file) 84 | meta_data.dumps(target_file) 85 | assert os.path.exists(target_file) 86 | # pylint: disable=bare-except 87 | # pylint: disable=unspecified-encoding 88 | 89 | loaded = metadata.Summary.loads(target_file) 90 | assert loaded == meta_data 91 | -------------------------------------------------------------------------------- /tests/metadata/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/tests/metadata/utils/__init__.py -------------------------------------------------------------------------------- /tests/metadata/utils/test_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import numpy as np 15 | import pytest 16 | 17 | from modelstore.metadata.utils.utils import ( 18 | exclude_field, 19 | remove_nones, 20 | validate_json_serializable 21 | ) 22 | 23 | # pylint: disable=missing-function-docstring 24 | 25 | 26 | def test_remove_nones(): 27 | exp = {"a": "value-a"} 28 | res = remove_nones({"a": "value-a", "b": None}) 29 | assert exp == res 30 | 31 | 32 | @pytest.mark.parametrize( 33 | "value,should_exclude", 34 | [ 35 | (None, True), 36 | ("", False), 37 | (1, False), 38 | ], 39 | ) 40 | def test_exclude_field(value, should_exclude): 41 | assert exclude_field(value) == should_exclude 42 | 43 | 44 | @pytest.mark.parametrize( 45 | "value,should_raise", 46 | [ 47 | ({}, False), 48 | ({"key": 1}, False), 49 | ([], True), # Not a dictionary 50 | ({"key": np.array([1, 2, 3])}, True), # Not JSON serializable 51 | ], 52 | ) 53 | def test_validate_json_serializable(value, should_raise): 54 | """Validates that `value` is a JSON serializable dictionary""" 55 | if should_raise: 56 | with pytest.raises(TypeError): 57 | validate_json_serializable("field-name", value) 58 | else: 59 | try: 60 | validate_json_serializable("field-name", value) 61 | # pylint: disable=broad-except 62 | except Exception as exc: 63 | pytest.fail(f"validate_json_serializable() raised: {exc}") 64 | -------------------------------------------------------------------------------- /tests/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/tests/models/__init__.py -------------------------------------------------------------------------------- /tests/models/test_common.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import json 15 | import os 16 | 17 | import pytest 18 | 19 | from modelstore.models import common 20 | 21 | # pylint: disable=redefined-outer-name,missing-function-docstring 22 | 23 | 24 | @pytest.fixture 25 | def value_to_save(): 26 | return {"key": "value"} 27 | 28 | 29 | def test_save_json(tmp_path, value_to_save): 30 | target = common.save_json(tmp_path, "data.json", value_to_save) 31 | with open(target, "r") as lines: 32 | res = json.loads(lines.read()) 33 | assert value_to_save == res 34 | 35 | 36 | def test_save_joblib(tmp_path, value_to_save): 37 | exp_path = os.path.join(tmp_path, "model.joblib") 38 | # Save returns the full path 39 | target = common.save_joblib(tmp_path, value_to_save, file_name="model.joblib") 40 | assert target == exp_path 41 | assert os.path.exists(exp_path) 42 | 43 | # Load takes the full path 44 | res = common.load_joblib(exp_path) 45 | assert value_to_save == res 46 | -------------------------------------------------------------------------------- /tests/models/test_managers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import pytest 15 | 16 | from modelstore.models import managers 17 | from modelstore.models.catboost import CatBoostManager 18 | from modelstore.models.pyspark import PySparkManager 19 | from modelstore.models.pytorch import PyTorchManager 20 | from modelstore.models.pytorch_lightning import PyTorchLightningManager 21 | from modelstore.models.sklearn import SKLearnManager 22 | from modelstore.models.tensorflow import TensorflowManager 23 | from modelstore.models.xgboost import XGBoostManager 24 | 25 | # pylint: disable=missing-function-docstring 26 | 27 | 28 | def test_iter_libraries(): 29 | mgrs = {library: manager for library, manager in managers.iter_libraries()} 30 | assert len(mgrs) == 18 31 | assert isinstance(mgrs["sklearn"], SKLearnManager) 32 | assert isinstance(mgrs["pytorch"], PyTorchManager) 33 | assert isinstance(mgrs["xgboost"], XGBoostManager) 34 | assert isinstance(mgrs["catboost"], CatBoostManager) 35 | assert isinstance(mgrs["pytorch_lightning"], PyTorchLightningManager) 36 | assert isinstance(mgrs["pyspark"], PySparkManager) 37 | 38 | 39 | def test_matching_managers_empty_set(): 40 | with pytest.raises(ValueError): 41 | managers.matching_managers([], model="none") 42 | 43 | 44 | def test_no_matching_managers(): 45 | libraries = [m for _, m in managers.iter_libraries()] 46 | with pytest.raises(ValueError): 47 | managers.matching_managers(libraries, model="none") 48 | 49 | 50 | def test_get_keras_manager(): 51 | # The keras manager was merged with the tensorflow one 52 | # in modelstore==0.0.73; here we test explicitly that 53 | # modelstore returns the TensorflowManager for 54 | # backwards compatibility 55 | manager = managers.get_manager("keras") 56 | assert isinstance(manager, TensorflowManager) 57 | 58 | 59 | def test_get_manager(): 60 | # pylint: 
disable=protected-access 61 | for name, manager_type in managers._LIBRARIES.items(): 62 | manager = managers.get_manager(name) 63 | assert isinstance(manager, manager_type) 64 | 65 | 66 | def test_get_unknown_manager(): 67 | with pytest.raises(KeyError): 68 | managers.get_manager("an-unknown-library") 69 | -------------------------------------------------------------------------------- /tests/models/test_missing_manager.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import pytest 15 | 16 | from modelstore.models.missing_manager import MissingDepManager 17 | 18 | # pylint: disable=redefined-outer-name,missing-function-docstring 19 | 20 | 21 | @pytest.fixture 22 | def missing_library_manager(): 23 | return MissingDepManager("some-missing-library") 24 | 25 | 26 | def test_missing_dep_create(missing_library_manager): 27 | with pytest.raises(ModuleNotFoundError): 28 | missing_library_manager.upload("test-domain", "model-id") 29 | 30 | 31 | def test_missing_manager_matches_with(missing_library_manager): 32 | assert not missing_library_manager.matches_with(model="value") 33 | 34 | 35 | def test_load_model(missing_library_manager): 36 | with pytest.raises(ModuleNotFoundError): 37 | missing_library_manager.load("model-path", None) 38 | -------------------------------------------------------------------------------- /tests/models/test_model_file.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import os 15 | from tempfile import TemporaryDirectory 16 | 17 | import pytest 18 | 19 | from modelstore.metadata import metadata 20 | from modelstore.models.model_file import ModelFileManager, copy_file 21 | 22 | # pylint: disable=protected-access 23 | # pylint: disable=redefined-outer-name 24 | # pylint: disable=missing-function-docstring 25 | 26 | 27 | @pytest.fixture 28 | def model_file(tmpdir): 29 | # pylint: disable=unspecified-encoding 30 | file_path = os.path.join(tmpdir, "model.txt") 31 | with open(file_path, "w") as out: 32 | out.write("example-model-content") 33 | return file_path 34 | 35 | 36 | @pytest.fixture 37 | def model_file_manager(): 38 | return ModelFileManager() 39 | 40 | 41 | def test_model_info(model_file_manager): 42 | exp = metadata.ModelType("model_file", None, None) 43 | assert model_file_manager.model_info() == exp 44 | 45 | 46 | def test_model_data(model_file_manager): 47 | res = model_file_manager.model_data() 48 | assert res is None 49 | 50 | 51 | def test_required_kwargs(model_file_manager): 52 | assert model_file_manager._required_kwargs() == ["model"] 53 | 54 | 55 | def test_matches_with(model_file_manager, model_file): 56 | assert model_file_manager.matches_with(model=model_file) 57 | assert not model_file_manager.matches_with(model="a-string-value") 58 | assert not model_file_manager.matches_with(classifier=model_file) 59 | 60 | 61 | def test_get_functions(model_file_manager, model_file): 62 | assert len(model_file_manager._get_functions(model=model_file)) == 1 63 | with pytest.raises(TypeError): 64 | model_file_manager._get_functions(model="not-a-persisted-model-file") 65 | 66 | 67 | def test_get_params(model_file_manager, model_file): 68 | assert model_file_manager.get_params(model=model_file) == {} 69 | 70 | 71 | def test_copy_file(model_file): 72 | with TemporaryDirectory() as target_dir: 73 | target_file = os.path.join(target_dir, os.path.split(model_file)[1]) 74 | assert not os.path.exists(target_file) 75 | copy_file(target_dir, source=model_file) 76 | assert os.path.exists(target_file) 77 | 78 | 79 | def test_load_model(model_file_manager): 80 | with pytest.raises(ValueError): 81 | model_file_manager.load("model-path", None) 82 | -------------------------------------------------------------------------------- /tests/models/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import sys 15 | from random import randint 16 | 17 | import pandas as pd 18 | import pytest 19 | from sklearn.datasets import make_classification 20 | 21 | # pylint: disable=missing-function-docstring 22 | 23 | 24 | def is_macos() -> bool: 25 | return sys.platform == "darwin" 26 | 27 | 28 | @pytest.fixture(scope="session") 29 | def classification_data(): 30 | X_train, y_train = make_classification( 31 | n_samples=50, 32 | n_features=5, 33 | n_redundant=0, 34 | n_informative=3, 35 | n_clusters_per_class=1, 36 | ) 37 | return X_train, y_train 38 | 39 | 40 | @pytest.fixture(scope="session") 41 | def classification_df(classification_data): 42 | X_train, y_train = classification_data 43 | df = pd.DataFrame( 44 | X_train, 45 | columns=[f"x{i}" for i in range(X_train.shape[1])], 46 | ) 47 | df["y"] = y_train 48 | return df 49 | 50 | 51 | @pytest.fixture(scope="session") 52 | def classification_row(classification_df): 53 | return classification_df.iloc[randint(0, classification_df.shape[0] - 1)] 54 | -------------------------------------------------------------------------------- /tests/storage/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/tests/storage/__init__.py -------------------------------------------------------------------------------- /tests/storage/states/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/tests/storage/states/__init__.py -------------------------------------------------------------------------------- /tests/storage/states/test_model_states.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import pytest 15 | 16 | from modelstore.storage.states.model_states import ( 17 | ReservedModelStates, 18 | is_reserved_state, 19 | is_valid_state_name 20 | ) 21 | 22 | # pylint: disable=missing-function-docstring 23 | 24 | 25 | @pytest.mark.parametrize( 26 | "state_name,is_valid", 27 | [ 28 | (None, False), 29 | ("", False), 30 | ("a", False), 31 | ("path/to/place", False), 32 | ("other", True), 33 | (ReservedModelStates.DELETED.value, False), 34 | ], 35 | ) 36 | def test_is_valid_state_name(state_name, is_valid): 37 | assert is_valid_state_name(state_name) == is_valid 38 | 39 | 40 | def test_is_reserved_state(): 41 | for reserved_state in ReservedModelStates: 42 | assert is_reserved_state(reserved_state.value) 43 | -------------------------------------------------------------------------------- /tests/storage/test_blob_storage.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import os 15 | from datetime import datetime, timedelta 16 | from pathlib import Path 17 | 18 | import pytest 19 | 20 | from modelstore.metadata import metadata 21 | from modelstore.storage.local import FileSystemStorage 22 | 23 | # pylint: disable=missing-function-docstring 24 | 25 | 26 | def mock_meta_data(domain: str, model_id: str, inc_time: int) -> metadata.Summary: 27 | return metadata.Summary.generate( 28 | model_meta_data=metadata.Model.generate( 29 | domain=domain, 30 | model_id=model_id, 31 | model_type=None, 32 | ), 33 | code_meta_data=metadata.Code.generate( 34 | deps_list=[], 35 | created=datetime.now() + timedelta(hours=inc_time), 36 | ), 37 | storage_meta_data=None, 38 | ) 39 | 40 | 41 | @pytest.fixture 42 | def mock_model_file(tmp_path): 43 | model_file = os.path.join(tmp_path, "test-file.txt") 44 | Path(model_file).touch() 45 | return model_file 46 | 47 | 48 | @pytest.fixture 49 | def mock_blob_storage(tmp_path): 50 | return FileSystemStorage(str(tmp_path)) 51 | 52 | 53 | def assert_file_contents_equals(file_path: str, expected: metadata.Summary): 54 | # pylint: disable=unspecified-encoding 55 | # pylint: disable=no-member 56 | with open(file_path, "r") as lines: 57 | actual = metadata.Summary.from_json(lines.read()) 58 | assert expected == actual 59 | -------------------------------------------------------------------------------- /tests/storage/test_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import json 15 | import os 16 | from tempfile import TemporaryDirectory 17 | 18 | # pylint: disable=redefined-outer-name 19 | # pylint: disable=missing-function-docstring 20 | TEST_FILE_NAME = "test-file.txt" 21 | TEST_FILE_CONTENTS = json.dumps({"k": "v"}) 22 | TEST_FILE_LIST = [f"test-file-{i}.json" for i in range(3)] 23 | TEST_FILE_TYPES = ["json", "txt"] 24 | 25 | 26 | def create_file(tmp_path, contents: str = None) -> str: 27 | # pylint: disable=unspecified-encoding 28 | if contents is None: 29 | contents = TEST_FILE_CONTENTS 30 | source = os.path.join(tmp_path, TEST_FILE_NAME) 31 | with open(source, "w") as out: 32 | out.write(contents) 33 | return source 34 | 35 | 36 | def file_contains_expected_contents(file_path: str) -> bool: 37 | # pylint: disable=unspecified-encoding 38 | with open(file_path, "r") as lines: 39 | contents = lines.read() 40 | return contents == TEST_FILE_CONTENTS 41 | 42 | 43 | def remote_path() -> str: 44 | return "prefix/to/file" 45 | 46 | 47 | def remote_file_path() -> str: 48 | return os.path.join(remote_path(), TEST_FILE_NAME) 49 | 50 | 51 | def push_temp_file(storage, contents: str = None) -> str: 52 | with TemporaryDirectory() as tmp_dir: 53 | # pylint: disable=protected-access 54 | result = storage._push( 55 | create_file(tmp_dir, contents), 56 | remote_file_path(), 57 | ) 58 | return result 59 | 60 | 61 | def push_temp_files(storage, prefix, file_types=TEST_FILE_TYPES): 62 | with TemporaryDirectory() as tmp_dir: 63 | for file_type in file_types: 64 | file_name = f"test-file-source.{file_type}" 65 | file_path = os.path.join(tmp_dir, file_name) 66 | # pylint: disable=unspecified-encoding 67 | with open(file_path, "w") as out: 68 | out.write(json.dumps({"key": "value"})) 69 | 70 | # Push the file to storage 71 | # pylint: disable=protected-access 72 | result = storage._push(file_path, os.path.join(prefix, file_name)) 73 | assert result == os.path.join(prefix, file_name) 74 | -------------------------------------------------------------------------------- /tests/storage/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/tests/storage/util/__init__.py -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import os 15 | from functools import partial 16 | from pathlib import Path, PosixPath 17 | 18 | import pytest 19 | 20 | from modelstore.model_store import ModelStore 21 | from modelstore.models.managers import _LIBRARIES 22 | from modelstore.models.missing_manager import MissingDepManager 23 | from modelstore.models.model_manager import ModelManager 24 | 25 | # pylint: disable=protected-access 26 | # pylint: disable=redefined-outer-name 27 | # pylint: disable=missing-function-docstring 28 | 29 | 30 | def libraries_without_sklearn(): 31 | libraries = _LIBRARIES.copy() 32 | libraries.pop("sklearn") 33 | return libraries 34 | 35 | 36 | def iter_only_sklearn(_): 37 | for k, v in _LIBRARIES.items(): 38 | if k == "sklearn": 39 | yield k, v() 40 | else: 41 | yield k, partial(MissingDepManager, library=k)() 42 | 43 | 44 | def validate_library_attributes(store: ModelStore, allowed: list, not_allowed: list): 45 | # During dev mode, all libraries will be installed 46 | for library in allowed: 47 | assert hasattr(store, library) 48 | mgr = store.__getattribute__(library) 49 | assert issubclass(type(mgr), ModelManager) 50 | assert not isinstance(mgr, MissingDepManager) 51 | 52 | for library in not_allowed: 53 | assert hasattr(store, library) 54 | mgr = store.__getattribute__(library) 55 | assert issubclass(type(mgr), ModelManager) 56 | assert isinstance(mgr, MissingDepManager) 57 | with pytest.raises(ModuleNotFoundError): 58 | mgr.upload(domain="test", model_id="model-id", model="test") 59 | 60 | 61 | @pytest.fixture 62 | def model_file(tmp_path: PosixPath): 63 | file_path = os.path.join(tmp_path, "model.txt") 64 | Path(file_path).touch() 65 | return file_path 66 | -------------------------------------------------------------------------------- /workflows/Makefile: -------------------------------------------------------------------------------- 1 | VIRTUALENV_NAME=modelstore.$(shell pwd | rev | cut -d '/' -f 1 | rev) 2 | REPO_ROOT=$(shell cd ../ && pwd) 3 | 4 | .PHONY: setup pyenv pyenv-uninstall refresh 5 | 6 | pyenv: pyenv-uninstall 7 | @$(REPO_ROOT)/bin/_pyenv_install $(VIRTUALENV_NAME) 8 | find requirements/ -name "*.txt" -type f -exec pip install -r '{}' ';' 9 | 10 | refresh: 11 | @echo "\n 🔵 Refreshing installation of modelstore" 12 | pip install --upgrade pip setuptools wheel 13 | pip uninstall -y modelstore 14 | pip install --no-cache-dir -e $(REPO_ROOT) 15 | 16 | pyenv-uninstall: 17 | @$(REPO_ROOT)/bin/_pyenv_uninstall $(VIRTUALENV_NAME) 18 | 19 | setup: 20 | pip install --upgrade pip setuptools wheel 21 | pip install -r requirements.txt 22 | -------------------------------------------------------------------------------- /workflows/actions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/workflows/actions/__init__.py -------------------------------------------------------------------------------- /workflows/actions/actions.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import Any, List 15 | 16 | from modelstore import ModelStore 17 | # pylint: disable=import-error 18 | from workflows.actions import cli, models, storage 19 | 20 | MODEL_DOMAIN = "diabetes-boosting-demo" 21 | 22 | 23 | def run_on_storage(model_store: ModelStore): 24 | """Runs a series of actions on `model_store` that don't require a model""" 25 | for func in storage.get_actions(): 26 | print(f"🔍 Running {str(func)}") 27 | func(model_store, MODEL_DOMAIN) 28 | print("✅ Storage assertions passed") 29 | 30 | 31 | def run_with_model( 32 | model_store: ModelStore, model: Any, extra_metadata: dict, extra_files: List[str] 33 | ): 34 | """Runs a series of actions on `model_store` using `model`""" 35 | meta_data = model_store.upload( 36 | domain=MODEL_DOMAIN, 37 | model=model, 38 | extra_metadata=extra_metadata, 39 | extras=extra_files, 40 | ) 41 | model_id = meta_data["model"]["model_id"] 42 | print(f"✅ Finished uploading the model={model_id}") 43 | 44 | for func in models.get_actions(): 45 | print(f"🔍 Running {str(func)}") 46 | func(model_store, MODEL_DOMAIN, meta_data) 47 | print("✅ Model assertions passed") 48 | 49 | 50 | def run_cli_commands(model_path: str): 51 | """Runs a series of CLI commands""" 52 | model_id = cli.assert_upload_runs(MODEL_DOMAIN, model_path) 53 | cli.assert_download_runs(MODEL_DOMAIN, model_id) 54 | -------------------------------------------------------------------------------- /workflows/actions/cli.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import os 15 | import subprocess 16 | import tempfile 17 | 18 | 19 | def _run_cli_command(args: list) -> str: 20 | """Runs a modelstore CLI command""" 21 | command = ["python", "-m", "modelstore"] + args 22 | print(f"⏱ Running: {command}") 23 | return ( 24 | subprocess.run( 25 | command, 26 | stdout=subprocess.PIPE, 27 | check=True, 28 | ) 29 | .stdout.decode("utf-8") 30 | .replace("\n", "") 31 | ) 32 | 33 | 34 | def assert_upload_runs(domain: str, model_path: str) -> str: 35 | """Runs the 'python -m modelstore upload' command""" 36 | assert os.path.exists(model_path) 37 | model_id = _run_cli_command( 38 | [ 39 | "upload", 40 | domain, 41 | model_path, 42 | ] 43 | ) 44 | assert model_id is not None 45 | assert model_id != "" 46 | print(f"✅ CLI command uploaded model={model_id}") 47 | return model_id 48 | 49 | 50 | def assert_download_runs(domain: str, model_id: str): 51 | """Runs the 'python -m modelstore download' command 52 | in a temporary directory""" 53 | with tempfile.TemporaryDirectory() as tmp_dir: 54 | _run_cli_command(["download", domain, model_id, str(tmp_dir)]) 55 | model_dir = os.path.join(tmp_dir, domain, model_id) 56 | assert os.path.exists(model_dir) 57 | assert len(os.listdir(model_dir)) != 0 58 | print(f"✅ CLI command downloaded model={model_id}") 59 | -------------------------------------------------------------------------------- /workflows/actions/storage.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | from typing import Callable, List 15 | 16 | from modelstore import ModelStore 17 | from modelstore.utils import exceptions 18 | 19 | 20 | def assert_get_missing_domain_raises(model_store: ModelStore, _: str): 21 | """Calling get_domain() with an unknown domain raises an exception""" 22 | try: 23 | _ = model_store.get_domain("missing-domain") 24 | except exceptions.DomainNotFoundException: 25 | print("✅ Raises a DomainNotFoundException if it can't find a domain") 26 | return 27 | raise AssertionError("failed to raise DomainNotFoundException") 28 | 29 | 30 | def assert_create_model_states(model_store: ModelStore, _: str): 31 | """Creating, listing and getting model states""" 32 | state_names = ["staging", "production"] 33 | for state_name in state_names: 34 | model_store.create_model_state(state_name) 35 | model_state_names = model_store.list_model_states() 36 | 37 | for state_name in state_names: 38 | assert state_name in model_state_names 39 | print(f"✅ Created {len(state_names)} model states.") 40 | 41 | 42 | def get_actions() -> List[Callable]: 43 | """Returns the set of actions that can be run on a model_store""" 44 | return [ 45 | assert_get_missing_domain_raises, 46 | assert_create_model_states, 47 | ] 48 | -------------------------------------------------------------------------------- /workflows/fixtures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/workflows/fixtures/__init__.py -------------------------------------------------------------------------------- /workflows/fixtures/extra.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import json 15 | import os 16 | from typing import List 17 | 18 | 19 | def metadata() -> dict: 20 | """Returns a dictionary that contains extra metadata 21 | to upload alongside the model""" 22 | return { 23 | "field_name": "value", 24 | } 25 | 26 | 27 | def files(tmp_dir, num_files: int = 2) -> List[str]: 28 | """Returns the paths to files that contain 29 | extra data to upload alongside the model""" 30 | results = [] 31 | for i in range(num_files): 32 | result = os.path.join(tmp_dir, f"result-{i}.json") 33 | # pylint: disable=unspecified-encoding 34 | with open(result, "w") as out: 35 | out.write( 36 | json.dumps( 37 | { 38 | "field-1": "value-1", 39 | "field-2": "value-2", 40 | } 41 | ) 42 | ) 43 | results.append(result) 44 | return results 45 | -------------------------------------------------------------------------------- /workflows/fixtures/models.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import os 15 | 16 | import joblib 17 | import xgboost as xgb 18 | from sklearn.datasets import load_diabetes 19 | from sklearn.ensemble import GradientBoostingRegressor 20 | from sklearn.model_selection import train_test_split 21 | 22 | 23 | def _load_dataset(): 24 | diabetes = load_diabetes() 25 | X_train, _, y_train, _ = train_test_split( 26 | diabetes.data, diabetes.target, test_size=0.1, random_state=13 27 | ) 28 | return X_train, y_train 29 | 30 | 31 | def _train_sklearn(X_train, y_train): 32 | model = GradientBoostingRegressor( 33 | **{ 34 | "n_estimators": 500, 35 | "max_depth": 4, 36 | "min_samples_split": 5, 37 | "learning_rate": 0.01, 38 | "loss": "absolute_error", 39 | } 40 | ) 41 | model.fit(X_train, y_train) 42 | return model 43 | 44 | 45 | def _train_xgboost(X_train, y_train): 46 | model = xgb.XGBRegressor( 47 | objective="reg:squarederror", 48 | colsample_bytree=0.3, 49 | learning_rate=0.1, 50 | max_depth=5, 51 | alpha=10, 52 | n_estimators=10, 53 | ) 54 | model.fit(X_train, y_train) 55 | return model 56 | 57 | 58 | def iter_models(): 59 | """Generator for test models""" 60 | X_train, y_train = _load_dataset() 61 | models = [_train_sklearn, _train_xgboost] 62 | for model in models: 63 | yield model(X_train, y_train) 64 | 65 | 66 | def iter_model_files(tmp_dir: str): 67 | """Generator for test model files""" 68 | for model in iter_models(): 69 | model_path = os.path.join(tmp_dir, "model.joblib") 70 | joblib.dump(model, model_path) 71 | yield model_path 72 | -------------------------------------------------------------------------------- /workflows/main.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Neal Lathia 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import os 15 | import tempfile 16 | 17 | import click 18 | from actions import actions 19 | from fixtures import extra 20 | from fixtures.models import iter_model_files, iter_models 21 | from fixtures.modelstores import create_model_store 22 | 23 | 24 | @click.command() 25 | @click.option( 26 | "--modelstore-in", 27 | type=click.Choice( 28 | [ 29 | "filesystem", 30 | "aws-s3", 31 | "google-cloud-storage", 32 | "azure-container", 33 | "minio", 34 | ] 35 | ), 36 | ) 37 | def main(modelstore_in: str): 38 | """Executes all of the integration tests in a given storage type""" 39 | print(f"🆕 Running modelstore example with {modelstore_in} backend.") 40 | 41 | # Create a modelstore instance 42 | model_store = create_model_store(modelstore_in) 43 | 44 | # Run actions on the modelstore instance that are 45 | # not dependent on a trained model (e.g. model states) 46 | print("⏱ Running storage actions") 47 | actions.run_on_storage(model_store) 48 | 49 | # Run actions on the modelstore instance that 50 | # are dependent on a trained model (e.g. upload, download) 51 | print("⏱ Running storage + model actions") 52 | with tempfile.TemporaryDirectory() as tmp_dir: 53 | extra_files = extra.files(tmp_dir, num_files=2) 54 | for model in iter_models(): 55 | actions.run_with_model( 56 | model_store, 57 | model, 58 | extra_metadata=extra.metadata(), 59 | extra_files=extra_files, 60 | ) 61 | 62 | # Run CLI actions - the storage type is read from an 63 | # environment variable 64 | os.environ["MODEL_STORE_STORAGE"] = modelstore_in 65 | print(f"⏱ Running CLI actions for: {os.environ['MODEL_STORE_STORAGE']}") 66 | with tempfile.TemporaryDirectory() as tmp_dir: 67 | for model_path in iter_model_files(tmp_dir): 68 | actions.run_cli_commands(model_path) 69 | 70 | 71 | if __name__ == "__main__": 72 | main() 73 | -------------------------------------------------------------------------------- /workflows/requirements.txt: -------------------------------------------------------------------------------- 1 | scikit-learn>=0.23.2 2 | xgboost>=1.2.0 3 | click>=7.1.2 -------------------------------------------------------------------------------- /workflows/requirements/aws-s3.txt: -------------------------------------------------------------------------------- 1 | boto3>=1.14.56 -------------------------------------------------------------------------------- /workflows/requirements/azure-container.txt: -------------------------------------------------------------------------------- 1 | azure-core>=1.13.0 2 | azure-storage-blob>=12.8.0 -------------------------------------------------------------------------------- /workflows/requirements/filesystem.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/operatorai/modelstore/a180993317c1a1209aa7ec25f755d82fdadf90a9/workflows/requirements/filesystem.txt -------------------------------------------------------------------------------- /workflows/requirements/google-cloud-storage.txt: -------------------------------------------------------------------------------- 1 | protobuf==3.20.1 # https://github.com/protocolbuffers/protobuf/issues/10051 2 | google-cloud-storage>=1.31.0 -------------------------------------------------------------------------------- /workflows/requirements/minio.txt: -------------------------------------------------------------------------------- 1 | minio>=7.1.12 --------------------------------------------------------------------------------