├── benchmark.md ├── docker ├── .keep ├── frontend.Dockerfile ├── docker-compose-cpu-modelhub.yml ├── docker-compose-cuda10.2-modelhub.yml ├── Dockerfile └── cpu.Dockerfile ├── k8s └── .keep ├── modelci ├── hub │ ├── compressor │ │ └── .keep │ ├── deployer │ │ ├── k8s │ │ │ ├── __init__.py │ │ │ ├── README.md │ │ │ └── dispatch_api_template.yml │ │ ├── onnxs │ │ │ ├── __init__.py │ │ │ ├── proto │ │ │ │ ├── __init__.py │ │ │ │ └── service.proto │ │ │ ├── .dockerignore │ │ │ ├── deploy_model_cpu.sh │ │ │ ├── deploy_model_gpu.sh │ │ │ ├── environment.yml │ │ │ ├── onnx-serve-cpu.Dockerfile │ │ │ ├── onnx-serve-gpu.Dockerfile │ │ │ └── README.md │ │ ├── tfs │ │ │ ├── __init__.py │ │ │ ├── deploy_model_cpu.sh │ │ │ ├── deploy_model_gpu.sh │ │ │ ├── README.md │ │ │ └── rest_client.py │ │ ├── trt │ │ │ ├── __init__.py │ │ │ ├── deploy_model.sh │ │ │ └── gen-hint-environment.yml │ │ ├── pytorch │ │ │ ├── __init__.py │ │ │ ├── proto │ │ │ │ ├── __init__.py │ │ │ │ └── service.proto │ │ │ ├── .dockerignore │ │ │ ├── deploy_model_cpu.sh │ │ │ ├── deploy_model_gpu.sh │ │ │ ├── environment.yml │ │ │ ├── torch-serve-cpu.Dockerfile │ │ │ ├── torch-serve-gpu.Dockerfile │ │ │ ├── utils.py │ │ │ └── README.md │ │ ├── config │ │ │ ├── docker-env.env.example │ │ │ ├── __init__.py │ │ │ └── utils.py │ │ ├── __init__.py │ │ ├── README.md │ │ └── environment.yml │ ├── client │ │ ├── data │ │ │ └── cat.jpg │ │ ├── __init__.py │ │ ├── sample.py │ │ ├── onnx_client.py │ │ ├── torch_client.py │ │ ├── trt_client.py │ │ └── tfs_client.py │ ├── __init__.py │ ├── converter │ │ ├── __init__.py │ │ ├── to_tfs.py │ │ └── to_torchscript.py │ ├── README.md │ └── model_loader.py ├── data_engine │ ├── __init__.py │ ├── readme.md │ ├── preprocessor │ │ ├── __init__.py │ │ └── image_classification.py │ └── postprocessor │ │ ├── __init__.py │ │ └── image_classification.py ├── types │ ├── proto │ │ ├── __init__.py │ │ ├── service.proto │ │ └── service_pb2_grpc.py │ ├── __init__.py │ ├── models │ │ └── __init__.py │ ├── do │ │ ├── __init__.py │ │ ├── profile_result_do.py │ │ ├── static_profile_result_do.py │ │ ├── model_do.py │ │ └── dynamic_profile_result_do.py │ ├── vo │ │ └── __init__.py │ └── bo │ │ ├── __init__.py │ │ ├── static_profile_result_bo.py │ │ └── profile_result_bo.py ├── metrics │ ├── monetary │ │ ├── __init__.py │ │ └── README.md │ ├── cadvisor │ │ ├── __init__.py │ │ └── sample.py │ ├── benchmark │ │ ├── __init__.py │ │ └── README.md │ ├── __init__.py │ └── README.md ├── app │ ├── v1 │ │ ├── readme.md │ │ ├── __init__.py │ │ ├── endpoints │ │ │ ├── __init__.py │ │ │ ├── profiler.py │ │ │ └── visualizer.py │ │ └── api.py │ ├── experimental │ │ ├── __init__.py │ │ ├── endpoints │ │ │ ├── __init__.py │ │ │ ├── drl_tuner.py │ │ │ ├── nlp_tuner.py │ │ │ └── trainer.py │ │ └── api.py │ ├── readme.md │ ├── handler.py │ ├── __init__.py │ └── main.py ├── env-frontend.env ├── controller │ ├── readme.md │ ├── __init__.py │ └── controller.py ├── utils │ ├── __init__.py │ ├── exceptions.py │ ├── docker_api_utils.py │ └── logger.py ├── env-backend.env ├── env-mongodb.env ├── experimental │ ├── __init__.py │ ├── model │ │ ├── __init__.py │ │ └── common.py │ ├── curd │ │ ├── __init__.py │ │ └── model_train.py │ ├── finetuner │ │ ├── __init__.py │ │ └── coordinator.py │ └── mongo_client.py ├── monitor │ ├── __init__.py │ ├── readme.md │ └── gpu_node_exporter.py ├── __init__.py ├── init-mongo.sh ├── persistence │ ├── exceptions.py │ ├── __init__.py │ └── mongo_db.py ├── cli │ ├── modelps.py │ ├── __init__.py │ ├── archive │ │ └── 
model_cli.py │ └── service.py └── config.py ├── .flake8 ├── frontend ├── .stylelintignore ├── .prettierrc.js ├── .stylelintrc.js ├── screenshot │ ├── main.png │ ├── nonav.png │ ├── wide.png │ ├── nonav_wide.png │ └── visualizer.png ├── src │ ├── global.scss │ ├── app.ts │ ├── layouts │ │ └── BasicLayout │ │ │ ├── components │ │ │ ├── Footer │ │ │ │ ├── index.module.scss │ │ │ │ └── index.tsx │ │ │ ├── Logo │ │ │ │ ├── index.module.scss │ │ │ │ └── index.tsx │ │ │ └── PageNav │ │ │ │ └── index.tsx │ │ │ ├── menuConfig.ts │ │ │ └── index.tsx │ ├── pages │ │ ├── Jobs │ │ │ └── index.css │ │ ├── VersionTree │ │ │ └── utils │ │ │ │ ├── type.tsx │ │ │ │ └── mock.json │ │ ├── User │ │ │ └── index.tsx │ │ ├── Setting │ │ │ └── index.tsx │ │ ├── About │ │ │ └── index.tsx │ │ ├── Dashboard │ │ │ └── components │ │ │ │ └── Pannel │ │ │ │ ├── index.module.scss │ │ │ │ └── index.tsx │ │ ├── ModelRegister │ │ │ └── components │ │ │ │ ├── CustomUpload │ │ │ │ └── index.tsx │ │ │ │ └── CustomInputGroup │ │ │ │ └── index.tsx │ │ ├── Visualizer │ │ │ └── utils │ │ │ │ └── type.tsx │ │ └── index.css │ ├── config.ts │ └── routes.ts ├── .prettierignore ├── .eslintignore ├── .eslintrc.js ├── .editorconfig ├── build.json ├── public │ ├── index.html │ └── favicon.png ├── README.md ├── tsconfig.json └── package.json ├── ops ├── stop_node_exporter.sh ├── README.md └── start_node_exporter.sh ├── docs ├── img │ ├── modelci.png │ └── model-service-block-diagram.png └── tutorial │ ├── retrieve-and-deploy.md │ └── housekeeper.md ├── .codacy.yml ├── scripts ├── init_db.js ├── install.conda_env.sh ├── uninstall.sh ├── install.trtis_client.sh ├── install.sh ├── install.verify.sh └── generate_env.py ├── tests ├── README.md ├── pytest.ini ├── test_onnx_conversion.py ├── test_keras_conversion.py ├── test_xgboost_conversion.py ├── test_lightgbm_conversion.py ├── test_model_api.py └── test_sklearn_conversion.py ├── pyproject.toml ├── .dockerignore ├── example ├── resnet50.yml ├── resnet50_torchscript.yml ├── resnet50_explicit_path.yml ├── retinanet.yml ├── sample_k8s_deployment.conf └── sample_mrcnn.py ├── .github ├── ISSUE_TEMPLATE.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ └── run_test.yml ├── setup.cfg ├── .fossa.yml ├── .travis.yml ├── CHANGELOG.md ├── requirements.txt └── pure_requires.yml /benchmark.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docker/.keep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /k8s/.keep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /modelci/hub/compressor/.keep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /modelci/data_engine/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /modelci/types/proto/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /modelci/hub/deployer/k8s/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /modelci/hub/deployer/onnxs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /modelci/hub/deployer/tfs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /modelci/hub/deployer/trt/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /modelci/metrics/monetary/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /modelci/hub/deployer/pytorch/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /modelci/metrics/cadvisor/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /modelci/hub/deployer/onnxs/proto/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /modelci/hub/deployer/pytorch/proto/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /modelci/app/v1/readme.md: -------------------------------------------------------------------------------- 1 | # V1 APIs for ModelCI 2 | -------------------------------------------------------------------------------- /modelci/metrics/monetary/README.md: -------------------------------------------------------------------------------- 1 | # Monetary Cost -------------------------------------------------------------------------------- /modelci/env-frontend.env: -------------------------------------------------------------------------------- 1 | HOST=localhost 2 | PORT=3333 3 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | per-file-ignores = 3 | **/__init__.py: F401 4 | -------------------------------------------------------------------------------- /frontend/.stylelintignore: -------------------------------------------------------------------------------- 1 | build/ 2 | tests/ 3 | demo/ 4 | coverage/ 5 | -------------------------------------------------------------------------------- /modelci/controller/readme.md: -------------------------------------------------------------------------------- 1 | # Controller 2 | 3 | For elastic diagnosis 4 | -------------------------------------------------------------------------------- /ops/stop_node_exporter.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | kill -9 "$(lsof -t -i:9100)" 4 | -------------------------------------------------------------------------------- /docs/img/modelci.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cap-ntu/ML-Model-CI/HEAD/docs/img/modelci.png -------------------------------------------------------------------------------- /.codacy.yml: -------------------------------------------------------------------------------- 1 | exclude_paths: 2 | - 'modelci/types/proto/*.py' 3 | - 'modelci/types/**/__init__.py' 4 | -------------------------------------------------------------------------------- /frontend/.prettierrc.js: -------------------------------------------------------------------------------- 1 | const { prettier } = require('@ice/spec'); 2 | 3 | module.exports = prettier; 4 | -------------------------------------------------------------------------------- /modelci/hub/deployer/config/docker-env.env.example: -------------------------------------------------------------------------------- 1 | CUDA_DEVICE_ORDER=PCI_BUS_ID 2 | CUDA_VISIBLE_DEVICES=1 -------------------------------------------------------------------------------- /modelci/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .logger import Logger 2 | 3 | __all__ = ['exceptions', 'misc', 'Logger'] 4 | -------------------------------------------------------------------------------- /frontend/.stylelintrc.js: -------------------------------------------------------------------------------- 1 | const { stylelint } = require('@ice/spec'); 2 | 3 | module.exports = stylelint; 4 | -------------------------------------------------------------------------------- /frontend/screenshot/main.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cap-ntu/ML-Model-CI/HEAD/frontend/screenshot/main.png -------------------------------------------------------------------------------- /frontend/screenshot/nonav.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cap-ntu/ML-Model-CI/HEAD/frontend/screenshot/nonav.png -------------------------------------------------------------------------------- /frontend/screenshot/wide.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cap-ntu/ML-Model-CI/HEAD/frontend/screenshot/wide.png -------------------------------------------------------------------------------- /modelci/hub/client/data/cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cap-ntu/ML-Model-CI/HEAD/modelci/hub/client/data/cat.jpg -------------------------------------------------------------------------------- /frontend/screenshot/nonav_wide.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cap-ntu/ML-Model-CI/HEAD/frontend/screenshot/nonav_wide.png -------------------------------------------------------------------------------- /frontend/screenshot/visualizer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cap-ntu/ML-Model-CI/HEAD/frontend/screenshot/visualizer.png -------------------------------------------------------------------------------- /modelci/data_engine/readme.md: -------------------------------------------------------------------------------- 1 | # Data Engine 2 | 3 | Manages data pre-processing and post-processing.
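For example (a minimal sketch using only the names exported by the `preprocessor` and `postprocessor` packages listed below; call signatures are not shown in this listing):

```python
from modelci.data_engine.preprocessor import image_classification_preprocessor
from modelci.data_engine.postprocessor import image_classification_postprocessor
```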
4 | 5 | -------------------------------------------------------------------------------- /frontend/src/global.scss: -------------------------------------------------------------------------------- 1 | @import '@alifd/next/reset.scss'; 2 | 3 | body { 4 | -webkit-font-smoothing: antialiased; 5 | } 6 | -------------------------------------------------------------------------------- /docs/img/model-service-block-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cap-ntu/ML-Model-CI/HEAD/docs/img/model-service-block-diagram.png -------------------------------------------------------------------------------- /scripts/init_db.js: -------------------------------------------------------------------------------- 1 | db = db.getSiblingDB('modelci'); 2 | db.createUser({user: "modelci", pwd: "modelci@2020", roles: ["readWrite"]}); 3 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # Model CI Tests 2 | 3 | ## Test Model Service 4 | ```shell script 5 | pytest tests/test_model_api.py 6 | ``` 7 | -------------------------------------------------------------------------------- /frontend/.prettierignore: -------------------------------------------------------------------------------- 1 | build/ 2 | tests/ 3 | demo/ 4 | .ice/ 5 | coverage/ 6 | **/*-min.js 7 | **/*.min.js 8 | package-lock.json 9 | yarn.lock -------------------------------------------------------------------------------- /modelci/metrics/benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['BaseModelInspector', 'ReqThread'] 2 | 3 | from .metric import BaseModelInspector, ReqThread 4 | -------------------------------------------------------------------------------- /modelci/app/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: yuanmingleee 5 | Email: 6 | Date: 1/29/2021 7 | """ 8 | -------------------------------------------------------------------------------- /frontend/.eslintignore: -------------------------------------------------------------------------------- 1 | build/ 2 | tests/ 3 | demo/ 4 | .ice/ 5 | coverage/ 6 | 7 | **/*-min.js 8 | **/*.min.js 9 | 10 | package-lock.json 11 | yarn.lock 12 | -------------------------------------------------------------------------------- /modelci/app/v1/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 6/20/2020 7 | """ 8 | -------------------------------------------------------------------------------- /modelci/types/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 6/13/2020 7 | """ 8 | -------------------------------------------------------------------------------- /modelci/env-backend.env: -------------------------------------------------------------------------------- 1 | PROJECT_NAME=modelci 2 | SERVER_HOST=localhost 3 | SERVER_PORT=8000 4 | SECRET_KEY=2a6c03b9ca06cd8fc3cf506f0ba924cb735f15918d54758426fd7282366a5e19 5 |
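# NOTE: the SECRET_KEY above is a sample development value; generate a fresh one before deploying, e.g. with: openssl rand -hex 32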
-------------------------------------------------------------------------------- /modelci/types/models/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 2/17/2021 7 | """ 8 | -------------------------------------------------------------------------------- /modelci/app/v1/endpoints/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 6/20/2020 7 | """ 8 | -------------------------------------------------------------------------------- /modelci/env-mongodb.env: -------------------------------------------------------------------------------- 1 | MONGO_HOST=localhost 2 | MONGO_PORT=27017 3 | MONGO_USERNAME=modelci 4 | MONGO_PASSWORD=modelci@2020 5 | MONGO_DB=modelci 6 | MONGO_AUTH_SOURCE=modelci 7 | -------------------------------------------------------------------------------- /modelci/hub/deployer/onnxs/.dockerignore: -------------------------------------------------------------------------------- 1 | model/ 2 | .dockerignore 3 | .gitignore 4 | onnx-serve-cpu.Dockerfile 5 | onnx-serve-gpu.Dockerfile 6 | deploy_model_cpu.sh 7 | README.md 8 | -------------------------------------------------------------------------------- /modelci/hub/deployer/pytorch/.dockerignore: -------------------------------------------------------------------------------- 1 | model/ 2 | .dockerignore 3 | torch-serve-cpu.Dockerfile 4 | torch-serve-gpu.Dockerfile 5 | deploy_model_cpu.sh 6 | deploy_model_gpu.sh 7 | -------------------------------------------------------------------------------- /frontend/src/app.ts: -------------------------------------------------------------------------------- 1 | import { createApp } from 'ice'; 2 | 3 | const appConfig = { 4 | app: { 5 | rootId: 'ice-container', 6 | }, 7 | }; 8 | 9 | createApp(appConfig); 10 | -------------------------------------------------------------------------------- /modelci/data_engine/preprocessor/__init__.py: -------------------------------------------------------------------------------- 1 | from .image_classification import preprocess as image_classification_preprocessor 2 | 3 | __all__ = ['image_classification_preprocessor'] 4 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=46.4.0", 4 | "wheel", 5 | "distro", 6 | "requests>=2.23.0" 7 | ] 8 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /modelci/app/experimental/endpoints/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 1/29/2021 7 | """ 8 | -------------------------------------------------------------------------------- /modelci/app/experimental/endpoints/drl_tuner.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 1/29/2021 7 | """ 8 | 
-------------------------------------------------------------------------------- /modelci/app/experimental/endpoints/nlp_tuner.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 1/29/2021 7 | """ 8 | -------------------------------------------------------------------------------- /modelci/data_engine/postprocessor/__init__.py: -------------------------------------------------------------------------------- 1 | from .image_classification import postprocess as image_classification_postprocessor 2 | 3 | __all__ = ['image_classification_postprocessor'] 4 | -------------------------------------------------------------------------------- /tests/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | env = 3 | MONGO_HOST=localhost 4 | MONGO_PORT=27017 5 | MONGO_USERNAME=modelci 6 | MONGO_PASSWORD=modelci@2020 7 | MONGO_DB=test 8 | MONGO_AUTH_SOURCE=modelci -------------------------------------------------------------------------------- /modelci/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 1/31/2021 7 | 8 | ModelCI's experimental APIs. 9 | """ 10 | -------------------------------------------------------------------------------- /modelci/experimental/model/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 1/31/2021 7 | 8 | Data model definition. 9 | """ 10 | -------------------------------------------------------------------------------- /modelci/experimental/curd/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 2/1/2021 7 | 8 | CRUD functions for data access. 
9 | """ 10 | -------------------------------------------------------------------------------- /scripts/install.conda_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Install Conda environment 3 | conda env create -f environment.yml 4 | 5 | # Activate conda 6 | source "${CONDA_PREFIX}"/etc/profile.d/conda.sh 7 | conda activate modelci 8 | -------------------------------------------------------------------------------- /modelci/hub/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 9/19/2020 7 | """ 8 | __all__ = ['converter', 'manager', 'client', 'utils', 'profiler'] 9 | -------------------------------------------------------------------------------- /modelci/hub/deployer/__init__.py: -------------------------------------------------------------------------------- 1 | # from .dispatcher import serve 2 | # TODO: circular reference as 3 | # dispatcher depends on manager 4 | # manager depends on profiler 5 | # profiler depends on dispatcher 6 | __all__ = ['dispatcher'] 7 | -------------------------------------------------------------------------------- /frontend/.eslintrc.js: -------------------------------------------------------------------------------- 1 | const { tslint, deepmerge } = require('@ice/spec'); 2 | 3 | module.exports = deepmerge(tslint, { 4 | rules: { 5 | 'react/jsx-filename-extension': 0, 6 | '@typescript-eslint/explicit-function-return-type': 0, 7 | }, 8 | }); 9 | -------------------------------------------------------------------------------- /modelci/app/readme.md: -------------------------------------------------------------------------------- 1 | # FastAPI RESTful Server 2 | 3 | ## Start server 4 | 5 | ```shell script 6 | uvicorn main:app 7 | ``` 8 | 9 | The server will start at http://localhost:8000. 
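Once it is up, a quick sanity check against the model-listing endpoint (a minimal sketch; it assumes the default host and port above, the `/api/v1/model` route used by the web portal, and an installed `requests` package):

```python
import requests

# List all models registered with the running ModelCI server.
resp = requests.get('http://localhost:8000/api/v1/model')
print(resp.status_code, resp.json())
```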
10 | 11 | ## Try APIs 12 | 13 | Open docs at http://localhost:8000/docs 14 | -------------------------------------------------------------------------------- /modelci/monitor/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 9/19/2020 7 | """ 8 | __all__ = ['GPUNodeExporter'] 9 | 10 | from .gpu_node_exporter import GPUNodeExporter 11 | -------------------------------------------------------------------------------- /frontend/src/layouts/BasicLayout/components/Footer/index.module.scss: -------------------------------------------------------------------------------- 1 | .footer { 2 | line-height: 20px; 3 | text-align: center; 4 | } 5 | 6 | .logo { 7 | font-weight: bold; 8 | font-size: 16px; 9 | } 10 | 11 | .copyright { 12 | font-size: 12px; 13 | } 14 | -------------------------------------------------------------------------------- /modelci/hub/deployer/onnxs/deploy_model_cpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker run -d --rm -p "${2}":8000 -p "${3}":8001 \ 4 | --mount type=bind,source="${HOME}"/.modelci/"${1}"/pytorch-onnx,target=/models/"${1}" \ 5 | -e MODEL_NAME="${1}" -t mlmodelci/onnx-serving:latest 6 | -------------------------------------------------------------------------------- /frontend/.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | root = true 3 | 4 | [*] 5 | indent_style = space 6 | indent_size = 2 7 | charset = utf-8 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | 11 | [*.md] 12 | trim_trailing_whitespace = false 13 | -------------------------------------------------------------------------------- /modelci/hub/deployer/pytorch/deploy_model_cpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker run -d --rm -p "${2}":8000 -p "${3}":8001 \ 4 | --mount type=bind,source="${HOME}"/.modelci/"${1}"/pytorch-torchscript,target=/models/"${1}" \ 5 | -e MODEL_NAME="${1}" -t mlmodelci/pytorch-serving:latest -------------------------------------------------------------------------------- /modelci/hub/deployer/tfs/deploy_model_cpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Serve on CPU: $1 = model name, $2 = gRPC port, $3 = REST port 4 | docker run -d --rm -p "${2}":8500 -p "${3}":8501 \ 5 | --mount type=bind,source="${HOME}"/.modelci/"${1}"/tensorflow-tfs,target=/models/"${1}" \ 6 | -e MODEL_NAME="${1}" -t tensorflow/serving 7 | -------------------------------------------------------------------------------- /modelci/hub/deployer/tfs/deploy_model_gpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker run -d --rm -p "${2}":8500 -p "${3}":8501 --runtime=nvidia \ 4 | --mount type=bind,source="${HOME}"/.modelci/"${1}"/tensorflow-tfs,target=/models/"${1}" \ 5 | -e MODEL_NAME="${1}" -t tensorflow/serving:2.1.0-gpu -------------------------------------------------------------------------------- /modelci/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 9/19/2020 7 | """ 8 | from .cadvisor.cadvisor import CAdvisor 9 | 10 |
__all__ = ['benchmark', 'monetary', 'CAdvisor'] 11 | -------------------------------------------------------------------------------- /frontend/src/pages/Jobs/index.css: -------------------------------------------------------------------------------- 1 | @import '~antd/dist/antd.css'; 2 | 3 | .ant-descriptions-item-label,.ant-descriptions-item-content,table { 4 | font-size: 20px; 5 | } 6 | 7 | tr.ant-table-expanded-row .ant-descriptions-view table { 8 | width: 100%; 9 | font-size: 20px; 10 | } -------------------------------------------------------------------------------- /frontend/src/pages/VersionTree/utils/type.tsx: -------------------------------------------------------------------------------- 1 | export interface IGitData { 2 | author: { 3 | name: string | null; 4 | email: string | null; 5 | }; 6 | hash: string; 7 | refs: string[]; 8 | parents: string[]; 9 | subject: string | null; 10 | created_at: string; 11 | } -------------------------------------------------------------------------------- /modelci/utils/exceptions.py: -------------------------------------------------------------------------------- 1 | class ModelCIError(Exception): 2 | """Super class of all ModelCI exception types.""" 3 | pass 4 | 5 | 6 | class ModelStructureError(ValueError): 7 | """ 8 | Exception raised when model structure unable to construct. 9 | """ 10 | pass 11 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | ### Python ### 2 | .git/ 3 | 4 | build 5 | dist 6 | *.egg-info 7 | *.egg/ 8 | *.swp 9 | 10 | .tox 11 | .coverage 12 | .pytest_cache 13 | html/* 14 | __pycache__ 15 | 16 | ### Front end ### 17 | node_modules 18 | npm-debug.log 19 | 20 | ### Build cache ### 21 | */**/*.cache 22 | -------------------------------------------------------------------------------- /docker/frontend.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:12-alpine as compile-image 2 | COPY frontend /frontend 3 | WORKDIR /frontend 4 | RUN yarn install 5 | RUN yarn build 6 | 7 | FROM nginx:stable-alpine as build-image 8 | COPY --from=compile-image /frontend/build /usr/share/nginx/html 9 | CMD ["nginx", "-g", "daemon off;"] -------------------------------------------------------------------------------- /modelci/experimental/finetuner/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 1/12/2021 7 | """ 8 | from pathlib import Path 9 | 10 | OUTPUT_DIR = Path.home() / 'tmp' 11 | OUTPUT_DIR.mkdir(parents=True, exist_ok=True) 12 | -------------------------------------------------------------------------------- /modelci/types/do/__init__.py: -------------------------------------------------------------------------------- 1 | from .dynamic_profile_result_do import DynamicProfileResultDO 2 | from .model_do import IOShapeDO, ModelDO 3 | from .profile_result_do import ProfileResultDO 4 | from .static_profile_result_do import StaticProfileResultDO 5 | 6 | __all__ = [_s for _s in dir() if not _s.startswith('_')] 7 | -------------------------------------------------------------------------------- /ops/README.md: -------------------------------------------------------------------------------- 1 | # Node exporter for our system 2 | 3 | ## Monitor the system status 4 | 5 | The node_exporter is 
designed to monitor the host system. It's not recommended to deploy it as a Docker container because it requires access to the host system. 6 | 7 | ```bash 8 | sh start_node_exporter.sh 9 | ``` 10 | -------------------------------------------------------------------------------- /frontend/src/pages/User/index.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { Link } from 'ice'; 3 | 4 | const User = () => { 5 | return ( 6 |
<div> 7 | <h2>User page</h2> 8 | <div> 9 | <Link to="/">Home</Link> 10 | </div> 11 | </div>
12 | ); 13 | }; 14 | 15 | export default User; 16 | -------------------------------------------------------------------------------- /modelci/hub/deployer/onnxs/deploy_model_gpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker run -d --rm -p "${2}":8000 -p "${3}":8001 \ 4 | --runtime=nvidia \ 5 | --mount type=bind,source="${HOME}"/.modelci/"${1}"/pytorch-onnx,target=/models/"${1}" \ 6 | -e MODEL_NAME="${1}" --env-file docker-env.env -t mlmodelci/onnx-serving:latest-gpu 7 | -------------------------------------------------------------------------------- /frontend/src/pages/Setting/index.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { Link } from 'ice'; 3 | 4 | const Setting = () => { 5 | return ( 6 |
<div> 7 | <h2>Setting page</h2> 8 | <div> 9 | <Link to="/">Home</Link> 10 | </div> 11 | </div>
12 | ); 13 | }; 14 | 15 | export default Setting; 16 | -------------------------------------------------------------------------------- /modelci/hub/deployer/pytorch/deploy_model_gpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker run -d --rm -p "${2}":8000 -p "${3}":8001 \ 4 | --runtime=nvidia \ 5 | --mount type=bind,source="${HOME}"/.modelci/"${1}"/pytorch-torchscript,target=/models/"${1}" \ 6 | -e MODEL_NAME="${1}" --env-file docker-env.env -t mlmodelci/pytorch-serving:latest-gpu 7 | -------------------------------------------------------------------------------- /frontend/src/pages/About/index.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { Link, withRouter } from 'ice'; 3 | 4 | const About = () => { 5 | return ( 6 |
<div> 7 | <h2>About page</h2> 8 | <div> 9 | <Link to="/">Home</Link> 10 | </div> 11 | </div>
12 | ); 13 | }; 14 | 15 | export default About; 16 | -------------------------------------------------------------------------------- /example/resnet50.yml: -------------------------------------------------------------------------------- 1 | weight: "~/.modelci/ResNet50/PyTorch-PYTORCH/Image_Classification/1.pth" 2 | dataset: ImageNet 3 | task: Image_Classification 4 | metric: 5 | acc: 0.76 6 | inputs: 7 | - name: "input" 8 | shape: [ -1, 3, 224, 224 ] 9 | dtype: TYPE_FP32 10 | outputs: 11 | - name: "output" 12 | shape: [ -1, 1000 ] 13 | dtype: TYPE_FP32 14 | convert: true -------------------------------------------------------------------------------- /modelci/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 9/19/2020 7 | """ 8 | from . import data_engine 9 | from . import metrics 10 | from . import monitor 11 | from . import types 12 | from . import utils 13 | 14 | __all__ = ['data_engine', 'metrics', 'monitor', 'types', 'utils'] 15 | -------------------------------------------------------------------------------- /frontend/src/pages/Dashboard/components/Pannel/index.module.scss: -------------------------------------------------------------------------------- 1 | .container { 2 | min-height: 100%; 3 | overflow: hidden; 4 | text-align: center; 5 | background-color: $color-white; 6 | } 7 | 8 | .title { 9 | font-size: 40px; 10 | text-align: center; 11 | } 12 | 13 | .description { 14 | margin-top: 40px; 15 | } 16 | 17 | .action { 18 | margin-top: 40px; 19 | } 20 | -------------------------------------------------------------------------------- /example/resnet50_torchscript.yml: -------------------------------------------------------------------------------- 1 | weight: "~/.modelci/ResNet50/PyTorch-TORCHSCRIPT/Image_Classification/1.zip" 2 | dataset: ImageNet 3 | task: Image_Classification 4 | metric: 5 | acc: 0.76 6 | inputs: 7 | - name: "input" 8 | shape: [ -1, 3, 224, 224 ] 9 | dtype: TYPE_FP32 10 | outputs: 11 | - name: "output" 12 | shape: [ -1, 1000 ] 13 | dtype: TYPE_FP32 14 | convert: false -------------------------------------------------------------------------------- /scripts/uninstall.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | # remove Conda environment 3 | conda remove --name modelci --all -y 4 | 5 | # stop docker service 6 | # shellcheck disable=SC2046 7 | docker stop $(docker ps -a -q --filter="name=modelci.*") 8 | 9 | # remove tmp files 10 | rm -rf ~/tmp/tensorrtserver 11 | rm -f scripts/libnvidia-ml.cache 12 | 13 | # remove log 14 | rm -rf /tmp/modelci-install*.log 15 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 4 | 5 | #### Software and Hardware Versions 6 | ModelCI v1.x.x, CUDA Version vx.x.x, GPU device used... 7 | 8 | #### Problem description 9 | 10 | #### Steps to Reproduce the Problem 11 | 12 | #### Expected Behavior 13 | 14 | #### Other Information 15 | Things you tried, stack traces, related issues, suggestions on how to fix it... 
-------------------------------------------------------------------------------- /frontend/src/layouts/BasicLayout/components/Logo/index.module.scss: -------------------------------------------------------------------------------- 1 | .logo { 2 | display: flex; 3 | align-items: center; 4 | justify-content: center; 5 | color: $color-text1-1; 6 | font-weight: bold; 7 | font-size: 14px; 8 | line-height: 22px; 9 | 10 | &:visited, 11 | &:link { 12 | color: $color-text1-1; 13 | } 14 | 15 | img { 16 | height: 24px; 17 | margin-right: 10px; 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /modelci/hub/client/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 9/19/2020 7 | """ 8 | from .onnx_client import CVONNXClient 9 | from .tfs_client import CVTFSClient 10 | from .torch_client import CVTorchClient 11 | from .trt_client import CVTRTClient 12 | 13 | __all__ = ['CVONNXClient', 'CVTFSClient', 'CVTRTClient', 'CVTorchClient'] 14 | -------------------------------------------------------------------------------- /modelci/hub/deployer/trt/deploy_model.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # TODO: combine TF-TRT and ONNX converted TRT 4 | docker run --runtime=nvidia -d --rm --shm-size=1g --ulimit memlock=-1 --ulimit stack=67100864 \ 5 | -p "${2}":8000 -p "${3}":8001 -p "${4}":8002 \ 6 | -v /"${HOME}"/.modelci/"${1}"/tensorflow-trt:/models/"${1}" \ 7 | nvcr.io/nvidia/tensorrtserver:19.10-py3 \ 8 | trtserver --model-repository=/models 9 | -------------------------------------------------------------------------------- /modelci/metrics/README.md: -------------------------------------------------------------------------------- 1 | # Metric 2 | 3 | Here are some metrics of model performance. 4 | 5 | - [CAdvisor Client](./cadvisor): monitoring for model Docker containers. 6 | - [Throughput and Latency](./benchmark): throughput and latency test for a specific model. 7 | - [Monetary Cost](./monetary): monetary cost of machines (GPU, CPU) per unit time. 8 | 9 | ## Roadmap 10 | 11 | - add cost metric 12 | - add carbon metric 13 | - more clients 14 | -------------------------------------------------------------------------------- /modelci/monitor/readme.md: -------------------------------------------------------------------------------- 1 | # Monitor 2 | 3 | ## Node Exporter for GPU monitor 4 | 5 | Start the NVIDIA node exporter: 6 | 7 | ```bash 8 | bash ../../scripts/start_node_exporter.sh 9 | ``` 10 | 11 | Check idle GPU IDs: 12 | 13 | ```python 14 | from modelci.monitor.gpu_node_exporter import GPUNodeExporter 15 | 16 | exporter = GPUNodeExporter() 17 | exporter.get_idle_gpu() 18 | # output: [0, 1, 2] 19 | 20 | ``` 21 | 22 | ## cAdvisor for monitoring other resource usage -------------------------------------------------------------------------------- /modelci/init-mongo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | mongo -- "$MONGO_INITDB_DATABASE" <<EOF 3 | db.createUser({user: "$MONGO_INITDB_USERNAME", pwd: "$MONGO_INITDB_PASSWORD", roles: ["readWrite"]}); 4 | EOF -------------------------------------------------------------------------------- /frontend/src/layouts/BasicLayout/components/Footer/index.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import styles from './index.module.scss'; 3 | 4 | export default function Footer() { 5 | return ( 6 | <p className={styles.footer}> 7 | <span className={styles.logo}>CAP @NTU</span> 8 | <br /> 9 | <span className={styles.copyright}> 10 | © 2019-2020 Nanyang Technological University, Singapore 11 | </span> 12 | </p>
13 | ); 14 | } 15 | -------------------------------------------------------------------------------- /modelci/types/vo/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: USER 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 6/13/2020 7 | """ 8 | from .model_vo import ( 9 | ModelInputFormat, Framework, Engine, Status, IOShapeVO, InfoTupleVO, ProfileMemoryVO, ProfileLatencyVO, 10 | ProfileThroughputVO, DynamicResultVO, ProfileResultVO, ModelListOut, ModelDetailOut 11 | ) 12 | 13 | __all__ = [_s for _s in dir() if not _s.startswith('_')] 14 | -------------------------------------------------------------------------------- /example/resnet50_explicit_path.yml: -------------------------------------------------------------------------------- 1 | weight: "~/.modelci/ResNet50/PyTorch-PYTORCH/Image_Classification/1.pth" 2 | architecture: ResNet50 3 | framework: PyTorch 4 | engine: PYTORCH 5 | version: 1 6 | dataset: ImageNet 7 | task: Image_Classification 8 | metric: 9 | acc: 0.76 10 | inputs: 11 | - name: "input" 12 | shape: [ -1, 3, 224, 224 ] 13 | dtype: TYPE_FP32 14 | outputs: 15 | - name: "output" 16 | shape: [ -1, 1000 ] 17 | dtype: TYPE_FP32 18 | convert: true 19 | -------------------------------------------------------------------------------- /modelci/types/bo/__init__.py: -------------------------------------------------------------------------------- 1 | from .dynamic_profile_result_bo import DynamicProfileResultBO, ProfileLatency, ProfileMemory, ProfileThroughput 2 | from .model_bo import ModelBO 3 | from .model_objects import DataType, Task, Metric, ModelStatus, Framework, Engine, Status, ModelVersion, IOShape, InfoTuple, Weight 4 | from .profile_result_bo import ProfileResultBO 5 | from .static_profile_result_bo import StaticProfileResultBO 6 | 7 | __all__ = [_s for _s in dir() if not _s.startswith('_')] 8 | -------------------------------------------------------------------------------- /frontend/src/config.ts: -------------------------------------------------------------------------------- 1 | // FIXME: unable to get environment variables from process.env 2 | const host = process.env.REACT_APP_BACKEND_URL || 'http://localhost:8000' 3 | 4 | export default { 5 | default: { 6 | modelURL: `${host}/api/v1/model`, 7 | visualizerURL: `${host}/api/v1/visualizer`, 8 | structureURL: `${host}/api/exp/structure`, 9 | structureRefractorURL: `${host}/api/exp/cv-tuner/finetune`, // temp url 10 | trainerURL: `${host}/api/exp/train` 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /frontend/src/pages/Dashboard/components/Pannel/index.tsx: -------------------------------------------------------------------------------- 1 | import * as React from 'react'; 2 | import styles from './index.module.scss'; 3 | 4 | const Pannel = () => { 5 | return ( 6 |
<div className={styles.container}> 7 | <h2 className={styles.title}>Welcome to ModelCI!</h2> 8 | 9 | <p className={styles.description}>This is an awesome project, enjoy it!</p> 10 | 11 | <div className={styles.action} /> 12 | </div>
13 | ); 14 | }; 15 | 16 | export default Pannel; 17 | -------------------------------------------------------------------------------- /modelci/controller/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 6/29/2020 7 | """ 8 | import atexit 9 | 10 | from modelci.controller.executor import JobExecutor 11 | 12 | job_executor = JobExecutor() 13 | job_executor.start() 14 | 15 | 16 | @atexit.register 17 | def terminate_controllers(): 18 | job_executor.join() 19 | print('Exiting job executor.') 20 | 21 | 22 | __all__ = ['job_executor'] 23 | -------------------------------------------------------------------------------- /frontend/build.json: -------------------------------------------------------------------------------- 1 | { 2 | "plugins": [ 3 | [ 4 | "build-plugin-fusion", 5 | { 6 | "themePackage": "@alifd/theme-design-pro" 7 | } 8 | ], 9 | [ 10 | "build-plugin-moment-locales", 11 | { 12 | "locales": [ 13 | "zh-cn" 14 | ] 15 | } 16 | ], 17 | [ 18 | "build-plugin-antd", 19 | { 20 | "themeConfig": { 21 | "primary-color": "#1890ff" 22 | } 23 | } 24 | ] 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /modelci/persistence/exceptions.py: -------------------------------------------------------------------------------- 1 | class ServiceException(Exception): 2 | def __init__(self, message): 3 | self.message = message 4 | 5 | def __str__(self): 6 | return repr(self.message) 7 | 8 | 9 | class DoesNotExistException(ServiceException): 10 | def __init__(self, message): 11 | super().__init__(message=message) 12 | 13 | 14 | class BadRequestValueException(ServiceException): 15 | def __init__(self, message): 16 | super().__init__(message=message) 17 | -------------------------------------------------------------------------------- /frontend/public/index.html: -------------------------------------------------------------------------------- 1 | <!DOCTYPE html> 2 | <html lang="en"> 3 | <head> 4 | <meta 5 | name="viewport" 6 | content="width=device-width, initial-scale=1, 7 | maximum-scale=1, user-scalable=no" 8 | /> 9 | <meta charset="utf-8" /> 10 | <link rel="icon" href="favicon.png" /> 11 | 12 | <title>ML ModelCI</title> 13 | </head> 14 | 15 | <body> 16 | <div id="ice-container"></div> 17 | </body> 18 | </html> 19 | -------------------------------------------------------------------------------- /frontend/src/layouts/BasicLayout/menuConfig.ts: -------------------------------------------------------------------------------- 1 | const headerMenuConfig = []; 2 | 3 | const asideMenuConfig = [ 4 | { name: 'Dashboard', path: '/', icon: 'chart-pie' }, 5 | { name: 'Users', path: '/user', icon: 'account' }, 6 | { name: 'Settings', path: '/setting', icon: 'set' }, 7 | { name: 'About', path: '/about', icon: 'warning' }, 8 | { name: 'Jobs', path: '/jobs', icon: 'edit' }, 9 | { name: 'VersionTree', path: '/versiontree', icon: 'eye' }, 10 | ]; 11 | 12 | export { headerMenuConfig, asideMenuConfig }; 13 | -------------------------------------------------------------------------------- /modelci/persistence/__init__.py: -------------------------------------------------------------------------------- 1 | from modelci.config import db_settings 2 | from .mongo_db import MongoDB 3 | 4 | conn_settings = { 5 | 'db': str(db_settings.mongo_db), 6 | 'host': str(db_settings.mongo_host), 7 | 'port': int(db_settings.mongo_port), 8 | 'username': str(db_settings.mongo_username), 9 | 'password': db_settings.mongo_password.get_secret_value(), 10 | 'authentication_source': str(db_settings.mongo_auth_source) 11 | } 12 | 13 | mongo = MongoDB(**conn_settings) 14 | 15 | mongo.connect() 16 | -------------------------------------------------------------------------------- /example/retinanet.yml: -------------------------------------------------------------------------------- 1 | weight: "~/.modelci/RetinaNet/PyTorch-PYTORCH/Object_Detection/1.pth" 2 | architecture: RetinaNet 3 | framework: PyTorch 4 | engine: PYTORCH 5 | version: 1 6 | dataset: COCO 7 | task: Object_Detection 8 | metric: 9 | mAP: 0.365 10 | inputs: 11 | - name: "input" 12 | shape: [ -1, 3, 224, 224 ] 13 | dtype: TYPE_FP32 14 | outputs: 15 | - name: "BBOX" 16 | shape: [ -1, 100, 5 ] 17 | dtype: TYPE_FP32 18 | - name: "SCORE" 19 | shape: [ -1, 100 ] 20 | dtype: TYPE_FP32 21 | convert: true -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = modelci 3 | version = 1.0.0 4 | author = NTU CAP 5 | author_email = huaizhen001@e.ntu.edu.sg 6 | description = A complete platform for managing, converting, profiling, and deploying models as cloud services (MLaaS) 7 | long_description = file: README.md 8 | long_description_content_type = text/markdown 9 | url = https://github.com/cap-ntu/ML-Model-CI 10 | project_urls = 11 | Bug Tracker = https://github.com/cap-ntu/ML-Model-CI/issues 12 | license_file = LICENSE 13 | 14 | 15 | [options] 16 | python_requires = 17 | >=3.7 -------------------------------------------------------------------------------- /.fossa.yml: -------------------------------------------------------------------------------- 1 | # Generated by FOSSA CLI (https://github.com/fossas/fossa-cli) 2 | # Visit https://fossa.com to learn more 3 | 4 | version: 2 5 | cli: 6 | server: https://app.fossa.com 7 | fetcher: custom 8 | project: https://github.com/cap-ntu/ML-Model-CI.git 9 | analyze: 10 | modules: 11 | - name: frontend 12 | type: npm 13 | target: frontend 14 | path: frontend 15 | - name: frontend 16 | type: npm 17 | target: frontend 18 | path: frontend 19 | - name: . 20 | type: pip 21 | target: . 22 | path: .
23 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "3.7" 5 | 6 | before_install: 7 | - "export PYTHONPATH=$PYTHONPATH:$(pwd)" 8 | 9 | install: 10 | - pip install pip==20.1 setuptools requests==2.23.0 11 | - pip install -r requirements.txt 12 | 13 | services: 14 | - mongodb 15 | 16 | before_script: 17 | - mongo modelci --eval 'db.createUser({user:"modelci",pwd:"modelci@2020",roles:["readWrite"]});' 18 | 19 | script: 20 | - python -m pytest tests/ 21 | 22 | notifications: 23 | slack: ml-model-ci:SyXxPANcJK5flgmAM7aMaNuV 24 | email: false -------------------------------------------------------------------------------- /modelci/metrics/benchmark/README.md: -------------------------------------------------------------------------------- 1 | # Testing Tool for Throughput and Latency 2 | 3 | ## Metrics 4 | 5 | A single run yields the following performance information: 6 | 7 | - a latency list containing every batch's latency 8 | - a throughput list containing every batch's throughput 9 | - overall latency 10 | - overall throughput 11 | - 25th-percentile latency 12 | - 50th-percentile latency 13 | - 75th-percentile latency 14 | 15 | We also support a custom percentile; you can set it by passing a `percentile` argument to the `BaseModelInspector` 16 | constructor. 17 | -------------------------------------------------------------------------------- /frontend/src/layouts/BasicLayout/components/Logo/index.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { Link } from 'ice'; 3 | import styles from './index.module.scss'; 4 | 5 | export interface ILogoProps { 6 | image?: string; 7 | text?: string; 8 | url?: string; 9 | } 10 | 11 | export default function Logo({ image, text, url }: ILogoProps) { 12 | return ( 13 | <div className="logo">
14 | <Link to={url || '/'} className={styles.logo}> 15 | {image && <img src={image} alt="logo" />} 16 | <span>{text}</span> 17 | </Link> 18 | </div>
19 | ); 20 | } 21 | -------------------------------------------------------------------------------- /example/sample_k8s_deployment.conf: -------------------------------------------------------------------------------- 1 | [remote_storage] 2 | # configuration for pulling models from cloud storage. 3 | storage_type = S3 4 | aws_access_key_id = sample-id 5 | aws_secret_access_key = sample-key 6 | bucket_name = sample-bucket 7 | remote_model_path = models/bidaf-9 8 | 9 | [model] 10 | # local model path for storing model after pulling it from cloud 11 | local_model_dir = /models 12 | local_model_name = bidaf-9 13 | 14 | [deployment] 15 | # deployment detailed configuration 16 | name = sample-deployment 17 | namespace = default 18 | replicas = 1 19 | engine = ONNX 20 | device = cpu 21 | batch_size = 16 22 | -------------------------------------------------------------------------------- /frontend/README.md: -------------------------------------------------------------------------------- 1 | # Web Portal of ML Model CI 2 | 3 | ## Quick Start 4 | 5 | Before getting started with the web application, you need to connect it to your backend APIs (MLModelCI services). The default address is http://localhost:8000/api/v1/model. If you want to connect to your own MLModelCI service, modify the address in `src/config.ts` before starting. 6 | 7 | ```shell script 8 | npm install 9 | npm start 10 | ``` 11 | 12 | ## Screenshot 13 | 14 | ### Dashboard 15 | 16 | ![](https://i.loli.net/2020/05/12/WL2tQlf3ZpjrBPI.png) 17 | 18 | ### Model visualization 19 | 20 | ![](screenshot/visualizer.png) 21 | -------------------------------------------------------------------------------- /modelci/experimental/model/common.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 2/1/2021 7 | 8 | Common Pydantic Class 9 | """ 10 | from bson import ObjectId 11 | 12 | 13 | class ObjectIdStr(str): 14 | @classmethod 15 | def __get_validators__(cls): 16 | yield cls.validate 17 | 18 | @classmethod 19 | def validate(cls, v): 20 | if isinstance(v, str): 21 | v = ObjectId(v) 22 | if not isinstance(v, ObjectId): 23 | raise ValueError("Not a valid ObjectId") 24 | return str(v) 25 | -------------------------------------------------------------------------------- /modelci/types/do/profile_result_do.py: -------------------------------------------------------------------------------- 1 | from mongoengine import EmbeddedDocument 2 | from mongoengine.fields import EmbeddedDocumentField, ListField 3 | 4 | from .dynamic_profile_result_do import DynamicProfileResultDO 5 | from .static_profile_result_do import StaticProfileResultDO 6 | 7 | 8 | class ProfileResultDO(EmbeddedDocument): 9 | """ 10 | Profiling result plain object.
11 | """ 12 | # Static profile result 13 | static_profile_result = EmbeddedDocumentField(StaticProfileResultDO) 14 | # Dynamic profile result 15 | dynamic_profile_results = ListField(EmbeddedDocumentField(DynamicProfileResultDO)) 16 | -------------------------------------------------------------------------------- /modelci/app/v1/api.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 6/20/2020 7 | """ 8 | from fastapi import APIRouter 9 | 10 | from modelci.app.v1.endpoints import model 11 | from modelci.app.v1.endpoints import visualizer 12 | from modelci.app.v1.endpoints import profiler 13 | 14 | api_router = APIRouter() 15 | api_router.include_router(model.router, prefix='/model', tags=['model']) 16 | api_router.include_router(visualizer.router, prefix='/visualizer', tags=['visualizer']) 17 | api_router.include_router(profiler.router, prefix='/profiler', tags=['profiler']) -------------------------------------------------------------------------------- /modelci/app/experimental/api.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 6/20/2020 7 | """ 8 | from fastapi import APIRouter 9 | 10 | from modelci.app.experimental.endpoints import cv_tuner 11 | from modelci.app.experimental.endpoints import model_structure, trainer 12 | 13 | api_router = APIRouter() 14 | api_router.include_router(cv_tuner.router, prefix='/cv-tuner', tags=['[*exp] cv-tuner']) 15 | api_router.include_router(model_structure.router, prefix='/structure', tags=['[*exp] structure']) 16 | api_router.include_router(trainer.router, prefix='/train', tags=['[*exp] train']) 17 | -------------------------------------------------------------------------------- /modelci/cli/modelps.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) NTU_CAP 2021. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at: 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 12 | # or implied. See the License for the specific language governing 13 | # permissions and limitations under the License. 
-------------------------------------------------------------------------------- /ops/start_node_exporter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | VERSION=0.18.1 4 | DIR_NAME="node_exporter-${VERSION}.linux-amd64" 5 | FILENAME="${DIR_NAME}.tar.gz" 6 | 7 | # create temporary directory 8 | mkdir -p ~/tmp && cd ~/tmp || exit 1 9 | mkdir -p node_exporter && cd node_exporter || exit 1 10 | 11 | 12 | # download and unzip 13 | if [ -d "${DIR_NAME}" ]; then 14 | echo "${DIR_NAME} has been downloaded" 15 | else 16 | echo "Start to download ${DIR_NAME}" 17 | wget "https://github.com/prometheus/node_exporter/releases/download/v${VERSION}/${FILENAME}" 18 | tar xvfz "${FILENAME}" 19 | fi 20 | 21 | # install 22 | cd ${DIR_NAME} || exit 23 | ./node_exporter & 24 | -------------------------------------------------------------------------------- /modelci/types/proto/service.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | service Predict { 4 | // Inference 5 | rpc Infer (InferRequest) returns (InferResponse) { 6 | } 7 | 8 | // Stream Interface 9 | rpc StreamInfer (stream InferRequest) returns (stream InferResponse) { 10 | } 11 | } 12 | 13 | message InferRequest { 14 | // Model name 15 | string model_name = 1; 16 | 17 | // Meta data 18 | string meta = 2; 19 | 20 | // List of bytes (e.g. encoded frames) 21 | repeated bytes raw_input = 3; 22 | } 23 | 24 | message InferResponse { 25 | // Json as string 26 | string json = 1; 27 | // Meta data 28 | string meta = 2; 29 | } 30 | -------------------------------------------------------------------------------- /modelci/hub/deployer/pytorch/proto/service.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | service Predict { 4 | // Inference 5 | rpc Infer (InferRequest) returns (InferResponse) { 6 | } 7 | 8 | // Stream Interface 9 | rpc StreamInfer (stream InferRequest) returns (stream InferResponse) { 10 | } 11 | } 12 | 13 | message InferRequest { 14 | // Model name 15 | string model_name = 1; 16 | 17 | // Meta data 18 | string meta = 2; 19 | 20 | // List of bytes (e.g. encoded frames) 21 | repeated bytes raw_input = 3; 22 | } 23 | 24 | message InferResponse { 25 | // Json as string 26 | string json = 1; 27 | // Meta data 28 | string meta = 2; 29 | } -------------------------------------------------------------------------------- /modelci/hub/deployer/onnxs/proto/service.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | service Predict { 4 | // Inference 5 | rpc Infer (InferRequest) returns (InferResponse) { 6 | } 7 | 8 | // Stream Interface 9 | rpc StreamInfer (stream InferRequest) returns (stream InferResponse) { 10 | } 11 | } 12 | 13 | message InferRequest { 14 | // Model name 15 | string model_name = 1; 16 | 17 | // Meta data 18 | string meta = 2; 19 | 20 | // List of bytes (e.g. 
encoded frames) 21 | repeated bytes raw_input = 3; 22 | } 23 | 24 | message InferResponse { 25 | // Json as string 26 | string json = 1; 27 | // Meta data 28 | string meta = 2; 29 | } 30 | -------------------------------------------------------------------------------- /modelci/app/v1/endpoints/profiler.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Xing Di 5 | Date: 2021/1/15 6 | 7 | """ 8 | from fastapi import APIRouter, HTTPException 9 | from modelci.persistence.service_ import exists_by_id, profile_model 10 | 11 | router = APIRouter() 12 | 13 | 14 | @router.get('/{model_id}', status_code=201) 15 | def profile(model_id: str, device: str = 'cuda', batch_size: int = 1): 16 | if not exists_by_id(model_id): 17 | raise HTTPException( 18 | status_code=404, 19 | detail=f'Model ID {model_id} does not exist. Please check the ID.', 20 | ) 21 | profile_result = profile_model(model_id, device, batch_size) 22 | return profile_result 23 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 2 | # Change Log 3 | All notable changes to this project will be documented in this file. 4 | 5 | ## \[Unreleased\] - 2021-01-20 6 | 7 | ### Added 8 | - PyTorch model visualization 9 | 10 | 11 | ## \[Unreleased\] - 2020-11-01 12 | 13 | Here we write upgrading notes for each release. It's a team effort to make them as 14 | straightforward as possible. 15 | 16 | ### Added 17 | - PyTorch converter from XGBoost, LightGBM, scikit-learn, and ONNX models. 18 | - ONNX converter from XGBoost, LightGBM, scikit-learn, and Keras models. 19 | - Part of CI tests for PyTorch converters and ONNX converters. 20 | 21 | ### Changed 22 | - Required packages to include only packages used by `import` statements. 
23 | 24 | ### Fixed 25 | 26 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | betterproto>=1.2.3 2 | click==7.1.2 3 | docker>=4.2.0 4 | fastapi<=0.61.2,>=0.58.0 5 | GPUtil==1.4.0 6 | grpcio>=1.27.2 7 | humanize>=3.0.1 8 | hummingbird-ml>=0.0.6 9 | lightgbm==2.3.0 10 | mongoengine>=0.19.1 11 | numpy<1.19.0,>=1.16.0 12 | onnx==1.6.0 13 | opencv-python>=3.1.0 14 | onnxconverter-common>=1.6.0 15 | onnxmltools>=1.6.0 16 | onnxruntime==1.2.0 17 | py-cpuinfo>=6.0.0 18 | pydantic<2.0.0,>=0.32.2 19 | pymongo==3.11.2 20 | pytest>=5.4.1 21 | pytest-env==0.6.2 22 | PyYAML==5.3.1 23 | rich==9.1.0 24 | scikit-learn==0.23.2 25 | starlette<=0.13.6,>=0.13.4 26 | typer>=0.3.2 27 | typer-cli>=0.0.11 28 | toolz==0.10.0 29 | tqdm==4.55.1 30 | uvicorn>=0.11.5 31 | xgboost==1.2.0 32 | Jinja2==2.11.2 33 | python-multipart==0.0.5 34 | pydantic[dotenv]>=0.10.4 -------------------------------------------------------------------------------- /modelci/types/do/static_profile_result_do.py: -------------------------------------------------------------------------------- 1 | from mongoengine import EmbeddedDocument 2 | from mongoengine.fields import IntField, LongField 3 | 4 | 5 | class StaticProfileResultDO(EmbeddedDocument): 6 | """ 7 | Static profiling result plain object 8 | """ 9 | 10 | # Number of parameters of this model 11 | parameters = IntField(required=True) 12 | # Floating point operations 13 | flops = LongField(required=True) 14 | # Memory consumption in Byte in order to load this model into GPU or CPU 15 | memory = LongField(required=True) 16 | # Memory read in Byte 17 | mread = LongField(required=True) 18 | # Memory write in Byte 19 | mwrite = LongField(required=True) 20 | # Memory readwrite in Byte 21 | mrw = LongField(required=True) 22 | -------------------------------------------------------------------------------- /scripts/install.trtis_client.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | VERSION=1.8.0 4 | UBUNTU_VERSION=$(lsb_release -sr | tr -d '.') 5 | FILENAME=v"${VERSION}"_ubuntu"${UBUNTU_VERSION}".clients.tar.gz 6 | 7 | function download_file_and_un_tar() { 8 | wget https://github.com/NVIDIA/triton-inference-server/releases/download/v"${VERSION}"/"${FILENAME}" 9 | tar xzf "${FILENAME}" 10 | } 11 | 12 | mkdir -p ~/tmp 13 | cd ~/tmp || return 1 14 | mkdir -p tensorrtserver && cd tensorrtserver || return 1 15 | 16 | # get package 17 | if [ -f "${FILENAME}" ] ; then 18 | echo "Already downloaded at ${FILENAME}" 19 | tar xzf "${FILENAME}" || download_file_and_un_tar 20 | else 21 | download_file_and_un_tar 22 | fi 23 | 24 | # install 25 | pip install python/tensorrtserver-${VERSION}-py2.py3-none-linux_x86_64.whl 26 | -------------------------------------------------------------------------------- /modelci/data_engine/postprocessor/image_classification.py: -------------------------------------------------------------------------------- 1 | def postprocess(results, filenames, batch_size): 2 | """ 3 | Post-process results to show classifications. 
4 | """ 5 | if len(results) != 1: 6 | raise Exception("expected 1 result, got {}".format(len(results))) 7 | 8 | batched_result = list(results.values())[0] 9 | if len(batched_result) != batch_size: 10 | raise Exception("expected {} results, got {}".format(batch_size, len(batched_result))) 11 | if len(filenames) != batch_size: 12 | raise Exception("expected {} filenames, got {}".format(batch_size, len(filenames))) 13 | 14 | for (index, result) in enumerate(batched_result): 15 | print("Image '{}':".format(filenames[index])) 16 | for cls in result: 17 | print(" {} ({}) = {}".format(cls[0], cls[2], cls[1])) 18 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 4 | 5 | Closes # 6 | 7 | 10 | 11 | #### What has been done to verify that this works as intended? 12 | 13 | #### Why is this the best possible solution? Were any other approaches considered? 14 | 15 | #### How does this change affect users? Describe intentional changes to behavior and behavior that could have accidentally been affected by code changes. In other words, what are the regression risks? 16 | 17 | #### Does this change require updates to documentation? 18 | 19 | #### Before submitting this PR, please make sure you have: 20 | 21 | - [ ] run `python -m pytest tests/` and confirmed all checks still pass. 22 | - [ ] verified that any code or assets from external sources are properly credited. 23 | -------------------------------------------------------------------------------- /frontend/src/pages/ModelRegister/components/CustomUpload/index.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { Upload } from 'antd'; 3 | import { InboxOutlined } from '@ant-design/icons'; 4 | 5 | const { Dragger } = Upload; 6 | 7 | export default function CustomUpload(props) { 8 | return ( 9 | { 14 | let file = value.file; 15 | delete file.uid; 16 | props.onChange(file) 17 | } 18 | } 19 | beforeUpload={(file) => { 20 | return false; 21 | } 22 | } 23 | > 24 |

25 | 26 |

27 |

Click or drag file to this area to upload

28 |

29 | Support for a single upload 30 |

31 |
32 | ); 33 | } -------------------------------------------------------------------------------- /modelci/hub/deployer/pytorch/environment.yml: -------------------------------------------------------------------------------- 1 | name: torch-serve 2 | channels: 3 | - defaults 4 | dependencies: 5 | - _libgcc_mutex=0.1 6 | - blas=1.0 7 | - c-ares=1.15.0 8 | - ca-certificates=2020.1.1 9 | - certifi=2020.4.5.1 10 | - grpcio=1.27.2 11 | - intel-openmp=2020.0 12 | - ld_impl_linux-64=2.33.1 13 | - libedit=3.1.20181209 14 | - libffi=3.2.1 15 | - libgcc-ng=9.1.0 16 | - libgfortran-ng=7.3.0 17 | - libprotobuf=3.11.4 18 | - libstdcxx-ng=9.1.0 19 | - mkl=2020.0 20 | - mkl-service=2.3.0 21 | - mkl_fft=1.0.15 22 | - mkl_random=1.1.0 23 | - ncurses=6.2 24 | - ninja=1.9.0 25 | - numpy=1.18.1 26 | - numpy-base=1.18.1 27 | - openssl=1.1.1g 28 | - pip=20.0.2 29 | - protobuf=3.11.4 30 | - python=3.7.7 31 | - readline=8.0 32 | - setuptools=46.1.3 33 | - six=1.14.0 34 | - tk=8.6.8 35 | - toolz=0.10.0 36 | - wheel=0.34.2 37 | - xz=5.2.5 38 | - zlib=1.2.11 39 | -------------------------------------------------------------------------------- /modelci/experimental/finetuner/coordinator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 2/2/2021 7 | """ 8 | from typing import Dict 9 | 10 | from modelci.experimental.finetuner.trainer import BaseTrainer 11 | 12 | 13 | class Coordinator(object): 14 | 15 | def __init__(self): 16 | self.pool: Dict[str, BaseTrainer] = dict() 17 | 18 | def get_job_by_id(self, job_id: str): 19 | return self.pool.get(job_id, None) 20 | 21 | def submit_job(self, trainer: BaseTrainer): 22 | self.pool[trainer.id] = trainer 23 | trainer.start() 24 | 25 | def delete_job_by_id(self, job_id: str): 26 | trainer = self.pool.pop(job_id, None) 27 | if trainer is not None: 28 | trainer.terminate() 29 | 30 | def delete_all(self): 31 | for trainer in self.pool.values(): 32 | trainer.terminate() 33 | -------------------------------------------------------------------------------- /modelci/hub/deployer/onnxs/environment.yml: -------------------------------------------------------------------------------- 1 | name: onnx-serve 2 | channels: 3 | - defaults 4 | dependencies: 5 | - _libgcc_mutex=0.1 6 | - blas=1.0 7 | - c-ares=1.15.0 8 | - ca-certificates=2020.1.1 9 | - certifi=2020.4.5.1 10 | - grpcio=1.27.2 11 | - intel-openmp=2020.0 12 | - ld_impl_linux-64=2.33.1 13 | - libedit=3.1.20181209 14 | - libffi=3.2.1 15 | - libgcc-ng=9.1.0 16 | - libgfortran-ng=7.3.0 17 | - libprotobuf=3.11.4 18 | - libstdcxx-ng=9.1.0 19 | - mkl=2020.0 20 | - mkl-service=2.3.0 21 | - mkl_fft=1.0.15 22 | - mkl_random=1.1.0 23 | - ncurses=6.2 24 | - numpy=1.18.1 25 | - numpy-base=1.18.1 26 | - openssl=1.1.1g 27 | - pip=20.0.2 28 | - protobuf=3.11.4 29 | - python=3.7.7 30 | - readline=8.0 31 | - setuptools=46.1.3 32 | - six=1.14.0 33 | - tk=8.6.8 34 | - toolz=0.10.0 35 | - wheel=0.34.2 36 | - xz=5.2.5 37 | - zlib=1.2.11 38 | - pip: 39 | - onnx==1.6.0 40 | - typing-extensions==3.7.4.2 41 | -------------------------------------------------------------------------------- /modelci/app/v1/endpoints/visualizer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Jiang Shanshan 5 | Email: univeroner@gmail.com 6 | Date: 2021/1/15 7 | 8 | """ 9 | 10 | from fastapi import APIRouter 11 | 
from modelci.persistence.service import ModelService 12 | from modelci.types.bo.model_objects import Engine 13 | from torchviz import make_dot 14 | import torch 15 | 16 | router = APIRouter() 17 | 18 | 19 | @router.get('/{id}') 20 | def generate_model_graph(*, id: str): # noqa 21 | model_bo = ModelService.get_model_by_id(id) 22 | dot_graph = '' 23 | if model_bo.engine == Engine.PYTORCH: 24 | pytorch_model = torch.load(model_bo.saved_path) 25 | sample_data = torch.zeros(1, *model_bo.inputs[0].shape[1:], dtype=torch.float, requires_grad=False) 26 | out = pytorch_model(sample_data) 27 | dot_graph = make_dot(out, params=dict(list(pytorch_model.named_parameters()) + [('x', sample_data)])) 28 | 29 | return {'dot': str(dot_graph)} 30 | -------------------------------------------------------------------------------- /frontend/src/routes.ts: -------------------------------------------------------------------------------- 1 | import BasicLayout from '@/layouts/BasicLayout'; 2 | import Dashboard from '@/pages/Dashboard'; 3 | import About from '@/pages/About'; 4 | import Setting from '@/pages/Setting'; 5 | import User from '@/pages/User'; 6 | import Visualizer from '@/pages/Visualizer'; 7 | import VersionTree from '@/pages/VersionTree'; 8 | import Jobs from '@/pages/Jobs'; 9 | import ModelRegister from '@/pages/ModelRegister' 10 | 11 | const routerConfig = [ 12 | { 13 | path: '/', 14 | component: BasicLayout, 15 | children: [ 16 | { path: '/about', component: About }, 17 | { path: '/user', component: User }, 18 | { path: '/setting', component: Setting }, 19 | { path: '/visualizer/:id', component: Visualizer }, 20 | { path: '/', exact: true, component: Dashboard }, 21 | { path: '/jobs', component: Jobs }, 22 | { path: '/versiontree', component: VersionTree }, 23 | { path: '/modelregister', component: ModelRegister } 24 | ], 25 | }, 26 | ]; 27 | 28 | export default routerConfig; 29 | -------------------------------------------------------------------------------- /pure_requires.yml: -------------------------------------------------------------------------------- 1 | name: modelci 2 | channels: 3 | - defaults 4 | - pytorch 5 | - conda-forge 6 | dependencies: 7 | - click=7.1.2 8 | - docker-py>=4.2.0 9 | - fastapi<=0.61.2,>=0.58.0 10 | - gputil=1.4.0 11 | - grpcio>=1.27.2 12 | - humanize>=3.0.1 13 | - lightgbm=2.3.0 14 | - mongoengine>=0.19.1 15 | - numpy<1.19.0,>=1.16.0 16 | - onnx=1.6.0 17 | - opencv>=3.1.0 18 | - pip=20.2.4 19 | - py-cpuinfo=7.0.0 20 | - pydantic<2.0.0,>=0.32.2 21 | - pytest>=5.4.1 22 | - pytest-env=0.6.2 23 | - python=3.7.7 24 | - pyyaml=5.3.1 25 | - requests>=2.23.0 26 | - rich=9.1.0 27 | - setuptools>=46.4.0 28 | - scikit-learn=0.23.2 29 | - starlette<=0.13.6,>=0.13.4 30 | - tensorflow-gpu>=2.1.0 31 | - typer>=0.3.2 32 | - typer-cli>=0.0.11 33 | - uvicorn>=0.11.5 34 | - xgboost=1.2.0 35 | - pip: 36 | - betterproto>=1.2.3 37 | - hummingbird-ml>=0.0.6 38 | - onnxconverter-common>=1.6.0 39 | - onnxmltools>=1.6.0 40 | - onnxruntime==1.2.0 41 | - tensorflow-serving-api==2.1.0 42 | -------------------------------------------------------------------------------- /modelci/hub/deployer/config/__init__.py: -------------------------------------------------------------------------------- 1 | """Default settings for deployer 2 | 3 | 1. 
Port 4 | | Engine Name | HTTP Port | gRPC Port | HTTP Port (GPU) | gRPC Port (GPU) | 5 | |-------------|:---------:|:---------:|:----------------:|:---------------:| 6 | | ONNX | 8001 | 8002 | 8010 | 8011 | 7 | | TorchScript | 8100 | 8101 | 8110 | 8111 | 8 | | TRT | 8200 | 8201 | 8202 (Prometheus)| - | 9 | | TFS | 8501 | 8500 | 8510 | 8511 | 10 | """ 11 | ONNX_HTTP_PORT = 8001 12 | ONNX_GRPC_PORT = 8002 13 | ONNX_HTTP_PORT_GPU = 8010 14 | ONNX_GRPC_PORT_GPU = 8011 15 | 16 | TORCHSCRIPT_HTTP_PORT = 8100 17 | TORCHSCRIPT_GRPC_PORT = 8101 18 | TORCHSCRIPT_HTTP_PORT_GPU = 8110 19 | TORCHSCRIPT_GRPC_PORT_GPU = 8111 20 | 21 | TRT_HTTP_PORT = 8200 22 | TRT_GRPC_PORT = 8201 23 | TRT_PROMETHEUS_PORT = 8202 24 | 25 | TFS_HTTP_PORT = 8501 26 | TFS_GRPC_PORT = 8500 27 | TFS_HTTP_PORT_GPU = 8510 28 | TFS_GRPC_PORT_GPU = 8511 29 | 30 | -------------------------------------------------------------------------------- /example/sample_mrcnn.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN registering, managing, serving and profiling code demo using ModelCI. 3 | """ 4 | 5 | from PIL import Image 6 | 7 | from modelci.hub.client.tfs_client import CVTFSClient 8 | from modelci.hub.manager import retrieve_model, register_model_from_yaml 9 | from modelci.hub.profiler import Profiler 10 | from modelci.types.bo import Engine, Framework 11 | 12 | if __name__ == "__main__": 13 | test_img = Image.open("path to the test data") 14 | 15 | register_model_from_yaml("path to your yaml file") # register your model in the database 16 | model_info = retrieve_model( # retrieve model information 17 | architecture_name='MRCNN', 18 | framework=Framework.TENSORFLOW, 19 | engine=Engine.TFS 20 | )[0] 21 | 22 | tfs_client = CVTFSClient( 23 | test_img, batch_num=100, batch_size=32, asynchronous=True, model_info=model_info 24 | ) 25 | 26 | profiler = Profiler(model_info=model_info, server_name='tfs', inspector=tfs_client) 27 | profiler.diagnose(device='cuda:0') # profile batch size 32 28 | -------------------------------------------------------------------------------- /modelci/hub/converter/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) NTU_CAP 2021. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at: 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 14 | # or implied. See the License for the specific language governing 15 | # permissions and limitations under the License. 
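# --- Annotation (not part of the original source): a minimal usage sketch for the
# converters re-exported by this package. The model, weights, and save path below
# are illustrative assumptions; the `from_pytorch(model, save_path, override)`
# signature is the one defined in to_torchscript.py.
#
#     from pathlib import Path
#     import torchvision.models as models
#     from modelci.hub.converter import TorchScriptConverter
#
#     model = models.resnet50(pretrained=True)
#     TorchScriptConverter.from_pytorch(model, save_path=Path.home() / '.modelci/ResNet50/pytorch-torchscript/1')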
16 | 17 | from .converter import convert, generate_model_family 18 | from .to_trt import TRTConverter 19 | from .to_tfs import TFSConverter 20 | from .to_onnx import ONNXConverter 21 | from .to_pytorch import PyTorchConverter 22 | from .to_torchscript import TorchScriptConverter 23 | 24 | __all__ = ["convert", "generate_model_family", "TRTConverter", "TFSConverter", "ONNXConverter", "PyTorchConverter", "TorchScriptConverter"] 25 | -------------------------------------------------------------------------------- /frontend/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compileOnSave": false, 3 | "buildOnSave": false, 4 | "compilerOptions": { 5 | "baseUrl": ".", 6 | "outDir": "build", 7 | "module": "esnext", 8 | "target": "es6", 9 | "jsx": "react", 10 | "moduleResolution": "node", 11 | "allowSyntheticDefaultImports": true, 12 | "lib": [ 13 | "es6", 14 | "dom" 15 | ], 16 | "sourceMap": true, 17 | "allowJs": true, 18 | "rootDir": "./", 19 | "forceConsistentCasingInFileNames": true, 20 | "noImplicitReturns": true, 21 | "noImplicitThis": true, 22 | "noImplicitAny": false, 23 | "importHelpers": true, 24 | "strictNullChecks": true, 25 | "suppressImplicitAnyIndexErrors": true, 26 | "noUnusedLocals": true, 27 | "skipLibCheck": true, 28 | "paths": { 29 | "@/*": [ 30 | "./src/*" 31 | ], 32 | "ice": [ 33 | ".ice/index.ts" 34 | ], 35 | "ice/*": [ 36 | ".ice/pages/*" 37 | ] 38 | } 39 | }, 40 | "include": [ 41 | "src/*", 42 | ".ice" 43 | ], 44 | "exclude": [ 45 | "node_modules", 46 | "build", 47 | "public" 48 | ] 49 | } 50 | -------------------------------------------------------------------------------- /modelci/hub/deployer/onnxs/onnx-serve-cpu.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda 2 | 3 | # set system environment 4 | ENV CONDA_ROOT=/opt/conda 5 | ENV CONDA_PREFIX=${CONDA_ROOT} 6 | ENV PATH=$CONDA_PREFIX/bin:$PATH 7 | ENV CONDA_AUTO_UPDATE_CONDA=false 8 | ENV MODEL_NAME='model' 9 | ENV LC_ALL=C.UTF-8 10 | ENV LANG=C.UTF-8 11 | 12 | COPY . /content/ 13 | 14 | WORKDIR /content/ 15 | 16 | # install build dependencies 17 | RUN apt-get update -y \ 18 | && apt-get install -y libc-dev gcc 19 | 20 | # install conda environment 21 | RUN conda env update --name base --file /content/environment.yml \ 22 | && conda clean -ayf \ 23 | && pip install onnxruntime==1.2.0 \ 24 | && rm -rf ~/.cache/pip 25 | RUN find ${CONDA_ROOT}/ -follow -type f -name '*.a' -delete 2> /dev/null; exit 0 \ 26 | && find ${CONDA_ROOT}/ -follow -type f -name '*.pyc' -delete 2> /dev/null; exit 0 \ 27 | && find ${CONDA_ROOT}/ -follow -type f -name '*.js.map' -delete 2> /dev/null; exit 0 \ 28 | && find ${CONDA_ROOT}/lib/python*/site-packages/bokeh/server/static \ 29 | -follow -type f -name '*.js' ! 
-name '*.min.js' -delete 2> /dev/null; exit 0 30 | 31 | CMD python onnx_serve.py ${MODEL_NAME} 32 | -------------------------------------------------------------------------------- /modelci/hub/deployer/trt/gen-hint-environment.yml: -------------------------------------------------------------------------------- 1 | name: protobuf-gen 2 | channels: 3 | - defaults 4 | dependencies: 5 | - _libgcc_mutex=0.1 6 | - c-ares=1.15.0 7 | - ca-certificates=2020.1.1 8 | - certifi=2019.11.28 9 | - grpcio=1.16.1 10 | - grpcio-tools=1.16.1 11 | - ld_impl_linux-64=2.33.1 12 | - libedit=3.1.20181209 13 | - libffi=3.2.1 14 | - libgcc-ng=9.1.0 15 | - libprotobuf=3.11.2 16 | - libstdcxx-ng=9.1.0 17 | - ncurses=6.1 18 | - openssl=1.1.1d 19 | - pip=20.0.2 20 | - protobuf=3.11.2 21 | - python=3.7.6 22 | - readline=7.0 23 | - setuptools=45.1.0 24 | - six=1.14.0 25 | - sqlite=3.30.1 26 | - tk=8.6.8 27 | - wheel=0.34.1 28 | - xz=5.2.4 29 | - zlib=1.2.11 30 | - pip: 31 | - appdirs==1.4.3 32 | - attrs==19.3.0 33 | - betterproto==1.2.2 34 | - black==19.10b0 35 | - click==7.0 36 | - grpclib==0.3.1 37 | - h2==3.1.1 38 | - hpack==3.0.0 39 | - hyperframe==5.2.0 40 | - jinja2==2.11.1 41 | - markupsafe==1.1.1 42 | - multidict==4.7.4 43 | - pathspec==0.7.0 44 | - regex==2020.1.8 45 | - stringcase==1.2.0 46 | - toml==0.10.0 47 | - typed-ast==1.4.1 48 | -------------------------------------------------------------------------------- /frontend/src/pages/ModelRegister/components/CustomInputGroup/index.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { Input, Select, InputNumber } from 'antd'; 3 | const { Option } = Select; 4 | function CustomInputGroup(props) { 5 | if(!props.mutators.exist(0)){ 6 | props.mutators.push({"name":"acc"}) 7 | props.mutators.push({"score": 0.76}) 8 | } 9 | return ( 10 | 11 | 24 | { 32 | props.mutators.pop() 33 | props.mutators.push({"score": value}) 34 | } 35 | } /> 36 | 37 | ) 38 | } 39 | CustomInputGroup.isFieldComponent = true 40 | export default CustomInputGroup; 41 | -------------------------------------------------------------------------------- /modelci/hub/deployer/pytorch/torch-serve-cpu.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda 2 | 3 | # set system environment 4 | ENV CONDA_ROOT=/opt/conda 5 | ENV CONDA_PREFIX=${CONDA_ROOT} 6 | ENV PATH=$CONDA_PREFIX/bin:$PATH 7 | ENV CONDA_AUTO_UPDATE_CONDA=false 8 | ENV MODEL_NAME='model' 9 | ENV LC_ALL=C.UTF-8 10 | ENV LANG=C.UTF-8 11 | 12 | COPY ./environment.yml /content/ 13 | 14 | WORKDIR /content/ 15 | 16 | # install build dependencies 17 | RUN apt-get update -y \ 18 | && apt-get install -y libc-dev gcc 19 | 20 | # install conda environment 21 | RUN conda env update --name base --file /content/environment.yml \ 22 | && conda install -y pytorch cpuonly -c pytorch -c conda-forge \ 23 | && conda clean -ayf \ 24 | && rm -rf ~/.cache/pip 25 | RUN find ${CONDA_ROOT}/ -follow -type f -name '*.a' -delete 2> /dev/null; exit 0 \ 26 | && find ${CONDA_ROOT}/ -follow -type f -name '*.pyc' -delete 2> /dev/null; exit 0 \ 27 | && find ${CONDA_ROOT}/ -follow -type f -name '*.js.map' -delete 2> /dev/null; exit 0 \ 28 | && find ${CONDA_ROOT}/lib/python*/site-packages/bokeh/server/static \ 29 | -follow -type f -name '*.js' ! -name '*.min.js' -delete 2> /dev/null; exit 0 30 | 31 | COPY . 
/content/ 32 | 33 | CMD python pytorch_serve.py ${MODEL_NAME} 34 | -------------------------------------------------------------------------------- /modelci/hub/deployer/README.md: -------------------------------------------------------------------------------- 1 | # Auto-deployment 2 | 3 | ## Example 4 | 5 |
6 | 7 | 1. Serve by name 8 | 9 | Serve a ResNet50 model built with the TensorFlow framework via TensorRT, on CUDA device 1: 10 | ```shell script 11 | python serving.py name -m ResNet50 -f tensorflow -e trt --device cuda:1 12 | ``` 13 | 14 | 2. Serve by task 15 | 16 | Serve a model performing an image classification task on CPU: 17 | ```shell script 18 | python serving.py task --task 'image classification' --device cpu 19 | ``` 20 | 21 | A sketch with an explicit container name follows below. 22 | 23 | 24 | 25 | 
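`serving.py` also accepts a `--name` flag to pin the serving container's name, as `scripts/install.verify.sh` does. A minimal sketch (the model and container name are illustrative):

```shell script
# serve a TorchScript ResNet50 on CPU under an explicit container name
python serving.py name -m ResNet50 -f pytorch -e torchscript --device cpu --name resnet50-ts-cpu
```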
26 | 27 | ## Usage 28 | 29 | ### Serve by name 30 | 31 | ```shell script 32 | python serving.py name --model/-m {MODEL_NAME} --framework/-f {FRAMEWORK_NAME} --engine/-e {ENGINE_NAME} --device {DEVICE} 33 | ``` 34 | 35 | ### Serve by task 36 | 37 | ```shell script 38 | python serving.py task --task {TASK_NAME} --device {DEVICE} 39 | ``` 40 | 41 | Supported model names: 42 | - ResNet50 43 | 44 | Supported production model formats associated with serving systems: 45 | - TorchScript -> Self-defined gRPC docker 46 | - TensorFlow SavedModel -> Tensorflow-Serving 47 | - ONNX -> ONNX runtime 48 | - TensorRT -> TensorRT Inference Server (can also serve all of the above formats) 49 | 50 | Supported production communication protocols: 51 | - HTTP 52 | - gRPC 53 | -------------------------------------------------------------------------------- /modelci/hub/README.md: -------------------------------------------------------------------------------- 1 | # ModelHub 2 | 3 | Manage (CRUD), convert, diagnose and deploy DL models supported by industrial serving systems. 4 | 5 | ## Manage 6 | 7 | A collection of high-level APIs to drive the model service, including registering (and uploading) models with an auto-generated model family, selecting a suitable model based on requirements, and other model management APIs. 8 | 9 | ## Convert 10 | 11 | To help developers convert models for deployment purposes 12 | 13 | - [x] Pytorch -> torchscript 14 | - [x] Pytorch -> ONNX 15 | - [x] Tensorflow -> Tensorflow-Serving format 16 | - [x] Tensorflow -> TensorRT format 17 | - [x] ONNX -> TensorRT format 18 | 19 | ## Diagnose 20 | 21 | Test models in a production environment 22 | 23 | - [ ] Get original model performance (cold start, latency, throughput) on different devices (local) 24 | - [ ] Get containerized model performance (cold start, latency, throughput) on different devices (container) 25 | - [ ] Successfully converted models with their performance, and failed conversions with error codes 26 | - [ ] Accuracy (or some other metrics) loss of newly converted models (users need to specify the path of the test data) 27 | 28 | ## Deploy 29 | 30 | Employ serving systems and Docker to deploy models 31 | 32 | - [x] Torchscript 33 | - [x] Tensorflow-Serving 34 | - [x] ONNX_Runtime 35 | - [x] TensorRT-Inference-Server 36 | -------------------------------------------------------------------------------- /modelci/app/handler.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 6/22/2020 7 | """ 8 | from datetime import datetime 9 | 10 | from fastapi import Request 11 | from starlette.responses import JSONResponse 12 | 13 | from modelci.app.main import app 14 | from modelci.persistence.exceptions import DoesNotExistException, BadRequestValueException, ServiceException 15 | 16 | 17 | @app.exception_handler(ServiceException) 18 | async def service_exception_handler(request: Request, exc: ServiceException): 19 | return JSONResponse( 20 | status_code=500, 21 | content={'message': exc.message, 'status': 500, 'timestamp': datetime.now().timestamp()} 22 | ) 23 | 24 | 25 | @app.exception_handler(DoesNotExistException) 26 | async def does_not_exist_exception_handler(request: Request, exc: DoesNotExistException): 27 | return JSONResponse( 28 | status_code=404, 29 | content={'message': exc.message, 'status': 404, 'timestamp': datetime.now().timestamp()} 30 | ) 31 | 32 | 33 | 
@app.exception_handler(BadRequestValueException) 34 | async def bad_request_value_exception_handler(request: Request, exc: BadRequestValueException): 35 | return JSONResponse( 36 | status_code=400, 37 | content={'message': exc.message, 'status': 400, 'timestamp': datetime.now().timestamp()} 38 | ) 39 | -------------------------------------------------------------------------------- /docker/docker-compose-cpu-modelhub.yml: -------------------------------------------------------------------------------- 1 | networks: 2 | modelci: 3 | name: modelci_cpu_network 4 | 5 | services: 6 | mongo: 7 | image: mongo:latest 8 | container_name: mongodb 9 | restart: always 10 | ports: 11 | - 27017:27017 12 | environment: 13 | MONGO_INITDB_USERNAME: modelci 14 | MONGO_INITDB_PASSWORD: modelci@2020 15 | MONGO_INITDB_DATABASE: modelci 16 | hostname: mongodb 17 | volumes: 18 | - ${PWD}/../scripts/init_db.js:/docker-entrypoint-initdb.d/init_db.js 19 | networks: 20 | - modelci 21 | 22 | cadvisor: 23 | image: google/cadvisor:latest 24 | container_name: cadvisor 25 | command: --privileged 26 | restart: always 27 | ports: 28 | - 8080:8080 29 | volumes: 30 | - /:/rootfs:ro 31 | - /var/run:/var/run:rw 32 | - /sys:/sys:ro 33 | - /var/lib/docker/:/var/lib/docker:ro 34 | networks: 35 | - modelci 36 | 37 | backend: 38 | image: mlmodelci/mlmodelci:cpu 39 | container_name: modelci-backend 40 | hostname: modelci 41 | ports: 42 | - 8000:8000 43 | depends_on: 44 | - mongo 45 | networks: 46 | - modelci 47 | 48 | frontend: 49 | image: mlmodelci/frontend:latest 50 | container_name: modelci-frontend 51 | ports: 52 | - "3333:80" 53 | depends_on: 54 | - mongo 55 | - backend 56 | networks: 57 | - modelci -------------------------------------------------------------------------------- /modelci/utils/docker_api_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 10/3/2020 7 | 8 | Docker container API utilities. 9 | """ 10 | from docker.errors import ImageNotFound 11 | 12 | 13 | def check_container_status(docker_client, name): 14 | """Check an existing container's running status and health. 15 | 16 | Args: 17 | docker_client (docker.client.DockerClient): Docker client instance. 18 | name (str): Name of the container. 19 | 20 | Returns: 21 | bool: True if the container exists and its status is 'running'. 22 | """ 23 | state = docker_client.containers.get(name).attrs.get('State') 24 | return state is not None and state.get('Status') == 'running' 25 | 26 | 27 | def list_containers(docker_client, filters): 28 | return docker_client.containers.list(all=True, filters=filters) 29 | 30 | 31 | def get_image(docker_client, name, logger): 32 | """Get Docker image. 33 | 34 | Args: 35 | docker_client (docker.client.DockerClient): Docker client instance. 36 | name (str): Image name. 37 | logger (modelci.utils.Logger): logger instance. 38 | 39 | Returns: 40 | docker.models.images.Image: Docker image. 
41 | """ 42 | try: 43 | image = docker_client.images.get(name) 44 | except ImageNotFound: 45 | logger.info(f'pulling {name}...') 46 | image = docker_client.images.pull(name) 47 | 48 | return image 49 | -------------------------------------------------------------------------------- /docker/docker-compose-cuda10.2-modelhub.yml: -------------------------------------------------------------------------------- 1 | networks: 2 | modelci: 3 | name: modelci_gpu_network 4 | 5 | services: 6 | mongo: 7 | image: mongo:latest 8 | container_name: mongodb 9 | restart: always 10 | ports: 11 | - 27017:27017 12 | environment: 13 | MONGO_INITDB_USERNAME: modelci 14 | MONGO_INITDB_PASSWORD: modelci@2020 15 | MONGO_INITDB_DATABASE: modelci 16 | hostname: mongodb 17 | volumes: 18 | - ${PWD}/../scripts/init_db.js:/docker-entrypoint-initdb.d/init_db.js 19 | networks: 20 | - modelci 21 | 22 | cadvisor: 23 | image: google/cadvisor:latest 24 | container_name: cadvisor 25 | command: --privileged 26 | restart: always 27 | ports: 28 | - 8080:8080 29 | volumes: 30 | - /:/rootfs:ro 31 | - /var/run:/var/run:rw 32 | - /sys:/sys:ro 33 | - /var/lib/docker/:/var/lib/docker:ro 34 | networks: 35 | - modelci 36 | 37 | backend: 38 | image: mlmodelci/mlmodelci:cuda10.2-cudnn8 39 | container_name: modelci-backend 40 | hostname: modelci 41 | ports: 42 | - 8000:8000 43 | depends_on: 44 | - mongo 45 | networks: 46 | - modelci 47 | 48 | frontend: 49 | image: mlmodelci/frontend:latest 50 | container_name: modelci-frontend 51 | ports: 52 | - "3333:80" 53 | depends_on: 54 | - mongo 55 | - backend 56 | networks: 57 | - modelci -------------------------------------------------------------------------------- /modelci/hub/converter/to_tfs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) NTU_CAP 2021. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at: 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 14 | # or implied. See the License for the specific language governing 15 | # permissions and limitations under the License. 
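# --- Annotation (not part of the original source): a minimal usage sketch for the
# `TFSConverter.from_tensorflow` API defined below, which saves a SavedModel and
# zips it next to the save path. The model and save path are illustrative assumptions.
#
#     from pathlib import Path
#     import tensorflow as tf
#     from modelci.hub.converter import TFSConverter
#
#     model = tf.keras.applications.ResNet50(weights='imagenet')
#     TFSConverter.from_tensorflow(model, save_path=Path.home() / '.modelci/ResNet50/tensorflow-tfs/1')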
16 | import shutil 17 | from pathlib import Path 18 | 19 | from modelci.utils import Logger 20 | 21 | logger = Logger('converter', welcome=False) 22 | 23 | 24 | class TFSConverter(object): 25 | supported_framework = ["tensorflow"] 26 | 27 | @staticmethod 28 | def from_tensorflow(model, save_path: Path, override: bool = False): 29 | import tensorflow as tf 30 | 31 | if save_path.with_suffix('.zip').exists(): 32 | if not override: # file exist yet override flag is not set 33 | logger.info('Use cached model') 34 | return True 35 | 36 | tf.compat.v1.saved_model.save(model, str(save_path)) 37 | shutil.make_archive(save_path, 'zip', root_dir=save_path.parent) 38 | 39 | return True 40 | -------------------------------------------------------------------------------- /modelci/hub/deployer/onnxs/onnx-serve-gpu.Dockerfile: -------------------------------------------------------------------------------- 1 | ARG CUDA="10.1" 2 | ARG CUDNN="7" 3 | 4 | FROM nvidia/cuda:${CUDA}-cudnn${CUDNN}-runtime-ubuntu16.04 5 | 6 | # set system environment 7 | ENV CONDA_ROOT=/miniconda/ 8 | ENV CONDA_PREFIX=${CONDA_ROOT} 9 | ENV PATH=${CONDA_ROOT}/bin:${PATH} 10 | ENV CONDA_AUTO_UPDATE_CONDA=false 11 | ENV MODEL_NAME='model' 12 | ENV LC_ALL=C.UTF-8 13 | ENV LANG=C.UTF-8 14 | 15 | # Copy source 16 | COPY . /content/ 17 | 18 | WORKDIR /content/ 19 | 20 | # install basics 21 | RUN apt-get update -y \ 22 | && apt-get install -y curl gcc 23 | 24 | # Install Miniconda 25 | RUN curl -L https://repo.anaconda.com/miniconda/Miniconda3-py37_4.8.2-Linux-x86_64.sh -o /miniconda.sh \ 26 | && chmod +x /miniconda.sh \ 27 | && /miniconda.sh -b -p /miniconda \ 28 | && rm /miniconda.sh 29 | 30 | # Create a Python environment 31 | RUN conda env update --name base -f /content/environment.yml \ 32 | && pip install onnxruntime-gpu==1.2.0 \ 33 | && conda clean -ya \ 34 | && rm -rf ~/.cache/pip 35 | 36 | RUN find ${CONDA_ROOT}/ -follow -type f -name '*.a' -delete 2> /dev/null; exit 0 \ 37 | && find ${CONDA_ROOT}/ -follow -type f -name '*.pyc' -delete 2> /dev/null; exit 0 \ 38 | && find ${CONDA_ROOT}/ -follow -type f -name '*.js.map' -delete 2> /dev/null; exit 0 \ 39 | && find ${CONDA_ROOT}/lib/python*/site-packages/bokeh/server/static \ 40 | -follow -type f -name '*.js' ! 
-name '*.min.js' -delete 2> /dev/null; exit 0 41 | 42 | RUN apt-get autoremove -y curl gcc \ 43 | && apt-get clean 44 | 45 | CMD python onnx_serve.py ${MODEL_NAME} 46 | -------------------------------------------------------------------------------- /modelci/hub/deployer/pytorch/torch-serve-gpu.Dockerfile: -------------------------------------------------------------------------------- 1 | ARG CUDA="10.1" 2 | ARG CUDNN="7" 3 | 4 | FROM nvidia/cuda:${CUDA}-cudnn${CUDNN}-runtime-ubuntu16.04 5 | 6 | # set built-time arguments 7 | ARG CUDA 8 | 9 | # set system environment 10 | ENV CONDA_ROOT=/miniconda 11 | ENV CONDA_PREFIX=${CONDA_ROOT} 12 | ENV PATH=${CONDA_ROOT}/bin:${PATH} 13 | ENV CONDA_AUTO_UPDATE_CONDA=false 14 | ENV MODEL_NAME='model' 15 | ENV LC_ALL=C.UTF-8 16 | ENV LANG=C.UTF-8 17 | 18 | COPY ./environment.yml /content/ 19 | 20 | WORKDIR /content/ 21 | 22 | # install basics 23 | RUN apt-get update -y \ 24 | && apt-get install -y curl 25 | 26 | # Install Miniconda 27 | RUN curl -L https://repo.anaconda.com/miniconda/Miniconda3-py37_4.8.2-Linux-x86_64.sh -o /miniconda.sh \ 28 | && chmod +x /miniconda.sh \ 29 | && /miniconda.sh -b -p /miniconda \ 30 | && rm /miniconda.sh 31 | 32 | # Create a Python environment 33 | RUN conda env update --name base -f /content/environment.yml \ 34 | && conda install -y pytorch cudatoolkit=${CUDA} -c pytorch \ 35 | && conda clean -ya \ 36 | && rm -rf ~/.cache/pip 37 | 38 | RUN find ${CONDA_ROOT}/ -follow -type f -name '*.a' -delete 2> /dev/null; exit 0 \ 39 | && find ${CONDA_ROOT}/ -follow -type f -name '*.pyc' -delete 2> /dev/null; exit 0 \ 40 | && find ${CONDA_ROOT}/ -follow -type f -name '*.js.map' -delete 2> /dev/null; exit 0 \ 41 | && find ${CONDA_ROOT}/lib/python*/site-packages/bokeh/server/static \ 42 | -follow -type f -name '*.js' ! -name '*.min.js' -delete 2> /dev/null; exit 0 43 | 44 | # Copy source 45 | COPY . /content/ 46 | 47 | CMD python pytorch_serve.py ${MODEL_NAME} 48 | -------------------------------------------------------------------------------- /modelci/cli/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) NTU_CAP 2021. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at: 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 12 | # or implied. See the License for the specific language governing 13 | # permissions and limitations under the License. 
14 | 15 | import click 16 | import typer 17 | 18 | from modelci.cli import modelhub 19 | from modelci.cli._fixup import _get_click_type_wrapper, _generate_enum_convertor_wrapper 20 | from modelci.cli.service import service 21 | 22 | # Fixup for typer argument and options annotations 23 | typer.main.get_click_type = _get_click_type_wrapper(typer.main.get_click_type) 24 | typer.main.generate_enum_convertor = _generate_enum_convertor_wrapper(typer.main.generate_enum_convertor) 25 | 26 | app = typer.Typer() 27 | 28 | 29 | @app.callback() 30 | def callback(): 31 | """ 32 | A complete MLOps platform for managing, converting and profiling models and 33 | then deploying models as cloud services (MLaaS) 34 | """ 35 | 36 | 37 | app.add_typer(modelhub.app, name='modelhub') 38 | typer_click_object: click.Group = typer.main.get_command(app) # noqa 39 | typer_click_object.add_command(service) 40 | 41 | if __name__ == '__main__': 42 | typer_click_object() 43 | -------------------------------------------------------------------------------- /modelci/hub/deployer/config/utils.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | import numpy as np 4 | 5 | 6 | class DataType(Enum): 7 | """A simplified version of Triton DataType""" 8 | TYPE_INVALID = 0 9 | TYPE_BOOL = 1 10 | TYPE_UINT8 = 2 11 | TYPE_UINT16 = 3 12 | TYPE_UINT32 = 4 13 | TYPE_UINT64 = 5 14 | TYPE_INT8 = 6 15 | TYPE_INT16 = 7 16 | TYPE_INT32 = 8 17 | TYPE_INT64 = 9 18 | TYPE_FP16 = 10 19 | TYPE_FP32 = 11 20 | TYPE_FP64 = 12 21 | TYPE_STRING = 13 22 | 23 | 24 | def model_data_type_to_np(model_dtype): 25 | mapper = { 26 | DataType.TYPE_INVALID: None, 27 | DataType.TYPE_BOOL: np.bool, 28 | DataType.TYPE_UINT8: np.uint8, 29 | DataType.TYPE_UINT16: np.uint16, 30 | DataType.TYPE_UINT32: np.uint32, 31 | DataType.TYPE_UINT64: np.uint64, 32 | DataType.TYPE_INT8: np.int8, 33 | DataType.TYPE_INT16: np.int16, 34 | DataType.TYPE_INT32: np.int32, 35 | DataType.TYPE_INT64: np.int64, 36 | DataType.TYPE_FP16: np.float16, 37 | DataType.TYPE_FP32: np.float32, 38 | DataType.TYPE_FP64: np.float64, 39 | DataType.TYPE_STRING: np.dtype(object) 40 | } 41 | 42 | if isinstance(model_dtype, int): 43 | model_dtype = DataType(model_dtype) 44 | elif isinstance(model_dtype, str): 45 | model_dtype = DataType[model_dtype] 46 | elif not isinstance(model_dtype, DataType): 47 | raise TypeError( 48 | f'model_dtype is expecting one of the type: `int`, `str`, or `DataType` but got {type(model_dtype)}' 49 | ) 50 | return mapper[model_dtype] 51 | -------------------------------------------------------------------------------- /modelci/hub/deployer/pytorch/utils.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | import numpy as np 4 | 5 | 6 | class DataType(Enum): 7 | """A simplified version of Triton DataType""" 8 | TYPE_INVALID = 0 9 | TYPE_BOOL = 1 10 | TYPE_UINT8 = 2 11 | TYPE_UINT16 = 3 12 | TYPE_UINT32 = 4 13 | TYPE_UINT64 = 5 14 | TYPE_INT8 = 6 15 | TYPE_INT16 = 7 16 | TYPE_INT32 = 8 17 | TYPE_INT64 = 9 18 | TYPE_FP16 = 10 19 | TYPE_FP32 = 11 20 | TYPE_FP64 = 12 21 | TYPE_STRING = 13 22 | 23 | 24 | def model_data_type_to_np(model_dtype): 25 | mapper = { 26 | DataType.TYPE_INVALID: None, 27 | DataType.TYPE_BOOL: np.bool, 28 | DataType.TYPE_UINT8: np.uint8, 29 | DataType.TYPE_UINT16: np.uint16, 30 | DataType.TYPE_UINT32: np.uint32, 31 | DataType.TYPE_UINT64: np.uint64, 32 | DataType.TYPE_INT8: np.int8, 33 | DataType.TYPE_INT16: np.int16, 34 | 
DataType.TYPE_INT32: np.int32, 35 | DataType.TYPE_INT64: np.int64, 36 | DataType.TYPE_FP16: np.float16, 37 | DataType.TYPE_FP32: np.float32, 38 | DataType.TYPE_FP64: np.float64, 39 | DataType.TYPE_STRING: np.dtype(object) 40 | } 41 | 42 | if isinstance(model_dtype, int): 43 | model_dtype = DataType(model_dtype) 44 | elif isinstance(model_dtype, str): 45 | model_dtype = DataType[model_dtype] 46 | elif not isinstance(model_dtype, DataType): 47 | raise TypeError( 48 | f'model_dtype is expecting one of the type: `int`, `str`, or `DataType` but got {type(model_dtype)}' 49 | ) 50 | return mapper[model_dtype] 51 | -------------------------------------------------------------------------------- /modelci/experimental/mongo_client.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 2/1/2021 7 | """ 8 | from typing import Optional 9 | 10 | import pymongo 11 | 12 | from modelci.config import db_settings 13 | 14 | 15 | class MongoClient(pymongo.MongoClient): 16 | def __init__( 17 | self, 18 | host: str = db_settings.mongo_host, 19 | port: int = db_settings.mongo_port, 20 | document_class: type = dict, 21 | tz_aware: bool = True, 22 | connect: bool = None, 23 | type_registry=None, 24 | username: str = db_settings.mongo_username, 25 | password: Optional[str] = db_settings.mongo_password.get_secret_value(), 26 | authSource: str = db_settings.mongo_auth_source, 27 | authMechanism: str = db_settings.auth_mechanism, 28 | **kwargs 29 | ): 30 | """ 31 | MongoDB Client wrapper with defined configuration. 32 | 33 | Use this class just as `pymongo.MongoClient`. We inject some database-related configuration into this 34 | MongoDB client wrapper, for ease of config management. 
35 | """ 36 | super().__init__( 37 | host=host, 38 | port=port, 39 | document_class=document_class, 40 | tz_aware=tz_aware, 41 | connect=connect, 42 | type_registry=type_registry, 43 | username=username, 44 | password=password, 45 | authSource=authSource, 46 | authMechanism=authMechanism, 47 | **kwargs, 48 | ) 49 | -------------------------------------------------------------------------------- /frontend/src/pages/VersionTree/utils/mock.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "author": { 4 | "name": null, 5 | "email": null 6 | }, 7 | "hash": "eb239973072bc1944", 8 | "refs": [ 9 | "ResNet50/CIFAR10" 10 | ], 11 | "parents": [ 12 | "aca769f0d9e6586e4" 13 | ], 14 | "subject": " ", 15 | "created_at": "2021-02-20T17:33:17.922000" 16 | }, 17 | { 18 | "author": { 19 | "name": null, 20 | "email": null 21 | }, 22 | "hash": "aca769f0d9e6586e4", 23 | "refs": [ 24 | "ResNet50/CIFAR10" 25 | ], 26 | "parents": [ 27 | "d3013089bbc6ebdc0" 28 | ], 29 | "subject": " ", 30 | "created_at": "2021-02-20T17:32:58.434000" 31 | }, 32 | { 33 | "author": { 34 | "name": null, 35 | "email": null 36 | }, 37 | "hash": "d3013089bbc6ebdc0", 38 | "refs": [ 39 | "ResNet50/CIFAR10" 40 | ], 41 | "parents": [ 42 | "7f7e3ab454788d99c" 43 | ], 44 | "subject": " ", 45 | "created_at": "2021-02-20T17:32:29.488000" 46 | }, 47 | { 48 | "author": { 49 | "name": null, 50 | "email": null 51 | }, 52 | "hash": "2c387c4f1c449f251", 53 | "refs": [ 54 | "ResNet50/MNIST" 55 | ], 56 | "parents": [ 57 | "7f7e3ab454788d99c" 58 | ], 59 | "subject": " ", 60 | "created_at": "2021-02-20T17:31:46.160000" 61 | }, 62 | { 63 | "author": { 64 | "name": null, 65 | "email": null 66 | }, 67 | "hash": "7f7e3ab454788d99c", 68 | "refs": [ 69 | "HEAD" 70 | ], 71 | "parents": [ 72 | "root" 73 | ], 74 | "subject": " ", 75 | "created_at": "2021-02-20T17:30:09.246000" 76 | } 77 | ] -------------------------------------------------------------------------------- /modelci/hub/deployer/onnxs/README.md: -------------------------------------------------------------------------------- 1 | # ONNX Serving 2 | 3 | Road map 4 | - [x] try official script for deploying ONNX model via a REST API with FastAPI 5 | - [x] serve Resnet50 6 | - [x] pack a ONNX serving docker 7 | - [x] add gRPC support of the ONNX serving 8 | - [ ] API test script and gRPC test script 9 | - [ ] API and gRPC test with profiling 10 | 11 | ## Install 12 | 13 | ```shell script 14 | cp ../config/utils.py . 15 | cp ../config/docker-env.env.example ./docker-env.env 16 | 17 | # Generate gRPC code 18 | python -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. proto/service.proto 19 | 20 | # Build Docker 21 | docker build -t onnx-serving:latest -f onnx-serve-cpu.Dockerfile . 22 | # For GPU version, use onnx-serve-gpu.Dockerfile instead, tag GPU version as onnx-serving:latest-gpu 23 | # docker build -t onnx-serving:latest-gpu -f onnx-serve-gpu.Dockerfile . 24 | ``` 25 | 26 | ## Usage 27 | 28 |
29 | 1. Get a pretrained torch model 30 | 31 | We assume you have set up MongoDB and all environment variables by [install](/README.md#installation). 32 | See `modelci/hub/init_data.py`. 33 | For example, running 34 | ```shell script 35 | python modelci/init_data.py --model resnet50 --framework pytorch 36 | ``` 37 | Models will be saved in the `~/.modelci/ResNet50/pytorch-torchscript/` directory. 38 | **Note**: You do not need to rerun the above code if you have done so for [TorchScript](/modelci/hub/deployer/pytorch). 39 | 40 | 2. Deploy the model 41 | 42 | CPU version: 43 | ```shell script 44 | sh deploy_model_cpu.sh {MODEL_NAME} {REST_API_PORT} 45 | ``` 46 | GPU version: 47 | ```shell script 48 | sh deploy_model_gpu.sh {MODEL_NAME} {REST_API_PORT} 49 | ``` 50 | Once the server is up, it can be called over gRPC (see the client sketch below). 51 | 52 | 53 | 
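A minimal gRPC client sketch for the `Predict` service defined in `proto/service.proto`, assuming the stubs were generated by the `grpc_tools.protoc` command from the Install section and the server listens on the default ONNX gRPC port 8002 (see `modelci/hub/deployer/config/__init__.py`); the model name and image path are illustrative:

```python
import grpc

# stubs generated by: python -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. proto/service.proto
from proto import service_pb2, service_pb2_grpc

# read a test image as raw bytes (path is illustrative)
with open('cat.jpg', 'rb') as f:
    raw = f.read()

channel = grpc.insecure_channel('localhost:8002')  # assumed default ONNX gRPC port
stub = service_pb2_grpc.PredictStub(channel)
response = stub.Infer(service_pb2.InferRequest(model_name='ResNet50', meta='', raw_input=[raw]))
print(response.json)  # JSON string with the prediction result
```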
54 | -------------------------------------------------------------------------------- /modelci/hub/deployer/pytorch/README.md: -------------------------------------------------------------------------------- 1 | # PyTorch Serving 2 | 3 | Road map 4 | - [x] try official script for deploying a PyTorch model via a REST API with FastAPI 5 | - [x] serve ResNet50 6 | - [x] pack a PyTorch serving Docker image 7 | - [x] add gRPC support for the PyTorch serving 8 | - [ ] API test script and gRPC test script 9 | - [ ] API and gRPC test with profiling 10 | 11 | ## Install 12 | 13 | ```shell script 14 | cp ../config/utils.py . 15 | cp ../config/docker-env.env.example ./docker-env.env 16 | 17 | # Generate gRPC code 18 | python -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. proto/service.proto 19 | 20 | # Build Docker 21 | docker build -t pytorch-serving:latest -f torch-serve-cpu.Dockerfile . 22 | # For GPU version, use torch-serve-gpu.Dockerfile instead, tag the GPU version as pytorch-serving:latest-gpu 23 | # docker build -t pytorch-serving:latest-gpu -f torch-serve-gpu.Dockerfile . 24 | ``` 25 | 26 | ## Usage 27 | 28 | 
29 | 1. Get a pre-trained PyTorch model 30 | 31 | We assume you have set up MongoDB and all environment variables by [install](/README.md#installation). 32 | See `modelci/hub/init_data.py`. 33 | For example, running 34 | ```shell script 35 | python modelci/init_data.py --model resnet50 --framework pytorch 36 | ``` 37 | Models will be saved in the `~/.modelci/ResNet50/pytorch-torchscript/` directory. 38 | **Note**: You do not need to rerun the above code if you have done so for [ONNX](/modelci/hub/deployer/onnxs). 39 | 40 | 2. Deploy the model 41 | 42 | CPU version: 43 | ```shell script 44 | sh deploy_model_cpu.sh {MODEL_NAME} {REST_API_PORT} 45 | ``` 46 | GPU version: 47 | ```shell script 48 | sh deploy_model_gpu.sh {MODEL_NAME} {REST_API_PORT} 49 | ``` 50 | A profiling sketch for the deployed model follows below. 51 | 52 | 53 | 
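The deployed model can then be profiled following the pattern of `example/sample_mrcnn.py`. This is a sketch under assumptions: it presumes `modelci/hub/client/torch_client.py` exposes a `CVTorchClient` with the same constructor as `CVTFSClient`, and that the profiler accepts `'pytorch'` as the server name; the test image path is illustrative:

```python
from PIL import Image

from modelci.hub.client.torch_client import CVTorchClient  # assumed analogue of CVTFSClient
from modelci.hub.manager import retrieve_model
from modelci.hub.profiler import Profiler
from modelci.types.bo import Engine, Framework

test_img = Image.open('path/to/a/test/image.jpg')  # illustrative path
model_info = retrieve_model(
    architecture_name='ResNet50', framework=Framework.PYTORCH, engine=Engine.TORCHSCRIPT
)[0]
torch_client = CVTorchClient(test_img, batch_num=100, batch_size=32, asynchronous=True, model_info=model_info)
profiler = Profiler(model_info=model_info, server_name='pytorch', inspector=torch_client)
profiler.diagnose(device='cuda:0')
```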
54 | -------------------------------------------------------------------------------- /modelci/hub/deployer/k8s/README.md: -------------------------------------------------------------------------------- 1 | # Generate a deployment file for a cloud service in k8s 2 | 3 | ## Example 4 | 5 | 
6 | 7 | 1. Generate a config file for the necessary environment variables in the deployment file. 8 | 9 | ``` 10 | [remote_storage] 11 | # configuration for pulling models from cloud storage. 12 | storage_type = S3 13 | aws_access_key_id = sample-id 14 | aws_secret_access_key = sample-key 15 | bucket_name = sample-bucket 16 | remote_model_path = models/bidaf-9 17 | 18 | [model] 19 | # local model path for storing the model after pulling it from the cloud 20 | local_model_dir = /models 21 | local_model_name = bidaf-9 22 | 23 | [deployment] 24 | # detailed deployment configuration 25 | name = sample-deployment 26 | namespace = default 27 | replicas = 1 28 | engine = ONNX 29 | device = cpu 30 | batch_size = 16 31 | ``` 32 | 33 | `[remote_storage]` defines variables for pulling a model from cloud storage. Currently only S3 buckets are supported. 34 | 35 | `[model]` defines variables for the model path in containers. 36 | 37 | `[deployment]` defines variables for serving the model as a cloud service. 38 | 39 | 2. Generate the deployment file to the desired output path. 40 | 41 | ``` 42 | from modelci.hub.deployer.k8s.dispatcher import render 43 | 44 | render( 45 | configuration='example/sample_k8s_deployment.conf', 46 | output_file_path='example/output.yaml' 47 | ) 48 | ``` 49 | 50 | 3. Deploy the service into your k8s cluster, e.g. with `kubectl` (see the sketch below). 51 | 52 | 53 | 54 | 55 | 56 | 
58 | 59 | 60 | ## Usage 61 | 62 | The function is for quickly generate deployment file of cloud service with the modelci-compiled model. 63 | We assume you: 64 | - Push the your compiled model to your remote storage 65 | - Have the k8s cluster for deploying the cloud service 66 | 67 | -------------------------------------------------------------------------------- /scripts/install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | now=$(date +'%Y%m%d-%H%M%S') 4 | log_path="/tmp/modelci-install-${now}.log" 5 | FLAG_ERROR=false 6 | 7 | RED='\033[0;31m' 8 | GREEN='\033[0;32m' 9 | YELLOW='\033[0;33m' 10 | CYAN='\033[0;36m' 11 | NC='\033[0m' 12 | 13 | function script_execution() { 14 | if [[ "${redirect}" == all ]] ; then 15 | bash "${scripts_path}" "$@" &>> "${log_path}" 16 | elif [[ "${redirect}" == stdout ]] ; then 17 | bash "${scripts_path}" "$@" >> "${log_path}" 18 | else 19 | bash "${scripts_path}" "$@" 20 | fi 21 | } 22 | 23 | function error_capture() { 24 | local scripts_path=$1 && shift 25 | local redirect="${1:-all}" && shift 26 | 27 | if script_execution "$@" ; then 28 | echo -e "${GREEN}OK${NC}" 29 | else 30 | echo -e "${RED}FAIL${NC}" 31 | FLAG_ERROR=true 32 | fi 33 | } 34 | 35 | function info_echo() { 36 | printf "${CYAN}%s${NC}" "$1" 37 | } 38 | 39 | # Change all line ending to LF 40 | find scripts/ -type f -exec sed -i -e "s/^M$//" {} \; 41 | 42 | # Install Conda environment 43 | info_echo "Installing Conda environment..." 44 | conda >> /dev/null || exit 1 45 | 46 | error_capture scripts/install.conda_env.sh all 47 | 48 | # Activate conda 49 | source "${CONDA_PREFIX}/etc/profile.d/conda.sh" 50 | conda activate modelci 51 | 52 | # Install Triton client APIs 53 | info_echo "Installing Triton client API..." 54 | error_capture scripts/install.trtis_client.sh all 55 | 56 | # Generating proto 57 | info_echo "Generating gRPC code..." 58 | python -m grpc_tools.protoc -I . --python_out=. --grpc_python_out=. modelci/types/proto/service.proto 59 | 60 | if "${FLAG_ERROR}" = true ; then 61 | echo -e "${YELLOW}Some installation step has failed. Please see full log at ${log_path}." 
62 | fi 63 | -------------------------------------------------------------------------------- /scripts/install.verify.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | function clean_docker() { 4 | local name="modelci.test._*" 5 | # shellcheck disable=SC2046 6 | docker stop $(docker ps -a -q --filter "name=${name}") 7 | } 8 | 9 | source "${HOME}"/anaconda3/etc/profile.d/conda.sh 10 | conda activate modelci 11 | 12 | source scripts/setup_env.sh 13 | export MONGO_DB='test' 14 | 15 | python -m pytest tests/ 16 | 17 | cd modelci/hub || exit 1 18 | python init_data.py export --model ResNet50 --framework tensorflow 19 | # For tensorflow with TFS and TRT 20 | #python init_data.py export --model ResNet50 --framework tensorflow --trt 21 | python init_data.py export --model ResNet50 --framework pytorch 22 | 23 | cd deployer || exit 1 24 | # test ts 25 | python serving.py name -m ResNet50 -f pytorch -e torchscript --device cpu --name modelci.test._resnet50-ts 26 | python serving.py name -m ResNet50 -f pytorch -e torchscript --device cuda --name modelci.test._resnet50-ts-gpu 27 | # TODO: client 28 | clean_docker modelci.test.resnet50-ts* 29 | 30 | # test tfs 31 | python serving.py name -m ResNet50 -f tensorflow -e tfs --device cpu --name modelci.test._resnet50-tfs 32 | python serving.py name -m ResNet50 -f tensorflow -e tfs --device cuda --name modelci.test._resnet50-tfs-gpu 33 | # TODO: client 34 | clean_docker modelci.test.resnet50-tfs* 35 | 36 | # test onnx 37 | python serving.py name -m ResNet50 -f pytorch -e onnx --device cpu --name modelci.test._resnet50-onnx 38 | python serving.py name -m ResNet50 -f pytorch -e onnx --device cuda --name modelci.test._resnet50-onnx-gpu 39 | # TODO: client 40 | clean_docker modelci.test.resnet50-onnx* 41 | 42 | # test trt 43 | #python serving.py name -m ResNet50 -f tensorflow -e trt --device cuda --name modelci.test.resnet50-trt 44 | # TODO: client 45 | #clean_docker modelci.test.resnet50-trt 46 | -------------------------------------------------------------------------------- /modelci/app/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 6/19/2020 7 | """ 8 | import multiprocessing as mp 9 | import os 10 | import signal 11 | from pathlib import Path 12 | 13 | from modelci.config import app_settings 14 | from modelci.utils import Logger 15 | from modelci.utils.misc import check_process_running 16 | 17 | logger = Logger('modelci backend', welcome=False) 18 | default_log_file = Path.home() / 'tmp/modelci.log' 19 | default_log_file.parent.mkdir(exist_ok=True) 20 | 21 | 22 | def start():
 23 | """Run a ModelCI backend server with Uvicorn.""" 24 | from modelci.app.main import _app_start_detach 25 | 26 | # check if the process is running 27 | pid = check_process_running(app_settings.server_port) 28 | if not pid: 29 | backend_process = mp.Process(target=_app_start_detach, args=(default_log_file,)) 30 | backend_process.start() 31 | 32 | logger.info(f'Uvicorn server listening on {app_settings.server_url}, check full log at {default_log_file}') 33 | else: 34 | logger.warning(f'Unable to start server. A process with pid={pid} is already listening on ' 35 | f'port {app_settings.server_port}. 
' 36 | 'Please check if your Uvicorn server has started.') 37 | 38 | 39 | def stop(): 40 | """Stop the ModelCI backend server.""" 41 | # get backend process pid 42 | pid = check_process_running(app_settings.server_port) 43 | if pid: 44 | os.killpg(os.getpgid(pid), signal.SIGTERM) 45 | logger.info(f'The Uvicorn server with pid={pid} stopped.') 46 | else: 47 | logger.warning(f'No process is listening on port {app_settings.server_port}') 48 | 49 | 50 | if __name__ == '__main__': 51 | start() 52 | -------------------------------------------------------------------------------- /modelci/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | from pydantic import BaseSettings, SecretStr, SecretBytes 5 | 6 | # API 7 | API_V1_STR = '/api/v1' 8 | API_EXP_STR = '/api/exp' 9 | 10 | 11 | class DBSettings(BaseSettings): 12 | mongo_host: str = 'localhost' 13 | mongo_port: int = 27017 14 | mongo_username: str = 'modelci' 15 | mongo_password: SecretStr = SecretStr('modelci@2020') 16 | mongo_db: str = 'modelci' 17 | mongo_auth_source: str = 'modelci' 18 | auth_mechanism: str = 'SCRAM-SHA-256' 19 | 20 | class Config: 21 | env_file = Path(__file__).absolute().parent / '.env' 22 | 23 | 24 | class ServiceSettings(BaseSettings): 25 | mongo_host: str = 'localhost' 26 | mongo_port: int = 27017 27 | 28 | # cAdvisor configuration 29 | cadvisor_port: int = 8080 30 | 31 | # Node exporter configuration 32 | node_exporter_port: int = 9400 33 | 34 | class Config: 35 | env_file = Path(__file__).absolute().parent / '.env' 36 | 37 | 38 | class AppSettings(BaseSettings): 39 | project_name: str = 'ModelCI' 40 | backend_cors_origins: str = '*' 41 | server_host: str = 'localhost' 42 | server_port: int = 8000 43 | secret_key: SecretBytes = SecretBytes(os.urandom(32)) 44 | access_token_expire_minutes: int = 60 * 24 * 8 # 60 minutes * 24 hours * 8 days = 8 days 45 | 46 | class Config: 47 | env_file = Path(__file__).absolute().parent / '.env' 48 | 49 | @property 50 | def server_url(self): 51 | return f'http://{self.server_host}:{self.server_port}' 52 | 53 | @property 54 | def api_v1_prefix(self): 55 | return f'http://{self.server_host}:{self.server_port}{API_V1_STR}' 56 | 57 | 58 | service_settings = ServiceSettings() 59 | db_settings = DBSettings() 60 | app_settings = AppSettings() 61 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvcr.io/nvidia/tensorrt:19.10-py3 2 | 3 | # set system environment 4 | ENV CONDA_ROOT=/miniconda 5 | ENV CONDA_PREFIX=${CONDA_ROOT} 6 | ENV PATH=${CONDA_ROOT}/bin:${PATH} 7 | ENV CONDA_AUTO_UPDATE_CONDA=false 8 | ENV TRITONIS_VERSION=1.8.0 9 | ENV PYTHONPATH=/content/ 10 | 11 | COPY . 
/content 12 | 13 | # Change all files EOF to LF 14 | RUN find /content/scripts -type f -exec sed -i -e 's/^M$//' {} \; 15 | 16 | RUN apt-get update -y \ 17 | && apt-get install -y curl=7.58.0-2ubuntu3.8 zip=3.0-11build1 \ 18 | && apt-get clean \ 19 | && rm -rf /var/lib/apt/lists/* 20 | 21 | # Install Miniconda 22 | RUN curl -L https://repo.anaconda.com/miniconda/Miniconda3-py37_4.8.2-Linux-x86_64.sh -o /miniconda.sh \ 23 | && sh /miniconda.sh -b -p "${CONDA_ROOT}" \ 24 | && rm /miniconda.sh 25 | 26 | # Install Conda environment 27 | RUN conda env update --name base -f /content/environment.yml \ 28 | && conda install pytorch=1.5.0 torchvision cudatoolkit="${CUDA_VERSION}" -y -c pytorch \ 29 | && conda install tensorflow-gpu=2.1.0 -y \ 30 | && pip install tensorflow-serving-api==2.1.0 31 | 32 | # Install TRTIS 33 | RUN mkdir -p ~/tmp 34 | WORKDIR /root/tmp 35 | RUN curl -LJ https://github.com/NVIDIA/triton-inference-server/releases/download/v${TRITONIS_VERSION}/v${TRITONIS_VERSION}_ubuntu1804.clients.tar.gz \ 36 | -o tritonis.clients.tar.gz \ 37 | && tar xzf tritonis.clients.tar.gz \ 38 | && pip install ~/tmp/python/tensorrtserver-${TRITONIS_VERSION}-py2.py3-none-linux_x86_64.whl 39 | 40 | # Uninstall build dependency 41 | RUN apt-get remove -y curl wget \ 42 | && apt-get clean \ 43 | && apt-get autoremove -y \ 44 | && rm -rf /var/lib/apt/lists/* 45 | 46 | # remove cache 47 | RUN conda clean -ya \ 48 | && rm -rf ~/.cache/pip \ 49 | && rm -rf ~/tmp 50 | 51 | WORKDIR /content 52 | 53 | ENTRYPOINT ["/bin/bash"] 54 | -------------------------------------------------------------------------------- /modelci/hub/converter/to_torchscript.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) NTU_CAP 2021. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at: 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 14 | # or implied. See the License for the specific language governing 15 | # permissions and limitations under the License. 16 | 17 | import torch 18 | from pathlib import Path 19 | from modelci.utils import Logger 20 | 21 | logger = Logger('converter', welcome=False) 22 | 23 | 24 | class TorchScriptConverter(object): 25 | supported_framework = ["pytorch"] 26 | 27 | @staticmethod 28 | def from_pytorch(model: torch.nn.Module, save_path: Path, override: bool = False): 29 | """Convert a PyTorch nn.Module into TorchScript. 
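A minimal usage sketch (the save path is hypothetical; torchvision is assumed to be available):

            import torchvision.models as models
            ok = TorchScriptConverter.from_pytorch(models.resnet18(), Path('/tmp/resnet18/1'))

        On success, the scripted model is written to ``save_path`` with a ``.zip`` suffix and True is returned.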
30 | """ 31 | if save_path.with_suffix('.zip').exists(): 32 | if not override: # file exists but the override flag is not set 33 | logger.info('Use cached model') 34 | return True 35 | model.eval() 36 | try: 37 | traced = torch.jit.script(model) 38 | save_path.parent.mkdir(parents=True, exist_ok=True) 39 | traced.save(str(save_path.with_suffix('.zip'))) 40 | logger.info('TorchScript format converted successfully') 41 | return True 42 | except Exception: 43 | # TODO catch different types of error 44 | logger.warning('Failed to convert this model to TorchScript format') 45 | return False 46 | -------------------------------------------------------------------------------- /modelci/types/bo/static_profile_result_bo.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from modelci.types.do import StaticProfileResultDO 4 | 5 | 6 | class StaticProfileResultBO(object): 7 | def __init__(self, parameters: int, flops: int, memory: int, mread: int, mwrite: int, mrw: int): 8 | """Initializer. 9 | 10 | Args: 11 | parameters (int): number of parameters. 12 | flops (int): total floating point operations to run the model. 13 | memory (int): memory occupation in Byte. 14 | mread (int): memory read size. 15 | mwrite (int): memory write size. 16 | mrw (int): memory read write size. 17 | """ 18 | self.parameters = parameters 19 | self.flops = flops 20 | self.memory = memory 21 | self.mread = mread 22 | self.mwrite = mwrite 23 | self.mrw = mrw 24 | 25 | def to_static_profile_result_po(self): 26 | """Convert business object to plain object. 27 | """ 28 | static_profile_result_po = StaticProfileResultDO( 29 | parameters=self.parameters, flops=self.flops, 30 | memory=self.memory, mread=self.mread, mwrite=self.mwrite, 31 | mrw=self.mrw) 32 | return static_profile_result_po 33 | 34 | @staticmethod 35 | def from_static_profile_result_po(spr_po: Optional[StaticProfileResultDO]): 36 | """Create business object from a plain object. 37 | 38 | Args: 39 | spr_po (Optional[StaticProfileResultDO]): static profiling result plain object. May be None. 40 | """ 41 | # spr_po nullable 42 | if spr_po is None: 43 | return None 44 | 45 | spr = StaticProfileResultBO(parameters=spr_po.parameters, flops=spr_po.flops, memory=spr_po.memory, 46 | mread=spr_po.mread, mwrite=spr_po.mwrite, mrw=spr_po.mrw) 47 | return spr 48 | -------------------------------------------------------------------------------- /modelci/hub/model_loader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 2/17/2021 7 | 8 | Load models from weight files stored on local disk into memory. The loaded models can then be used for inference. 9 | 10 | This file provides a unified API for loading models stored in different formats (e.g. PyTorch pickle, 11 | TensorFlow SavedModel). 12 | """ 13 | import os 14 | from pathlib import Path 15 | 16 | import tensorflow as tf 17 | import torch 18 | import joblib 19 | from modelci.types.models.common import Framework, Engine 20 | from modelci.types.models.mlmodel import MLModel 21 | 22 | 23 | def joblib_loader(model_weight_path: Path): 24 | """Load a joblib-serialized model: the scikit-learn API of XGBoost or LightGBM, or a plain scikit-learn model. 
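A usage sketch (the weight file path is hypothetical):

        model = joblib_loader(Path('~/.modelci/model.joblib').expanduser())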
25 | """ 26 | return joblib.load(model_weight_path) 27 | 28 | 29 | def pytorch_loader(model_weight_path: Path): 30 | return torch.load(model_weight_path) 31 | 32 | 33 | def savedmodel_loader(model_weight_path: Path): 34 | """Load from a TensorFlow SavedModel or HDF5 model. 35 | 36 | References: 37 | https://www.tensorflow.org/tutorials/keras/save_and_load 38 | """ 39 | return tf.keras.models.load_model(model_weight_path) 40 | 41 | 42 | def load(model: MLModel): 43 | # TODO only support torch.save, saved_model, and joblib serialization for the time being 44 | """A unified API to load model weight files in various formats. 45 | 46 | Args: 47 | model (MLModel): the model whose weight file is to be loaded. 48 | """ 49 | if model.framework == Framework.PyTorch and model.engine in (Engine.PYTORCH, Engine.NONE): # PyTorch 50 | return pytorch_loader(model.saved_path) 51 | elif model.framework == Framework.TensorFlow and model.engine in (Engine.TFS, Engine.NONE): # TensorFlow 52 | return savedmodel_loader(model.saved_path) 53 | elif model.framework in (Framework.Sklearn, Framework.XGBoost, Framework.LightGBM) and model.engine == Engine.NONE: # sklearn 54 | return joblib_loader(model.saved_path) 55 | else: 56 | raise ValueError(f'No loader available for framework {model.framework} and engine {model.engine}') 57 | -------------------------------------------------------------------------------- /modelci/app/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 6/19/2020 7 | """ 8 | import os 9 | import sys 10 | from typing import Optional 11 | 12 | import uvicorn 13 | from fastapi import FastAPI 14 | from starlette.middleware.cors import CORSMiddleware 15 | 16 | from modelci import config 17 | from modelci.app.experimental.api import api_router as api_router_exp 18 | from modelci.app.v1.api import api_router 19 | 20 | settings = config.AppSettings() 21 | app = FastAPI(title=settings.project_name, openapi_url="/api/v1/openapi.json") 22 | 23 | # CORS 24 | origins = [] 25 | 26 | # Set all CORS enabled origins 27 | if settings.backend_cors_origins: 28 | origins_raw = settings.backend_cors_origins.split(",") 29 | for origin in origins_raw: 30 | use_origin = origin.strip().replace('"', '') 31 | origins.append(use_origin) 32 | app.add_middleware( 33 | CORSMiddleware, 34 | allow_origins=origins, 35 | allow_credentials=True, 36 | allow_methods=["*"], 37 | allow_headers=["*"], 38 | ) 39 | 40 | app.include_router(api_router, prefix=config.API_V1_STR) 41 | app.include_router(api_router_exp, prefix=config.API_EXP_STR) 42 | 43 | 44 | def _app_start_detach(output_file: Optional[str] = None): 45 | """Start FastAPI as a detached process. 46 | 47 | This forks the process so the server runs detached from the caller. 
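The parent process returns from ``os.fork()`` immediately, while the child optionally redirects stdout/stderr to ``output_file`` and runs the Uvicorn server in its place.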
48 | 49 | Reference: 50 | https://stackoverflow.com/a/49123627 51 | """ 52 | 53 | if os.fork() != 0: # do a double fork, 54 | return 55 | 56 | if output_file: 57 | # redirect stdout, stderr to a file 58 | output_file = open(output_file, 'a') 59 | sys.stdout = output_file 60 | sys.stderr = output_file 61 | uvicorn.run(app, host=settings.server_host, port=settings.server_port) 62 | if output_file: 63 | output_file.close() 64 | 65 | 66 | if __name__ == '__main__': 67 | uvicorn.run(app, host=settings.server_host, port=settings.server_port) 68 | -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@alifd/scaffold-lite", 3 | "version": "0.1.6", 4 | "description": "Lightweight template, using TypeScript, contains only the basic Layout. ", 5 | "dependencies": { 6 | "@alifd/next": "^1.18.16", 7 | "@alifd/theme-design-pro": "^0.x", 8 | "@formily/antd": "^1.3.12", 9 | "@formily/antd-components": "^1.3.12", 10 | "@gitgraph/react": "^1.5.4", 11 | "@hikeman/react-graphviz": "^0.0.7", 12 | "@material-ui/core": "^4.11.3", 13 | "@material-ui/icons": "^4.11.2", 14 | "@rjsf/core": "^2.4.1", 15 | "@rjsf/material-ui": "^2.4.1", 16 | "antd": "^4.12.2", 17 | "axios": "^0.19.2", 18 | "env-cmd": "^10.1.0", 19 | "generate-schema": "^2.6.0", 20 | "graphviz-react": "^1.1.1", 21 | "moment": "^2.29.1", 22 | "prop-types": "^15.7.2", 23 | "react": "^16.4.1", 24 | "react-dom": "^16.4.1", 25 | "reqwest": "^2.0.5", 26 | "styled-components": "^5.2.1" 27 | }, 28 | "devDependencies": { 29 | "@ice/spec": "^1.0.0", 30 | "@types/d3-graphviz": "^2.6.6", 31 | "@types/react": "^16.9.41", 32 | "@types/react-dom": "^16.9.8", 33 | "build-plugin-antd": "^0.1.0", 34 | "build-plugin-fusion": "^0.1.0", 35 | "build-plugin-moment-locales": "^0.1.0", 36 | "eslint": "^6.0.1", 37 | "ice.js": "^1.0.0", 38 | "prettier": "^2.0.5", 39 | "stylelint": "^13.2.0" 40 | }, 41 | "scripts": { 42 | "start": "env-cmd --silent icejs start", 43 | "build": "icejs build", 44 | "lint": "npm run eslint && npm run stylelint", 45 | "eslint": "eslint --cache --ext .js,.jsx ./", 46 | "stylelint": "stylelint ./**/*.scss" 47 | }, 48 | "ideMode": { 49 | "name": "ice-react" 50 | }, 51 | "iceworks": { 52 | "type": "react", 53 | "adapter": "adapter-react-v3" 54 | }, 55 | "engines": { 56 | "node": ">=8.0.0" 57 | }, 58 | "repository": { 59 | "type": "git", 60 | "url": "https://github.com/alibaba-fusion/materials/tree/master/scaffolds/scaffold-lite" 61 | }, 62 | "private": true, 63 | "originTemplate": "@alifd/scaffold-lite" 64 | } 65 | -------------------------------------------------------------------------------- /modelci/data_engine/preprocessor/image_classification.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | 4 | from modelci.hub.utils import TensorRTModelInputFormat 5 | from modelci.types.trtis_objects import DataType 6 | 7 | 8 | def preprocess( 9 | img: Image.Image, 10 | format: TensorRTModelInputFormat, 11 | dtype: DataType, 12 | c: int, 13 | h: int, 14 | w: int, 15 | scaling: str 16 | ): 17 | """ 18 | Pre-process an image to meet the size, type and format 19 | requirements specified by the parameters. 20 | 21 | Arguments: 22 | img (Image.Image): Image object to be predicted. 23 | format (TensorRTModelInputFormat): Format of input tensor. 24 | dtype (DataType): Data type of input tensor. 25 | c (int): Channel size. 
26 | h (int): Height size. 27 | w (int): Width size. 28 | scaling (str): Image scaling algorithm. One of `'INCEPTION'`, `'VGG'` or `None`. 29 | """ 30 | if c == 1: 31 | sample_img = img.convert('L') 32 | else: 33 | sample_img = img.convert('RGB') 34 | 35 | resized_img = sample_img.resize((w, h), Image.BILINEAR) 36 | resized = np.array(resized_img) 37 | if resized.ndim == 2: 38 | resized = resized[:, :, np.newaxis] 39 | 40 | typed = resized.astype(dtype) 41 | 42 | if scaling == 'INCEPTION': 43 | scaled = (typed / 128) - 1 44 | elif scaling == 'VGG': 45 | if c == 1: 46 | scaled = typed - np.asarray((128,), dtype=dtype) 47 | else: 48 | scaled = typed - np.asarray((123, 117, 104), dtype=dtype) 49 | else: 50 | scaled = typed 51 | 52 | # Swap to CHW if necessary 53 | if format == TensorRTModelInputFormat.FORMAT_NCHW: 54 | ordered = np.transpose(scaled, (2, 0, 1)) 55 | else: 56 | ordered = scaled 57 | 58 | # Channels are in RGB order. Currently model configuration data 59 | # doesn't provide any information as to other channel orderings 60 | # (like BGR) so we just assume RGB. 61 | return ordered 62 | -------------------------------------------------------------------------------- /frontend/src/layouts/BasicLayout/components/PageNav/index.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import PropTypes from 'prop-types'; 3 | import { Link, withRouter } from 'ice'; 4 | import { Nav } from '@alifd/next'; 5 | import { asideMenuConfig } from '../../menuConfig'; 6 | 7 | const SubNav = Nav.SubNav; 8 | const NavItem = Nav.Item; 9 | 10 | export interface IMenuItem { 11 | name: string; 12 | path: string; 13 | icon?: string; 14 | children?: IMenuItem[]; 15 | } 16 | 17 | function getNavMenuItems(menusData: any[]) { 18 | if (!menusData) { 19 | return []; 20 | } 21 | 22 | return menusData 23 | .filter((item) => item.name && !item.hideInMenu) 24 | .map((item, index) => { 25 | return getSubMenuOrItem(item, index); 26 | }); 27 | } 28 | 29 | function getSubMenuOrItem(item: IMenuItem, index: number) { 30 | if (item.children && item.children.some((child) => child.name)) { 31 | const childrenItems = getNavMenuItems(item.children); 32 | if (childrenItems && childrenItems.length > 0) { 33 | const subNav = ( 34 | <SubNav key={index} icon={item.icon} label={item.name}> 35 | {childrenItems} 36 | </SubNav> 37 | ); 38 | 39 | return subNav; 40 | } 41 | return null; 42 | } 43 | const navItem = ( 44 | <NavItem key={item.path} icon={item.icon}> 45 | <Link to={item.path}>{item.name}</Link> 46 | </NavItem> 47 | ); 48 | 49 | return navItem; 50 | } 51 | 52 | const Navigation = (props, context) => { 53 | const { location } = props; 54 | const { pathname } = location; 55 | const { isCollapse } = context; 56 | 57 | return ( 58 | <Nav 59 | type="primary" 60 | selectedKeys={[pathname]} 61 | defaultSelectedKeys={[pathname]} 62 | embeddable 63 | openMode="single" 64 | iconOnly={isCollapse} 65 | hasArrow={false} 66 | mode="inline" 67 | > 68 | {getNavMenuItems(asideMenuConfig)} 69 | </Nav> 70 | ); 71 | }; 72 | 73 | Navigation.contextTypes = { 74 | isCollapse: PropTypes.bool, 75 | }; 76 | 77 | const PageNav = withRouter(Navigation); 78 | 79 | export default PageNav; 80 | -------------------------------------------------------------------------------- /modelci/cli/archive/model_cli.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) NTU_CAP 2021. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at: 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 12 | # or implied. See the License for the specific language governing 13 | # permissions and limitations under the License. 14 | 15 | 16 | # TODO(ZHZ): remove the file after moving all functions to model_manager.py 17 | import click 18 | import requests 19 | 20 | from modelci.config import app_settings 21 | from modelci.types.models import MLModel 22 | from modelci.ui import model_view 23 | from modelci.utils.misc import remove_dict_null 24 | 25 | 26 | @click.command() 27 | @click.argument('name', type=click.STRING, required=False) 28 | @click.option( 29 | '-f', '--framework', 30 | type=click.Choice(['TensorFlow', 'PyTorch'], case_sensitive=False), 31 | help='Model framework.' 32 | ) 33 | @click.option( 34 | '-e', '--engine', 35 | type=click.Choice(['NONE', 'TFS', 'TORCHSCRIPT', 'ONNX', 'TRT', 'TVM', 'CUSTOMIZED'], case_sensitive=False), 36 | help='Model serving engine.' 37 | ) 38 | @click.option('-v', '--version', type=click.INT, help='Model version.') 39 | @click.option('-a', '--all', 'list_all', type=click.BOOL, is_flag=True, help='Show all models.') 40 | @click.option('-q', '--quiet', type=click.BOOL, is_flag=True, help='Only show numeric IDs.') 41 | def models(name, framework, engine, version, list_all, quiet): 42 | payload = remove_dict_null({'name': name, 'framework': framework, 'engine': engine, 'version': version}) 43 | with requests.get(f'{app_settings.api_v1_prefix}/model', params=payload) as r: 44 | model_list = r.json() 45 | model_view([MLModel.parse_obj(model) for model in model_list], list_all=list_all, quiet=quiet) 46 | 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /modelci/hub/deployer/tfs/README.md: -------------------------------------------------------------------------------- 1 | # TensorFlow Serving 2 | Deploy Keras models with TensorFlow Serving. 3 | The CPU version is based on TensorFlow 2.0.0. 4 | The GPU version is not supported yet due to TF/CUDA version incompatibilities. 5 | 6 | ## Usage 7 | 8 | 
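The steps below walk through exporting a pre-trained Keras model from the model hub and serving it with TensorFlow Serving.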
9 | 1. Get pre-trained Keras model 10 | 11 | We assume you have set up MongoDB and all the environment variables (see [installation](/README.md#installation)). 12 | See `modelci/hub/init_data.py`. 13 | For example, run: 14 | ```shell script 15 | python modelci/hub/init_data.py export --model resnet50 --framework tensorflow 16 | ``` 17 | Models will be saved under the `~/.modelci/ResNet50/tensorflow-tfs/` directory. 18 | **Note**: You do not need to rerun the above command if you have already done so for [TensorRT](/modelci/hub/deployer/trt). 19 | 20 | 2. Deploy model 21 | 22 | ```shell script 23 | sh deploy_model_cpu.sh {MODEL_NAME} {GRPC_PORT} {REST_API_PORT} 24 | ``` 25 | Or, on a GPU: 26 | ```shell script 27 | sh deploy_model_gpu.sh {MODEL_NAME} {GRPC_PORT} {REST_API_PORT} 28 | ``` 29 | You may check the deployed model using `saved_model_cli` from https://www.tensorflow.org/guide/saved_model 30 | ```shell script 31 | saved_model_cli show --dir {PATH_TO_SAVED_MODEL}/{MODEL_NAME}/{MODEL_VARIANT}/{MODEL_VERSION} --all 32 | ``` 33 | 34 | 3. Testing 35 | 36 | ```shell script 37 | # 3.1. Make a REST request 38 | python rest_client.py --model {MODEL_NAME} --port {PORT} 39 | # 3.2. Make a gRPC request 40 | python grpc_client.py --model {MODEL_NAME} --input_name {INPUT_NAME} 41 | ``` 42 | 43 | 44 | 45 | 46 | 
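As an extra sanity check, you can query the TensorFlow Serving REST API directly (a sketch; the default REST port 8501 and the model name `resnet50` are assumed):
```shell script
# Returns the version status of the deployed model
curl http://localhost:8501/v1/models/resnet50
```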
47 | 48 | ## Example 49 | Let's deploy a pre-trained ResNet50 model (with `modelci/hub` as the working directory): 50 | ```shell script 51 | python init_data.py export --model resnet50 --framework tensorflow 52 | bash deploy_model_gpu.sh resnet50 8500 8501 53 | saved_model_cli show --dir ./resnet50/1 --all 54 | python rest_client.py --model resnet50 --port 8501 55 | python grpc_client.py --model resnet50 --input_name input_1 56 | 57 | # Test the latency and throughput 58 | python rest_client.py --model resnet50 --port 8501 -t 59 | python grpc_client.py --model resnet50 --input_name input_1 -t 60 | ``` 61 | 62 | -------------------------------------------------------------------------------- /modelci/types/do/model_do.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Module for model plain object.""" 3 | 4 | from mongoengine import Document, EmbeddedDocument 5 | from mongoengine.fields import ( 6 | DateTimeField, 7 | EmbeddedDocumentField, 8 | EmbeddedDocumentListField, 9 | FileField, 10 | IntField, 11 | ListField, 12 | StringField, 13 | DictField 14 | ) 15 | 16 | from .profile_result_do import ProfileResultDO 17 | 18 | 19 | class IOShapeDO(EmbeddedDocument): 20 | name = StringField() 21 | shape = ListField(IntField(), required=True) 22 | dtype = StringField(required=True) 23 | format = IntField(required=True) 24 | 25 | 26 | class ModelDO(Document): 27 | """Model Plain Object. 28 | 29 | The unique key of the model plain object is the (engine, architecture, framework, version, task) tuple. 30 | """ 31 | 32 | # Model architecture 33 | architecture = StringField(required=True) 34 | # Framework enum (e.g.: TensorFlow (0) or PyTorch (1)) 35 | framework = IntField(required=True) 36 | # Serving engine enum (e.g.: ONNX or TensorRT) 37 | engine = IntField(required=True) 38 | # Version of the model. 
e.g.: `1` 39 | version = IntField(required=True) 40 | # Dataset 41 | dataset = StringField(required=True) 42 | # Model evaluation metric 43 | metric = DictField(required=True) 44 | # Model weights 45 | weight = FileField() 46 | # Model task 47 | task = IntField(required=True) 48 | # Parent Model ID 49 | parent_model_id = StringField() 50 | # Input specifications 51 | inputs = EmbeddedDocumentListField(IOShapeDO) 52 | # Output specifications 53 | outputs = EmbeddedDocumentListField(IOShapeDO) 54 | # Profile result 55 | profile_result = EmbeddedDocumentField(ProfileResultDO) 56 | # Status enum value 57 | status = IntField(required=True) 58 | # Model Status enum value 59 | model_status = ListField() 60 | # Model provider (uploader) 61 | creator = StringField(required=True) 62 | # Creation time of this record 63 | create_time = DateTimeField(required=True) 64 | 65 | meta = { 66 | 'indexes': [ 67 | {'fields': ('engine', 'architecture', 'framework', 'version', 'task'), 'unique': True} 68 | ] 69 | } 70 | -------------------------------------------------------------------------------- /modelci/hub/client/sample.py: -------------------------------------------------------------------------------- 1 | """ 2 | An example usage of profiler.py 3 | @author huangyz0918 4 | """ 5 | import cv2 6 | import numpy as np 7 | from PIL import Image 8 | 9 | from modelci.hub.client.tfs_client import CVTFSClient 10 | from modelci.hub.manager import retrieve_model 11 | from modelci.hub.profiler import Profiler 12 | from modelci.types.bo import Engine, Framework 13 | 14 | # import torch 15 | # from trt_client import CVTRTClient 16 | # from torch_client import CVTorchClient 17 | # from onnx_client import CVONNXClient 18 | 19 | if __name__ == "__main__": 20 | # Fake data for testing 21 | data_path = './data/cat.jpg' 22 | 23 | # for TensorFlow Serving 24 | with open(data_path, 'rb') as f: 25 | test_img_bytes = f.read() 26 | 27 | # for TensorRT Serving 28 | test_img = Image.open(data_path) 29 | 30 | # for TorchScript and ONNX 31 | test_img_ndarray: np.ndarray = cv2.imread(data_path) 32 | # input = torch.randn(1, 3, 224, 224) 33 | 34 | # Initialize clients for different serving platforms; you can customize a client by implementing the BaseModelInspector class. 35 | model_bo = retrieve_model(architecture_name='ResNet50', framework=Framework.PYTORCH, 36 | engine=Engine.TORCHSCRIPT)[0] 37 | 38 | tfs_client = CVTFSClient( 39 | test_img_bytes, 40 | batch_num=100, 41 | batch_size=32, 42 | asynchronous=False, 43 | model_info=model_bo, 44 | ) 45 | # trt_client = CVTRTClient(test_img, batch_num=100, batch_size=32, asynchronous=False) 46 | # torch_client = CVTorchClient(test_img_ndarray, batch_num=100, batch_size=32, asynchronous=False) 47 | # onnx_client = CVONNXClient(test_img_ndarray, batch_num=100, batch_size=32, asynchronous=False) 48 | 49 | # model_path = '../resnet50_explicit_path.yml' 50 | # register_model_from_yaml(model_path) 51 | profiler = Profiler(model_info=model_bo, server_name='tfs', inspector=tfs_client) 52 | profiler.diagnose(device='cuda:0') 53 | # profiler.diagnose(batch_size=1) # you can pass a new batch_size to override the client's. 
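    # Since model_bo above is a TorchScript model, you can likewise profile the
    # TorchScript server (a sketch: enable the commented CVTorchClient import above;
    # the 'torchscript' server name is an assumption):
    # torch_client = CVTorchClient(test_img_ndarray, model_info=model_bo,
    #                              batch_num=100, batch_size=32, asynchronous=False)
    # profiler = Profiler(model_info=model_bo, server_name='torchscript', inspector=torch_client)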
54 | # profiler.diagnose_all_batches([1, 2, 4, 8, 16, 32]) # run all of the 1, 2, 4, 8, 16, 32 batch sizes 55 | 56 | # profiler.auto_diagnose([2, 4, 16]) 57 | -------------------------------------------------------------------------------- /scripts/generate_env.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from pathlib import Path 4 | from typing import TextIO 5 | 6 | env_dir = Path(__file__).absolute().parents[1] / 'modelci' 7 | 8 | 9 | def _decode_dotenv(env_file: TextIO): 10 | lines = env_file.readlines() 11 | 12 | env_data = dict() 13 | for line in lines: 14 | key, value = line.strip().split('=', 1) 15 | env_data[key] = value 16 | 17 | return env_data 18 | 19 | 20 | def _encode_dotenv(env_data: dict, env_file: TextIO): 21 | lines = [f'{k}={v}' for k, v in env_data.items()] 22 | env_file.write(os.linesep.join(lines)) 23 | 24 | 25 | if __name__ == '__main__': 26 | backend_env, frontend_env = dict(), dict() 27 | 28 | print('Read env-backend.env ...') 29 | with open(env_dir / 'env-backend.env') as f: 30 | backend_env.update(_decode_dotenv(f)) 31 | 32 | print('Read env-mongodb.env ...') 33 | with open(env_dir / 'env-mongodb.env') as f: 34 | backend_env.update(_decode_dotenv(f)) 35 | 36 | print('Read env-frontend.env ...') 37 | with open(env_dir / 'env-frontend.env') as f: 38 | frontend_env.update(_decode_dotenv(f)) 39 | 40 | backend_url = f"{backend_env.get('SERVER_HOST', 'localhost')}:{backend_env.get('SERVER_PORT', 8000)}" 41 | frontend_url = f"{frontend_env.get('HOST', 'localhost')}:{frontend_env.get('PORT', 3333)}" 42 | 43 | # Put frontend url into backend CORS origins 44 | raw_cors_origins = backend_env.get('BACKEND_CORS_ORIGINS', '') 45 | cors_origins = set(filter(lambda origin: origin, raw_cors_origins.split(','))) 46 | cors_origins.add(frontend_url) 47 | backend_env['BACKEND_CORS_ORIGINS'] = ','.join(cors_origins) 48 | 49 | # Put backend url into frontend env 50 | frontend_env['REACT_APP_BACKEND_URL'] = backend_url 51 | 52 | # save to backend .env 53 | print(f'Write .env for backend with setup:\n {json.dumps(backend_env, indent=2)}') 54 | with open(env_dir / '.env', 'w') as f: 55 | _encode_dotenv(backend_env, f) 56 | 57 | # save to frontend .env 58 | print(f'Write .env for frontend with setup:\n {json.dumps(frontend_env, indent=2)}') 59 | with open(env_dir.parent / 'frontend/.env', 'w') as f: 60 | _encode_dotenv(frontend_env, f) 61 | -------------------------------------------------------------------------------- /frontend/src/pages/Visualizer/utils/type.tsx: -------------------------------------------------------------------------------- 1 | /* eslint-disable @typescript-eslint/camelcase */ 2 | export type ModelStructure = { 3 | layer: object; 4 | connection: object; 5 | } 6 | 7 | export interface IFinetuneConfigObject { 8 | model: string; 9 | data_module: object; 10 | min_epochs: number; 11 | max_epochs: number; 12 | optimizer_type: string; 13 | optimizer_property: object; 14 | lr_scheduler_type: string; 15 | lr_scheduler_property: object; 16 | loss_function: string; 17 | } 18 | 19 | export type FinetuneConfig = 20 | | Partial<IFinetuneConfigObject> 21 | | IFinetuneConfigObject; 22 | 23 | export const DEFAULT_FINETUNE_CONFIG: FinetuneConfig = { 24 | model: '', 25 | data_module: {dataset_name: 'CIFAR10', batch_size: 4}, 26 | min_epochs: 10, 27 | max_epochs: 15, 28 | optimizer_type: 'Adam', 29 | optimizer_property: { 30 | betas: [0.9, 0.99], 31 | eps: 1e-08, 32 | weight_decay: 0, 33 | amsgrad: false 34 | }, 35 | 'lr_scheduler_type': 
'StepLR', 36 | lr_scheduler_property: {lr: 0.01, step_size: 30}, 37 | loss_function: 'torch.nn.CrossEntropyLoss' 38 | } 39 | 40 | 41 | export const DEFAULT_CONFIG_SCHEMA = { 42 | 'type': 'object', 43 | 'properties': { 44 | 'dataset': { 45 | 'key': 'dataset', 46 | 'type': 'string', 47 | 'title': 'dataset', 48 | 'name': 'Select Dataset', 49 | 'x-component': 'select', 50 | 'enum': [ 51 | { 52 | 'label': 'CIFAR10', 53 | 'value': 'CIFAR10' 54 | }, 55 | { 56 | 'label': 'MNIST', 57 | 'value': 'MNIST' 58 | }, 59 | { 60 | 'label': 'ImageNet', 61 | 'value': 'ImageNet' 62 | }, 63 | { 64 | 'label': 'Customized', 65 | 'value': 'Customized' 66 | } 67 | ], 68 | 'default': 'CIFAR10' 69 | }, 70 | 'upload': { 71 | 'key': 'upload', 72 | 'type': 'array', 73 | 'title': 'Upload Dataset', 74 | 'name': 'upload', 75 | 'text': 'Click or drag file to this area to upload', 76 | 'x-component-props': { 77 | 'listType': 'dragger', 78 | 'locale': true, 79 | 'locale.uploadText': 'test' 80 | }, 81 | 'x-component': 'upload' 82 | } 83 | } 84 | } -------------------------------------------------------------------------------- /modelci/types/do/dynamic_profile_result_do.py: -------------------------------------------------------------------------------- 1 | from mongoengine import EmbeddedDocument 2 | from mongoengine.fields import StringField, IntField, FloatField, ListField, DateTimeField 3 | 4 | 5 | class DynamicProfileResultDO(EmbeddedDocument): 6 | """ 7 | Dynamic profiling result plain object. 8 | 9 | The primary key of the document is (ip, device_id) pair. 10 | """ 11 | # IP address of the cluster node 12 | ip = StringField(required=True) 13 | # Device ID, e.g. cpu, cuda:0, cuda:1 14 | device_id = StringField(required=False) 15 | # Device name, e.g. Tesla K40c 16 | device_name = StringField(required=True) 17 | # Batch size 18 | batch = IntField(min_value=1, required=True) 19 | # Main or GPU memory consumption in Byte for loading and initializing the model 20 | total_memory = IntField(min_value=0, required=True) 21 | # GPU memory consumption in Byte for processing batch data 22 | memory_usage = IntField(min_value=0, required=True) 23 | # GPU utilization rate for processing batch data 24 | utilization = FloatField(min_value=0, max_value=1, required=True) 25 | # Min, max and avg model loading and initialization latencies 26 | initialization_latency = ListField(FloatField(min_value=0), required=True) 27 | # Min, max and avg preprocess latencies 28 | preprocess_latency = ListField(FloatField(min_value=0), required=True) 29 | # Min, max and avg inference latencies 30 | inference_latency = ListField(FloatField(min_value=0), required=True) 31 | # Min, max and avg postprocess latencies 32 | postprocess_latency = ListField(FloatField(min_value=0), required=True) 33 | # Batch formation QPS 34 | batch_formation_throughput = FloatField(min_value=0, required=True) 35 | # Batch preprocess QPS 36 | preprocess_throughput = FloatField(min_value=0, required=True) 37 | # Batch inference QPS 38 | inference_throughput = FloatField(min_value=0, required=True) 39 | # Batch postprocess QPS 40 | postprocess_throughput = FloatField(min_value=0, required=True) 41 | # Creation time of this record 42 | create_time = DateTimeField(required=True) 43 | 44 | meta = { 45 | 'indexes': [ 46 | {'fields': ('ip', 'device_id'), 'unique': True} 47 | ], 48 | } 49 | -------------------------------------------------------------------------------- /modelci/hub/client/onnx_client.py: -------------------------------------------------------------------------------- 1 
| """ 2 | Author: huangyz0918 3 | Author: Li Yuanming 4 | Desc: template client for ONNX of ResNet-50 5 | Date: 26/04/2020 6 | """ 7 | 8 | import json 9 | import time 10 | 11 | import grpc 12 | import torch 13 | from torchvision import transforms 14 | 15 | from modelci.hub.deployer.config import ONNX_GRPC_PORT 16 | from modelci.metrics.benchmark.metric import BaseModelInspector 17 | from modelci.types.bo import ModelBO 18 | from modelci.types.proto.service_pb2 import InferRequest 19 | from modelci.types.proto.service_pb2_grpc import PredictStub 20 | 21 | 22 | class CVONNXClient(BaseModelInspector): 23 | SERVER_HOST = 'localhost' 24 | 25 | def __init__(self, repeat_data, model_info: ModelBO, batch_num=1, batch_size=1, asynchronous=None): 26 | super().__init__( 27 | repeat_data=repeat_data, 28 | model_info=model_info, 29 | batch_num=batch_num, 30 | batch_size=batch_size, 31 | asynchronous=asynchronous 32 | ) 33 | self.stub = PredictStub(grpc.insecure_channel(f'{self.SERVER_HOST}:{ONNX_GRPC_PORT}')) 34 | 35 | def data_preprocess(self, x): 36 | transform = transforms.Compose( 37 | [ 38 | transforms.ToPILImage(), 39 | transforms.Resize(255), 40 | transforms.CenterCrop(self.model_info.inputs[0].shape[2:]), 41 | transforms.ToTensor(), 42 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 43 | torch.Tensor.numpy 44 | ] 45 | ) 46 | return transform(x) 47 | 48 | def make_request(self, input_batch): 49 | meta = json.dumps( 50 | {'shape': self.model_info.inputs[0].shape[1:], 'dtype': self.model_info.inputs[0].dtype} 51 | ) 52 | request = InferRequest() 53 | request.model_name = self.model_info.architecture 54 | request.meta = meta 55 | request.raw_input.extend(list(map(bytes, input_batch))) 56 | return request 57 | 58 | def check_model_status(self) -> bool: 59 | """TODO: wait for status API for TorchServing.""" 60 | time.sleep(5) 61 | return True 62 | 63 | def infer(self, request): 64 | self.stub.Infer(request) 65 | -------------------------------------------------------------------------------- /tests/test_onnx_conversion.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) NTU_CAP 2021. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at: 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 14 | # or implied. See the License for the specific language governing 15 | # permissions and limitations under the License. 
16 | 17 | import unittest 18 | 19 | import numpy as np 20 | import onnxruntime as rt 21 | import torch 22 | import lightgbm as lgb 23 | from sklearn.datasets import load_breast_cancer 24 | 25 | from modelci.hub.converter import convert 26 | from modelci.types.bo import IOShape 27 | 28 | 29 | class TestONNXConverter(unittest.TestCase): 30 | 31 | @classmethod 32 | def setUpClass(cls): 33 | X_bc, y_bc = load_breast_cancer(return_X_y=True) 34 | nrows = 15000 35 | X_bc: np.ndarray = X_bc[0:nrows] 36 | y_bc: np.ndarray = y_bc[0:nrows] 37 | model = lgb.LGBMRegressor(n_estimators=3, min_child_samples=1) 38 | model.fit(X_bc, y_bc) 39 | inputs_bc = [IOShape(shape=[-1, X_bc.shape[1]], dtype=float, name='input_0')] 40 | cls.onnx_model = convert(model, 'lightgbm', 'onnx', inputs=inputs_bc, optimize=False) 41 | sess = rt.InferenceSession(cls.onnx_model.SerializeToString()) 42 | cls.sample_input = torch.rand(2, X_bc.shape[1], dtype=torch.float32) 43 | cls.onnx_model_predict = sess.run(None, {'input_0': cls.sample_input.numpy()})[0].flatten() 44 | 45 | # noinspection DuplicatedCode 46 | def test_onnx_to_pytorch(self): 47 | torch_model = convert(self.onnx_model, 'onnx', 'pytorch') 48 | torch_model.eval() 49 | torch_model_predict = torch_model(self.sample_input).data.numpy().flatten() 50 | np.testing.assert_allclose(self.onnx_model_predict, torch_model_predict, rtol=1e-05, atol=1e-05) 51 | 52 | 53 | if __name__ == '__main__': 54 | unittest.main() 55 | -------------------------------------------------------------------------------- /.github/workflows/run_test.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | paths-ignore: 7 | - 'frontend/**' 8 | pull_request: 9 | branches: [ master ] 10 | paths-ignore: 11 | - 'frontend/**' 12 | jobs: 13 | test: 14 | runs-on: ubuntu-18.04 15 | services: 16 | mongodb: 17 | image: mongo 18 | ports: 19 | - 27017:27017 20 | env: 21 | MONGO_HOST: localhost 22 | MONGO_PORT: 27017 23 | PYTHONPATH: . 24 | steps: 25 | 26 | - name: Set up Python 27 | uses: actions/setup-python@v2 28 | with: 29 | python-version: '3.7' 30 | 31 | - name: Configure MongoDB 32 | run: mongo modelci --eval 'db.createUser({user:"modelci",pwd:"modelci@2020",roles:["readWrite"]});' 33 | 34 | # refer to https://github.com/dmlc/gluon-nlp/blob/master/.github/workflows/unittests.yml 35 | - name: Install tvm 36 | run: | 37 | sudo apt-get update 38 | sudo apt-get install -y gcc libtinfo-dev zlib1g-dev build-essential cmake libedit-dev libxml2-dev libopenblas-dev ninja-build 39 | git clone https://github.com/apache/incubator-tvm tvm --recursive 40 | cd tvm 41 | mkdir -p build 42 | cp cmake/config.cmake build 43 | echo set\(USE_LLVM ON\) >> build/config.cmake 44 | echo set\(USE_GRAPH_RUNTIME ON\) >> build/config.cmake 45 | echo set\(USE_BLAS openblas\) >> build/config.cmake 46 | cd build 47 | cmake .. -G Ninja 48 | ninja 49 | cd ../python 50 | python -m pip install -U -e . 51 | cd .. 52 | 53 | - name: Checkout 54 | uses: actions/checkout@v2 55 | 56 | - name: Install dependencies 57 | run: | 58 | pip install flake8 pytest 59 | pip install . 60 | 61 | - name: Lint with flake8 62 | run: | 63 | # stop the build if there are Python syntax errors or undefined names 64 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 65 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 66 | flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 67 | 68 | - name: Test with pytest 69 | run: | 70 | python modelci/app/__init__.py 71 | python -m pytest tests/ 72 | -------------------------------------------------------------------------------- /modelci/persistence/mongo_db.py: -------------------------------------------------------------------------------- 1 | import mongoengine as mongo 2 | 3 | 4 | class MongoDB(object): 5 | """MongoDB connection manager. 6 | 7 | This class manages the connections made to MongoDB once the connection settings are given. Connections are held 8 | and can be closed by their connection alias. 9 | """ 10 | 11 | def __init__(self, db: str = None, *, host: str = None, port: int = None, username: str = None, 12 | password: str = None, 13 | auth_source: str = 'admin', **kwargs): 14 | """Create a MongoDB connection manager. 15 | 16 | Args: 17 | db (:obj:`str`, optional): Database name. Defaults to None. 18 | host (:obj:`str`, optional): MongoDB host address. Defaults to None. 19 | port (:obj:`int`, optional): MongoDB port. Defaults to None. 20 | username (:obj:`str`, optional): Username. Defaults to None. 21 | password (:obj:`str`, optional): Password. Defaults to None. 22 | auth_source (:obj:`str`, optional): Authentication source database. Defaults to 'admin'. 23 | """ 24 | self._conn_settings = { 25 | 'name': db, 26 | 'host': host, 27 | 'port': port, 28 | 'username': username, 29 | 'password': password, 30 | 'authentication_source': auth_source, 31 | **kwargs 32 | } 33 | self._sessions = [] 34 | 35 | self.db = None 36 | 37 | def connect(self, alias='default'): 38 | """Connect to a MongoDB session. 39 | 40 | Args: 41 | alias (:obj:`str`, optional): The alias name. Defaults to 'default'. 42 | """ 43 | self.db = mongo.connect(alias=alias, **self._conn_settings) 44 | self._sessions.append(alias) 45 | 46 | def close(self, alias: str = None): 47 | """Close a connection, given its alias name. 48 | 49 | Args: 50 | alias (:obj:`str`, optional): The alias name. Defaults to None. 51 | When an alias name is provided, the connection with that alias is closed. Otherwise, it closes all the 52 | tracked connections. 
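A connection lifecycle sketch (credentials are the documented defaults from ``modelci/config.py``):

            db = MongoDB(db='modelci', host='localhost', port=27017,
                         username='modelci', password='modelci@2020')
            db.connect()   # tracked under the 'default' alias
            db.close()     # no alias given: closes all tracked connections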
53 | """ 54 | if alias in self._sessions: 55 | mongo.disconnect(alias) 56 | if alias is None: 57 | for alias in self._sessions: 58 | mongo.disconnect(alias) 59 | -------------------------------------------------------------------------------- /modelci/hub/client/torch_client.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: huangyz0918 3 | Author: Li Yuanming 4 | Desc: template client for TorchScript of ResNet-50 5 | Date: 26/04/2020 6 | """ 7 | 8 | import json 9 | import time 10 | import grpc 11 | import torch 12 | from torchvision import transforms 13 | 14 | from modelci.hub.deployer.config import TORCHSCRIPT_GRPC_PORT 15 | from modelci.metrics.benchmark.metric import BaseModelInspector 16 | from modelci.types.models.mlmodel import MLModel 17 | from modelci.types.proto.service_pb2 import InferRequest 18 | from modelci.types.proto.service_pb2_grpc import PredictStub 19 | 20 | 21 | class CVTorchClient(BaseModelInspector): 22 | SERVER_HOST = 'localhost' 23 | 24 | def __init__(self, repeat_data, model_info: MLModel, batch_num=1, batch_size=1, asynchronous=None): 25 | super().__init__( 26 | repeat_data=repeat_data, 27 | model_info=model_info, 28 | batch_num=batch_num, 29 | batch_size=batch_size, 30 | asynchronous=asynchronous 31 | ) 32 | self.stub = PredictStub(grpc.insecure_channel(f'{self.SERVER_HOST}:{TORCHSCRIPT_GRPC_PORT}')) 33 | 34 | def data_preprocess(self, x): 35 | transform = transforms.Compose( 36 | [ 37 | transforms.ToPILImage(), 38 | transforms.Resize(255), 39 | transforms.CenterCrop(self.model_info.inputs[0].shape[2:]), 40 | transforms.ToTensor(), 41 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 42 | torch.Tensor.numpy 43 | ] 44 | ) 45 | return transform(x) 46 | 47 | def make_request(self, input_batch): 48 | meta = json.dumps( 49 | {'shape': self.model_info.inputs[0].shape[1:], 50 | 'dtype': self.model_info.inputs[0].dtype.value, 51 | 'torch_flag': True} 52 | ) 53 | request = InferRequest() 54 | request.model_name = self.model_info.architecture 55 | request.meta = meta 56 | 57 | request.raw_input.extend(list(map(bytes, input_batch))) 58 | 59 | return request 60 | 61 | def check_model_status(self) -> bool: 62 | """TODO: wait for status API for TorchServing.""" 63 | time.sleep(5) 64 | return True 65 | 66 | def infer(self, request): 67 | self.stub.Infer(request) 68 | -------------------------------------------------------------------------------- /docker/cpu.Dockerfile: -------------------------------------------------------------------------------- 1 | # Stage1: Compile 2 | FROM ubuntu:18.04 AS compile-image 3 | 4 | # Install tvm dependencies and python 5 | WORKDIR /root 6 | 7 | RUN apt-get update \ 8 | && apt-get install -y --no-install-recommends\ 9 | gcc=4:7.4.0-1ubuntu2.3 \ 10 | libtinfo-dev=6.1-1ubuntu1.18.04 \ 11 | zlib1g-dev=1:1.2.11.dfsg-0ubuntu2 \ 12 | build-essential=12.4ubuntu1 \ 13 | cmake=3.10.2-1ubuntu2.18.04.1 \ 14 | libedit-dev=3.1-20170329-1 \ 15 | libxml2-dev=2.9.4+dfsg1-6.1ubuntu1.3 \ 16 | libopenblas-dev=0.2.20+ds-4 \ 17 | ninja-build=1.8.2-1 \ 18 | git=1:2.17.1-1ubuntu0.8 \ 19 | llvm-10-dev=1:10.0.0-4ubuntu1~18.04.2 \ 20 | wget=1.19.4-1ubuntu2.2 \ 21 | python3.7=3.7.5-2~18.04.4 \ 22 | python3.7-venv=3.7.5-2~18.04.4 \ 23 | python3.7-dev=3.7.5-2~18.04.4 \ 24 | && apt-get clean \ 25 | && rm -rf /var/lib/apt/lists/* 26 | 27 | WORKDIR /tmp 28 | RUN wget -q https://bootstrap.pypa.io/get-pip.py && python3.7 get-pip.py 29 | 30 | COPY . 
/content 31 | 32 | # Use venv 33 | ENV VIRTUAL_ENV=/venv 34 | RUN python3.7 -m venv $VIRTUAL_ENV 35 | ENV PATH="$VIRTUAL_ENV/bin:$PATH" 36 | 37 | # Build tvm 38 | WORKDIR /root 39 | RUN git clone https://github.com/apache/tvm tvm --recursive 40 | WORKDIR /root/tvm 41 | RUN mkdir -p build \ 42 | && cp cmake/config.cmake build \ 43 | && echo set\(USE_LLVM ON\) >> build/config.cmake \ 44 | && echo set\(USE_GRAPH_RUNTIME ON\) >> build/config.cmake \ 45 | && echo set\(USE_BLAS openblas\) >> build/config.cmake 46 | 47 | WORKDIR /root/tvm/build 48 | RUN cmake .. -G Ninja && ninja 49 | 50 | WORKDIR /root/tvm/python 51 | RUN pip install --no-cache-dir pip -U \ 52 | && python setup.py install 53 | 54 | # Install python dependencies 55 | WORKDIR /content 56 | RUN pip install --no-cache-dir . 57 | 58 | # Stage2: Build 59 | FROM ubuntu:18.04 AS build-image 60 | COPY --from=compile-image /venv /venv 61 | 62 | RUN apt-get update && apt-get install -y --no-install-recommends \ 63 | llvm-10=1:10.0.0-4ubuntu1~18.04.2 \ 64 | libopenblas-dev=0.2.20+ds-4 \ 65 | lsof=4.89+dfsg-0.1 \ 66 | libgl1-mesa-glx=20.0.8-0ubuntu1~18.04.1 \ 67 | libglib2.0-0=2.56.4-0ubuntu0.18.04.8 \ 68 | python3.7-distutils=3.7.5-2~18.04.4 \ 69 | python3.7=3.7.5-2~18.04.4 \ 70 | python3.7-venv=3.7.5-2~18.04.4 \ 71 | python3.7-dev=3.7.5-2~18.04.4 \ 72 | && apt-get clean \ 73 | && rm -rf /var/lib/apt/lists/* 74 | ENV PATH="/venv/bin:$PATH" 75 | CMD ["uvicorn", "modelci.app.main:app", "--host", "0.0.0.0", "--port", "8000"] -------------------------------------------------------------------------------- /modelci/app/experimental/endpoints/trainer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: Li Yuanming 5 | Email: yli056@e.ntu.edu.sg 6 | Date: 1/15/2021 7 | """ 8 | from typing import List 9 | 10 | from fastapi import APIRouter 11 | from starlette.responses import JSONResponse, Response 12 | 13 | from modelci.experimental.curd.model_train import save, get_by_id, get_all, delete_by_id, delete_all 14 | from modelci.experimental.finetuner.trainer import PyTorchTrainer 15 | from modelci.experimental.model.model_train import TrainingJob, TrainingJobIn 16 | 17 | router = APIRouter() 18 | 19 | 20 | @router.post('/', status_code=201) 21 | def create_training_job(training_job: TrainingJobIn): 22 | """ 23 | Create a training job data object, and save it into the database. Then, submit the created training job 24 | (with the job ID generated by the database) to the training job coordinator. 25 | TODO return training job as soon as created 26 | 27 | Args: 28 | training_job (TrainingJobIn): Training job to be submitted. 29 | 30 | Returns: 31 | A dict containing the ID of the created training job. 32 | """ 33 | id_ = save(training_job_in=training_job) 34 | if id_ is not None: 35 | training_job = get_by_id(id_) 36 | trainer = PyTorchTrainer.from_training_job(training_job) 37 | trainer.start() 38 | trainer.join() 39 | return {'id': str(id_)} 40 | 41 | 42 | @router.get('/') 43 | def get_all_training_jobs() -> List[TrainingJob]: 44 | return get_all() 45 | 46 | 47 | @router.get('/{id}') 48 | def get_training_job(id: str) -> TrainingJob: 49 | """ 50 | Get a training job. 51 | 52 | Args: 53 | id (str): Training job ID. 54 | 55 | Returns: 56 | TrainingJob: The training job with the given ID. 57 | """ 58 | return get_by_id(id) 59 | 60 | 61 | @router.delete('/{id}') 62 | def delete_training_job(id: str): 63 | """ 64 | Delete a training job. 
65 | 66 | Args: 67 | id (str): Training job ID. 68 | 69 | Returns: 70 | A 204 No Content response on success; a 400 response with an error message on failure. 71 | """ 72 | if bool(delete_by_id(id)): 73 | return Response(status_code=204) 74 | else: 75 | return JSONResponse(status_code=400, content={'message': 'Failed in deletion.'}) 76 | 77 | 78 | @router.delete('/') 79 | def delete_all_training_job(): 80 | count = delete_all() 81 | return JSONResponse(status_code=200, content={'deleted': count}) 82 | -------------------------------------------------------------------------------- /modelci/hub/deployer/environment.yml: -------------------------------------------------------------------------------- 1 | name: serving 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - _libgcc_mutex=0.1 7 | - _tflow_select=2.1.0 8 | - absl-py=0.8.1 9 | - astor=0.8.0 10 | - blas=1.0 11 | - c-ares=1.15.0 12 | - ca-certificates=2019.11.27 13 | - certifi=2019.11.28 14 | - cffi=1.13.2 15 | - cudatoolkit=10.1.243 16 | - cudnn=7.6.5 17 | - cupti=10.1.168 18 | - freetype=2.9.1 19 | - google-pasta=0.1.8 20 | - grpcio=1.16.1 21 | - h5py=2.9.0 22 | - hdf5=1.10.4 23 | - intel-openmp=2019.4 24 | - jpeg=9b 25 | - keras-applications=1.0.8 26 | - keras-preprocessing=1.1.0 27 | - libedit=3.1.20181209 28 | - libffi=3.2.1 29 | - libgcc-ng=9.1.0 30 | - libgfortran-ng=7.3.0 31 | - libpng=1.6.37 32 | - libprotobuf=3.11.2 33 | - libstdcxx-ng=9.1.0 34 | - libtiff=4.1.0 35 | - markdown=3.1.1 36 | - mkl=2019.4 37 | - mkl-service=2.3.0 38 | - mkl_fft=1.0.15 39 | - mkl_random=1.1.0 40 | - ncurses=6.1 41 | - ninja=1.9.0 42 | - numpy=1.17.4 43 | - numpy-base=1.17.4 44 | - olefile=0.46 45 | - openssl=1.1.1d 46 | - pillow=6.2.1 47 | - pip=19.3.1 48 | - protobuf=3.11.2 49 | - pycparser=2.19 50 | - python=3.7.5 51 | - pytorch=1.4.0 52 | - readline=7.0 53 | - scipy=1.3.2 54 | - setuptools=42.0.2 55 | - six=1.13.0 56 | - sqlite=3.30.1 57 | - tensorflow=1.14.0 58 | - tensorflow-base=1.14.0 59 | - termcolor=1.1.0 60 | - tk=8.6.8 61 | - torchvision=0.5.0.dev20191221 62 | - werkzeug=0.16.0 63 | - wheel=0.33.6 64 | - wrapt=1.11.2 65 | - xz=5.2.4 66 | - zlib=1.2.11 67 | - zstd=1.3.7 68 | - pip: 69 | - cachetools==4.0.0 70 | - chardet==3.0.4 71 | - click==7.0 72 | - fastapi==0.45.0 73 | - gast==0.2.2 74 | - google-auth==1.10.0 75 | - google-auth-oauthlib==0.4.1 76 | - h11==0.9.0 77 | - httptools==0.0.13 78 | - idna==2.8 79 | - mongoengine==0.19.1 80 | - oauthlib==3.1.0 81 | - opt-einsum==3.1.0 82 | - pyasn1==0.4.8 83 | - pyasn1-modules==0.2.7 84 | - pydantic==1.3 85 | - pymongo==3.10.0 86 | - python-multipart==0.0.5 87 | - requests==2.22.0 88 | - requests-oauthlib==1.3.0 89 | - rsa==4.0 90 | - starlette==0.12.9 91 | - tensorboard==2.0.2 92 | - tensorflow-estimator==2.0.1 93 | - tensorflow-gpu==2.0.0 94 | - urllib3==1.25.7 95 | - uvicorn==0.11.1 96 | - uvloop==0.14.0 97 | - websockets==8.1 98 | -------------------------------------------------------------------------------- /frontend/src/layouts/BasicLayout/index.tsx: -------------------------------------------------------------------------------- 1 | import React, { useState } from 'react'; 2 | import { ConfigProvider, Shell } from '@alifd/next'; 3 | import enUS from '@alifd/next/lib/locale/en-us'; 4 | import PageNav from './components/PageNav'; 5 | import Logo from './components/Logo'; 6 | import Footer from './components/Footer'; 7 | 8 | (function () { 9 | const throttle = function (type: string, name: string, obj: Window = window) { 10 | let running = false; 11 | 12 | const func = () => { 13 | if (running) { 14 | return; 15 | }
16 | 17 | running = true; 18 | requestAnimationFrame(() => { 19 | obj.dispatchEvent(new CustomEvent(name)); 20 | running = false; 21 | }); 22 | }; 23 | 24 | obj.addEventListener(type, func); 25 | }; 26 | 27 | throttle('resize', 'optimizedResize'); 28 | })(); 29 | 30 | export default function BasicLayout({ 31 | children, 32 | }: { 33 | children: React.ReactNode; 34 | }) { 35 | const getDevice = (width: number) => { 36 | const isPhone = 37 | typeof navigator !== 'undefined' && 38 | navigator && 39 | navigator.userAgent.match(/phone/gi); 40 | 41 | if (width < 680 || isPhone) { 42 | return 'phone'; 43 | } else if (width < 1280 && width > 680) { 44 | return 'tablet'; 45 | } else { 46 | return 'desktop'; 47 | } 48 | }; 49 | 50 | const [device, setDevice] = useState(getDevice(NaN)); 51 | window.addEventListener('optimizedResize', (e) => { 52 | setDevice(getDevice(e && e.target && e.target.innerWidth)); 53 | }); 54 | return ( 55 | <ConfigProvider device={device} locale={enUS}> 56 | <Shell type="brand" fixedHeader={false}> 57 | <Shell.Branding> 58 | <Logo /> 59 | </Shell.Branding> 60 | <Shell.Navigation> 61 | <PageNav /> 62 | </Shell.Navigation> 63 | <Shell.Content> 64 | {children} 65 | <Footer /> 66 | </Shell.Content> 67 | </Shell> 68 | </ConfigProvider> 69 | ); 70 | }