├── src
    └── PyTorch2ONNX
    │   ├── data
    │       └── cat.jpg
    │   ├── utils.py
    │   └── PyTorch2ONNX_Run_in_ONNX_RUNTIME.py
├── LICENSE
├── .gitignore
└── README.md


/src/PyTorch2ONNX/data/cat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Yulv-git/Model-Inference-Deployment/HEAD/src/PyTorch2ONNX/data/cat.jpg


--------------------------------------------------------------------------------
/src/PyTorch2ONNX/utils.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # coding=utf-8
 3 | '''
 4 | Author: Shuangchi He / Yulv
 5 | Email: yulvchi@qq.com
 6 | Date: 2022-04-06 11:15:27
 7 | Motto: Entities should not be multiplied unnecessarily.
 8 | LastEditors: Shuangchi He
 9 | LastEditTime: 2022-04-06 11:34:36
10 | FilePath: /Model_Inference_Deployment/src/PyTorch2ONNX/utils.py
11 | Description: Modify here please
12 | '''
13 | import os
14 | 
15 | 
16 | def check_dir(path):
17 |     if not os.path.exists(path):
18 |         try:
19 |             os.mkdir(path)
20 |         except:
21 |             os.makedirs(path)
22 | 
23 | 
24 | def torchtensor2numpy(tensor):
25 |     return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()
26 | 
27 | 
28 | if __name__ == '__main__':
29 |     pass
30 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Yulv-git
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | # Inference_Deployment
132 | *.onnx
133 | *_ort.jpg
134 | 


--------------------------------------------------------------------------------
/src/PyTorch2ONNX/PyTorch2ONNX_Run_in_ONNX_RUNTIME.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding=utf-8
  3 | '''
  4 | Author: Shuangchi He / Yulv
  5 | Email: yulvchi@qq.com
  6 | Date: 2022-01-28 14:21:09
  7 | Motto: Entities should not be multiplied unnecessarily.
  8 | LastEditors: Shuangchi He
  9 | LastEditTime: 2022-04-06 11:40:23
 10 | FilePath: /Model_Inference_Deployment/src/PyTorch2ONNX/PyTorch2ONNX_Run_in_ONNX_RUNTIME.py
 11 | Description: Init from https://pytorch.org/tutorials/advanced/super_resolution_with_onnxruntime.html
 12 |     Export a model from PyTorch to ONNX and run it using ONNX RUNTIME.
 13 | '''
 14 | import argparse
 15 | import os
 16 | import numpy as np
 17 | from PIL import Image
 18 | import torch
 19 | import torch.nn as nn
 20 | import torch.nn.init as init
 21 | import torch.utils.model_zoo as model_zoo
 22 | import torchvision.transforms as transforms
 23 | import onnx
 24 | import onnxruntime
 25 | 
 26 | from utils import check_dir, torchtensor2numpy
 27 | 
 28 | 
 29 | class SuperResolutionNet(nn.Module):
 30 |     ''' Super Resolution model definition in PyTorch. '''
 31 |     def __init__(self, upscale_factor, inplace=False):
 32 |         super(SuperResolutionNet, self).__init__()
 33 |         self.relu = nn.ReLU(inplace=inplace)
 34 |         self.conv1 = nn.Conv2d(1, 64, (5, 5), (1, 1), (2, 2))
 35 |         self.conv2 = nn.Conv2d(64, 64, (3, 3), (1, 1), (1, 1))
 36 |         self.conv3 = nn.Conv2d(64, 32, (3, 3), (1, 1), (1, 1))
 37 | 
 38 |         self.conv4 = nn.Conv2d(32, 1 * (upscale_factor ** 2), (3, 3), (1, 1), (1, 1))
 39 |         self.pixel_shuffle = nn.PixelShuffle(upscale_factor)
 40 | 
 41 |         self._initialize_weights()
 42 | 
 43 |     def forward(self, x):
 44 |         x = self.relu(self.conv1(x))
 45 |         x = self.relu(self.conv2(x))
 46 |         x = self.relu(self.conv3(x))
 47 | 
 48 |         # Increase spatial resolution with Sub-Pixel conv.
 49 |         x = self.pixel_shuffle(self.conv4(x))
 50 | 
 51 |         return x
 52 | 
 53 |     def _initialize_weights(self):
 54 |         init.orthogonal_(self.conv1.weight, init.calculate_gain('relu'))
 55 |         init.orthogonal_(self.conv2.weight, init.calculate_gain('relu'))
 56 |         init.orthogonal_(self.conv3.weight, init.calculate_gain('relu'))
 57 |         init.orthogonal_(self.conv4.weight)
 58 | 
 59 | 
 60 | def PyTorch2ONNX(torch_model, dummy_input_to_model, onnx_save_dir, check_onnx_model=True):
 61 |     ''' Export the model. (PyTorch2ONNX) '''
 62 |     torch.onnx.export(
 63 |         torch_model,                                    # model being run.
 64 |         dummy_input_to_model,                           # model input (or a tuple for multiple inputs).
 65 |         onnx_save_dir,                                  # where to save the model (can be a file or file-like object).
 66 |         export_params=True,                             # store the trained parameter weights inside the model file.
 67 |         opset_version=10,                               # the ONNX version to export the model to.
 68 |         do_constant_folding=True,                       # whether to execute constant folding for optimization.
 69 |         input_names=['input'],                          # the model's input names.
 70 |         output_names=['output'],                        # the model's output names.
 71 |         dynamic_axes={                                  # variable length axes.
 72 |             'input': {0: 'batch_size'},
 73 |             'output': {0: 'batch_size'}})
 74 | 
 75 |     if check_onnx_model:  # Verify the model’s structure and confirm that the model has a valid schema.
 76 |         onnx_model = onnx.load(onnx_save_dir)
 77 |         onnx.checker.check_model(onnx_model)
 78 | 
 79 | 
 80 | def Verify_ONNX_in_ONNX_RUNTIME(onnx_dir, dummy_input_to_model, torch_out):
 81 |     ''' Verify ONNX Runtime and PyTorch are computing the same value for the model. '''
 82 |     # Create an inference session.
 83 |     ort_session = onnxruntime.InferenceSession(onnx_dir)
 84 | 
 85 |     # Compute ONNX Runtime output prediction.
 86 |     ort_inputs = {ort_session.get_inputs()[0].name: torchtensor2numpy(dummy_input_to_model)}
 87 |     ort_outs = ort_session.run(None, ort_inputs)
 88 | 
 89 |     # Compare ONNX Runtime and PyTorch results.
 90 |     np.testing.assert_allclose(torchtensor2numpy(torch_out), ort_outs[0], rtol=1e-03, atol=1e-05)
 91 | 
 92 |     print("Exported model has been tested with ONNXRuntime, and the result looks good!")
 93 | 
 94 | 
 95 | def Run_ONNX_in_ONNX_RUNTIME(onnx_dir, img_path, img_save_path):
 96 |     ''' Run the model on an image using ONNX Runtime. '''
 97 |     # Take the tensor representing the greyscale resized image.
 98 |     img = Image.open(img_path)
 99 |     resize = transforms.Resize([224, 224])
100 |     img = resize(img)
101 |     img_ycbcr = img.convert('YCbCr')
102 |     img_y, img_cb, img_cr = img_ycbcr.split()
103 |     to_tensor = transforms.ToTensor()
104 |     img_y = to_tensor(img_y)
105 |     img_y.unsqueeze_(0)
106 | 
107 |     # Create an inference session.
108 |     ort_session = onnxruntime.InferenceSession(onnx_dir)
109 | 
110 |     # Run the ONNX model in ONNX Runtime.
111 |     ort_inputs = {ort_session.get_inputs()[0].name: torchtensor2numpy(img_y)}
112 |     ort_outs = ort_session.run(None, ort_inputs)
113 |     img_out_y = ort_outs[0]
114 | 
115 |     # Get the output image.
116 |     img_out_y = Image.fromarray(np.uint8((img_out_y[0] * 255.0).clip(0, 255)[0]), mode='L')
117 |     final_img = Image.merge(
118 |         "YCbCr", [
119 |             img_out_y,
120 |             img_cb.resize(img_out_y.size, Image.BICUBIC),
121 |             img_cr.resize(img_out_y.size, Image.BICUBIC),
122 |         ]).convert("RGB")
123 | 
124 |     # Save the image, compare this with the output image from mobile device.
125 |     final_img.save(img_save_path)
126 | 
127 | 
128 | def main(args):
129 |     # Create the super-resolution model.
130 |     torch_model = SuperResolutionNet(upscale_factor=3)
131 | 
132 |     # Initialize model with the pretrained weights.
133 |     def map_location(storage, loc): return storage
134 |     if torch.cuda.is_available():
135 |         map_location = None
136 |     torch_model.load_state_dict(model_zoo.load_url(
137 |         url='https://s3.amazonaws.com/pytorch/test_data/export/superres_epoch100-44c6958e.pth', map_location=map_location))
138 | 
139 |     # Set the model to inference mode.
140 |     torch_model.eval()
141 | 
142 |     # Input to the model.
143 |     batch_size = 1
144 |     dummy_input_to_model = torch.randn(batch_size, 1, 224, 224, requires_grad=True)
145 |     torch_out = torch_model(dummy_input_to_model)
146 | 
147 |     # Export the model. (PyTorch2ONNX)
148 |     PyTorch2ONNX(torch_model, dummy_input_to_model, args.onnx_save_dir, args.check_onnx_model)
149 | 
150 |     # Verify ONNX Runtime and PyTorch are computing the same value for the model.
151 |     Verify_ONNX_in_ONNX_RUNTIME(args.onnx_save_dir, dummy_input_to_model, torch_out)
152 | 
153 |     # Run the model on an image using ONNX Runtime.
154 |     Run_ONNX_in_ONNX_RUNTIME(args.onnx_save_dir, args.img_path, args.img_save_path)
155 | 
156 | 
157 | if __name__ == "__main__":
158 |     parse = argparse.ArgumentParser(description='Export a model from PyTorch to ONNX and run it using ONNX RUNTIME.')
159 |     parse.add_argument('--img_path', type=str, default='{}/data/cat.jpg'.format(os.path.dirname(os.path.abspath(__file__))))
160 |     parse.add_argument('--check_onnx_model', type=bool, default=True)
161 |     parse.add_argument('--output_dir', type=str, default='{}/output'.format(os.path.dirname(os.path.abspath(__file__))))
162 |     args = parse.parse_args()
163 | 
164 |     check_dir(args.output_dir)
165 |     args.onnx_save_dir = '{}/super_resolution.onnx'.format(args.output_dir)
166 |     args.img_save_path = '{}/cat_superres_with_ort.jpg'.format(args.output_dir)
167 | 
168 |     main(args)
169 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <!--
  2 |  * @Author: Yulv
  3 |  * @Email: yulvchi@qq.com
  4 |  * @Date: 2022-01-24 10:48:28
  5 |  * @LastEditTime: 2024-05-04 01:28:53
  6 |  * @FilePath: \Model-Inference-Deployment\README.md
  7 |  * @Description: A curated list of awesome inference deployment framework of artificial intelligence models.
  8 |  * Repository: https://github.com/Yulv-git/Model-Inference-Deployment
  9 | -->
 10 | 
 11 | <h1><center> Model Inference Deployment </center></h1>
 12 | 
 13 | A curated list of awesome inference deployment framework of artificial intelligence (AI) models.
 14 | 
 15 | | Framework | Main Developer | API | Framework / ONNX | Quantization | Processors / Accelerator | Hardware | OS | Application | Other Features |
 16 | | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
 17 | | [OpenVINO](https://docs.openvino.ai/latest/index.html) | Intel | C, C++, Python | TensorFlow, Caffe, MXNet, Keras, PyTorch, PaddlePaddle, ONNX | INT8, FP16 | CPU, iGPU, GPU, VPU, GNA, FPGA (deprecated after 2020.4) | [Intel series devices, Amazon Alexa Premium Far-Field Developer Kit, etc](https://docs.openvino.ai/2022.3/openvino_docs_OV_UG_supported_plugins_Supported_Devices.html). | Linux, Windows, macOS, Raspbian  |  |  |
 18 | | [TensorRT](https://developer.nvidia.com/zh-cn/tensorrt) | NVIDIA | C++, Python | TensorFlow, Caffe, CNTK, Chainer, PyTorch, MXNet, PaddlePaddle, MATLAB, ONNX | INT8, FP16 | GPU | NIVDIA GPU, NIVDIA Jetson, Tesla GPU, etc. | Linux, Windows |  |  |
 19 | | [MediaPipe](https://developers.google.com/mediapipe) | Google | C++, JavaScript, Python | TensorFlow |  | GPU, TPU | Google Coral, etc. | Linux, Android, iOS, Raspbian, macOS, Windows (experimental) | Youtube, Google Lens, ARCore, Google Home, etc. |  |
 20 | | [TensorFlow Lite](https://www.tensorflow.org/lite) | Google | C++, Java, Python, Swift, Objective-C (coming soon) | TensorFlow | INT8, FP16 | CPU, GPU, TPU, NPU, DSP | Google Coral, Microcontrollers, etc. | Linux, iOS, Android, Raspberry Pi | Google Search, Gmail, Google Translate, WPS Office, VSCO, etc. |  |
 21 | | [TensorFlow Serving](https://www.tensorflow.org/tfx/guide/serving) | Google | gRPC, RESTful | TensorFlow |  | GPU, TPU |  |  |  |  |
 22 | | [ONNX Runtime](https://onnxruntime.ai) | Microsoft | C, C++, C#, Java, JavaScript, Python, WinRT, Objective-C, Ruby, Julia | TensorFlow, PyTorch, Keras, SciKit Learn, LightGBM, XGBoost, ONNX | INT8, UINT8 | CPU, GPU, NPU (preview) |  | Linux, Windows, macOS, iOS, Android, WebAssembly | Office 365, Bing, Visual Studio, etc. |  |
 23 | | [LibTorch](https://pytorch.org/cppdocs/installing.html) | FaceBook/Meta | C++ | PyTorch |  | CPU, GPU |  | Linux, Windows, macOS |  |  |
 24 | | [NCNN](https://github.com/Tencent/ncnn) | Tencent |  | TensorFlow, Caffe, MXNet, Keras, PyTorch, ONNX | INT8, FP16 | CPU, GPU |  | Linux, Windows, Android, macOS, iOS, WebAssembly, RISC-V GCC/Newlib | QQ, QZone (QQ 空间), WeChat (微信), Pitu (天天 P 图), etc. |  |
 25 | | [TNN](https://github.com/Tencent/TNN) | Tencent |  | TensorFlow, Caffe, MXNet, PyTorch, ONNX | INT8, FP16 | CPU, GPU, NPU |  | Linux, Android, iOS, Windows | mobile QQ, weishi, Pitu (天天 P 图), etc. |  |
 26 | | [MNN](http://www.mnn.zone) | Alibaba | Python | TensorFlow, Caffe, ONNX | INT8, BF16, FP16 | CPU, GPU, NPU | embedded devices with POSIX interface, etc. | iOS, Android, Linux, Windows | Taobao, Tmall, Youku, Dingtalk, Xianyu, etc. | |
 27 | | [TVM](https://tvm.apache.org) | University of Washington | Python, Java, C++, TypeScript | TensorFlow, Keras, MXNet, PyTorch, CoreML, DarkNet, ONNX |  | CPU, GPU, NPU, DSP, FPGA | Microcontrollers, Browsers, etc. |  |  |  |
 28 | | [MACE](https://mace.readthedocs.io/en/latest/introduction.html) | Xiaomi |  | TensorFlow, Caffe, ONNX |  | CPU, GPU, DSP |  | Android, iOS, Linux, Windows |  |  |
 29 | | [Paddle Lite](https://www.paddlepaddle.org.cn/lite) | Baidu | C++, Java, Python |  PaddlePaddle | INT8, INT16 | CPU, GPU, NPU, FPGA, XPU, APU, NNA, TPU | [ARM Cortex-A family of processors, ARM Mali, Qualcomm Adreno, Apple A Series GPU](https://www.paddlepaddle.org.cn/lite/v2.12/quick_start/support_hardware.html), etc. | Android, iOS, Linux, Windows, macOS |  |  |
 30 | | [MegEngine Lite](https://megengine.org.cn/doc/stable/zh/user-guide/deployment/lite/index.html) | Megvii | Python, C, C++ | MegEngine | INT8 | CPU, GPU, FPGA, NPU |  | Linux, Windows, macOS, Android |  |  |
 31 | | [OpenPPL](https://openppl.ai/home) | SenseTime | C++, Python, Lua | ONNX | FP16 | CPU, GPU |  | Linux, RISC-V |  |  |
 32 | | [Bolt](https://huawei-noah.github.io/bolt) | Huawei | C, Java | TensorFlow, Caffe, ONNX | 1-BIT, INT8, FP16, FP32 | CPU, GPU |  | Linux, Windows, macOS, Andriod, iOS | 2012 Laboratory, CBG, HUAWEI Product Lines |  |
 33 | | [ExecuTorch](https://pytorch.org/executorch/stable/index.html) | FaceBook/Meta | C++, Python | PyTorch, CoreML |  | CPU, NPU, DSP |  | iOS, Android |  |
 34 | 
 35 | ---
 36 | 
 37 | <font size=4><b><center> Table of Contents </center></b></font>
 38 | 
 39 | - [1. ONNX](#1-onnx)
 40 | - [2. Framework](#2-framework)
 41 |   - [2.1. OpenVINO](#21-openvino)
 42 |   - [2.2. TensorRT](#22-tensorrt)
 43 |   - [2.3. MediaPipe](#23-mediapipe)
 44 |   - [2.4. TensorFlow Lite](#24-tensorflow-lite)
 45 |   - [2.5. TensorFlow Serving](#25-tensorflow-serving)
 46 |   - [2.6. ONNX Runtime](#26-onnx-runtime)
 47 |   - [2.7. LibTorch](#27-libtorch)
 48 |   - [2.8. NCNN](#28-ncnn)
 49 |   - [2.9. TNN](#29-tnn)
 50 |   - [2.10. MNN](#210-mnn)
 51 |   - [2.11. TVM](#211-tvm)
 52 |   - [2.12. MACE](#212-mace)
 53 |   - [2.13. Paddle Lite](#213-paddle-lite)
 54 |   - [2.14. MegEngine Lite](#214-megengine-lite)
 55 |   - [2.15. OpenPPL](#215-openppl)
 56 |   - [2.16. Bolt](#216-bolt)
 57 |   - [2.17. ExecuTorch](#217-executorch)
 58 | - [3. Practice](#3-practice)
 59 |   - [3.1. ONNX](#31-onnx)
 60 |     - [3.1.1. Export a model from PyTorch to ONNX and run it using ONNX RUNTIME](#311-export-a-model-from-pytorch-to-onnx-and-run-it-using-onnx-runtime)
 61 | 
 62 | ---
 63 | 
 64 | # 1. ONNX
 65 | 
 66 | [Official Website](https://onnx.ai) | [GitHub](https://github.com/onnx)
 67 | 
 68 | ONNX (Open Neural Network Exchange) is an open format built to represent machine learning models. ONNX defines a common set of operators - the building blocks of machine learning and deep learning models - and a common file format to enable AI developers to use models with a variety of frameworks, tools, runtimes, and compilers.
 69 | 
 70 | ONNX developed by Microsoft, Amazon, FaceBook/Meta, IBM, etc. [ONNX supported tools](https://onnx.ai/supported-tools.html): Caffe, CoreML, Keras, libSVM, MATLAB, MindSpore, MXNet, PaddlePaddle, PyTorch, SciKit Learn, TensorFlow, XGBoost, OpenVINO, TensorRT, ONNX MLIR, ONNX RUNTIME, MACE, NCNN, TVM, etc.
 71 | 
 72 | Eg:
 73 | 
 74 | - PyTorch → ONNX → ONNX RUNTIME
 75 | - PyTorch → ONNX → TensorRT
 76 | - PyTorch → ONNX → TVM
 77 | - TensorFlow → ONNX → NCNN
 78 | - PyTorch → ONNX → TensorFlow
 79 | 
 80 | ---
 81 | 
 82 | # 2. Framework
 83 | 
 84 | ## 2.1. OpenVINO
 85 | 
 86 | [Official Website](https://docs.openvino.ai/latest/index.html) | [GitHub](https://github.com/openvinotoolkit/openvino)
 87 | 
 88 | OpenVINO (Open Visual Inference & Neural Network Optimization) is an open-source toolkit for optimizing and deploying AI inference. It reduce resource demands and efficiently deploy on a range of Intel platforms from edge to cloud.
 89 | 
 90 | [Open Model Zoo repository](https://github.com/openvinotoolkit/open_model_zoo): Pre-trained Deep Learning models and demos (high quality and extremely fast).
 91 | 
 92 | ## 2.2. TensorRT
 93 | 
 94 | [Official Website](https://developer.nvidia.com/zh-cn/tensorrt) | [GitHub](https://github.com/NVIDIA/TensorRT)
 95 | 
 96 | NVIDIA TensorRT is an SDK for high-performance deep learning inference. This SDK contains a deep learning inference optimizer and runtime environment that provides low latency and high throughput for deep learning inference applications.
 97 | 
 98 | ## 2.3. MediaPipe
 99 | 
100 | [Official Website](https://developers.google.com/mediapipe) | [GitHub](https://github.com/google/mediapipe)
101 | 
102 | MediaPipe offers cross-platform, customizable ML solutions for live and streaming media.
103 | 
104 | ## 2.4. TensorFlow Lite
105 | 
106 | [Official Website](https://www.tensorflow.org/lite) | [GitHub](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite)
107 | 
108 | TensorFlow Lite is TensorFlow's lightweight solution for mobile and embedded devices. It enables low-latency inference of on-device machine learning models with a small binary size and fast performance supporting hardware acceleration.
109 | 
110 | [TensorFlow Lite for Microcontrollers](https://github.com/tensorflow/tflite-micro): A port of TensorFlow Lite designed to run machine learning models on DSPs, microcontrollers and other devices with limited memory.
111 | 
112 | [Awesome TensorFlow Lite](https://github.com/margaretmz/awesome-tensorflow-lite): An awesome list of TensorFlow Lite models with sample apps, helpful tools and learning resources.
113 | 
114 | ## 2.5. TensorFlow Serving
115 | 
116 | [Official Website](https://www.tensorflow.org/tfx/guide/serving) | [GitHub](https://github.com/tensorflow/serving)
117 | 
118 | TensorFlow Serving is a flexible, high-performance serving system for machine learning models, designed for production environments. TensorFlow Serving makes it easy to deploy new algorithms and experiments, while keeping the same server architecture and APIs. TensorFlow Serving provides out-of-the-box integration with TensorFlow models, but can be easily extended to serve other types of models and data.
119 | 
120 | ## 2.6. ONNX Runtime
121 | 
122 | [Official Website](https://onnxruntime.ai) | [GitHub](https://github.com/microsoft/onnxruntime)
123 | 
124 | ONNX Runtime is an open source project that is designed to accelerate machine learning across a wide range of frameworks, operating systems, and hardware platforms. It enables acceleration of machine learning inferencing across all of your deployment targets using a single set of API. ONNX Runtime automatically parses through your model to identify optimization opportunities and provides access to the best hardware acceleration available.
125 | 
126 | ONNX Runtime also offers training acceleration, which incorporates innovations from Microsoft Research and is proven across production workloads like Office 365, Bing and Visual Studio.
127 | 
128 | ## 2.7. LibTorch
129 | 
130 | [Official Website](https://pytorch.org/cppdocs/installing.html) | [LibTorch Tutorials](https://github.com/AllentDan/LibtorchTutorials)
131 | 
132 | LibTorch: C++ distributions of PyTorch.
133 | 
134 | ## 2.8. NCNN
135 | 
136 | [GitHub](https://github.com/Tencent/ncnn)
137 | 
138 | NCNN is a high-performance neural network inference computing framework optimized for mobile platforms. NCNN is deeply considerate about deployment and uses on mobile phones from the beginning of design. NCNN does not have third party dependencies. It is cross-platform, and runs faster than all known open source frameworks on mobile phone cpu. Developers can easily deploy deep learning algorithm models to the mobile platform by using efficient NCNN implementation, create intelligent APPs, and bring the artificial intelligence to your fingertips. NCNN is currently being used in many Tencent applications, such as QQ, QZone (QQ 空间), WeChat (微信), Pitu (天天 P 图) and so on.
139 | 
140 | ## 2.9. TNN
141 | 
142 | [GitHub](https://github.com/Tencent/TNN)
143 | 
144 | TNN: A high-performance, lightweight neural network inference framework open sourced by Tencent Youtu Lab. It also has many outstanding advantages such as cross-platform, high performance, model compression, and code tailoring. The TNN framework further strengthens the support and performance optimization of mobile devices on the basis of the original Rapidnet and NCNN frameworks. At the same time, it refers to the high performance and good scalability characteristics of the industry's mainstream open source frameworks, and expands the support for X86 and NV GPUs. On the mobile phone, TNN has been used by many applications such as mobile QQ, weishi (微视), and Pitu (天天 P 图). As a basic acceleration framework for Tencent Cloud AI, TNN has provided acceleration support for the implementation of many businesses. Everyone is welcome to participate in the collaborative construction to promote the further improvement of the TNN inference framework.
145 | 
146 | ## 2.10. MNN
147 | 
148 | [Official Website](http://www.mnn.zone) | [GitHub](https://github.com/alibaba/MNN)
149 | 
150 | MNN is a highly efficient and lightweight deep learning framework. It supports inference and training of deep learning models, and has industry leading performance for inference and training on-device. At present, MNN has been integrated in more than 30 apps of Alibaba Inc, such as Taobao (淘宝), Tmall (天猫), Youku (优酷), Dingtalk (钉钉), Xianyu (咸鱼) and etc., covering more than 70 usage scenarios such as live broadcast, short video capture, search recommendation, product searching by image, interactive marketing, equity distribution, security risk control. In addition, MNN is also used on embedded devices, such as IoT.
151 | 
152 | ## 2.11. TVM
153 | 
154 | [Official Website](https://tvm.apache.org) | [GitHub](https://github.com/apache/tvm)
155 | 
156 | Apache TVM is an open source machine learning compiler framework for CPUs, GPUs, and machine learning accelerators. It aims to enable machine learning engineers to optimize and run computations efficiently on any hardware backend.
157 | 
158 | ## 2.12. MACE
159 | 
160 | [Official Website](https://mace.readthedocs.io/en/latest/introduction.html) | [GitHub](https://github.com/XiaoMi/mace)
161 | 
162 | MACE (Mobile AI Compute Engine) is a deep learning inference framework optimized for mobile heterogeneous computing platforms computing on Android, iOS, Linux and Windows devices. MACE provides tools and documents to help users to deploy deep learning models to mobile phones, tablets, personal computers and IoT devices.
163 | 
164 | ## 2.13. Paddle Lite
165 | 
166 | [Official Website](https://www.paddlepaddle.org.cn/lite) | [GitHub](https://github.com/PaddlePaddle/Paddle-Lite)
167 | 
168 | Paddle Lite is an updated version of Paddle-Mobile, an open-open source deep learning framework designed to make it easy to perform inference on mobile, embeded, and IoT devices. It is compatible with PaddlePaddle and pre-trained models from other sources.
169 | 
170 | ## 2.14. MegEngine Lite
171 | 
172 | [Official Website](https://megengine.org.cn/doc/stable/zh/user-guide/deployment/lite/index.html) | [GitHub](https://github.com/MegEngine/MegEngine/tree/master/lite)
173 | 
174 | MegEngine Lite is a layer of interface encapsulation for MegEngine. The main purpose of MegEngine Lite is to provide users with a more concise, easy-to-use and efficient inference interface, and to make full use of the multi-platform inference capabilities of MegEngine.
175 | 
176 | ## 2.15. OpenPPL
177 | 
178 | [Official Website](https://openppl.ai/home) | [GitHub](https://github.com/openppl-public/ppl.nn)
179 | 
180 | OpenPPL is an open-source deep-learning inference platform based on self-developed high-performance kernel libraries. It enables AI applications to run efficiently on mainstream CPU and GPU platforms, delivering reliable inference services in cloud scenarios.
181 | 
182 | ## 2.16. Bolt
183 | 
184 | [Official Website](https://huawei-noah.github.io/bolt) | [GitHub](https://github.com/huawei-noah/bolt)
185 | 
186 | Bolt is a light-weight library for deep learning. Bolt, as a universal deployment tool for all kinds of neural networks, aims to minimize the inference runtime as much as possible. Bolt has been widely deployed and used in many departments of HUAWEI company, such as 2012 Laboratory, CBG and HUAWEI Product Lines.
187 | 
188 | ## 2.17. ExecuTorch
189 | 
190 | [Official Website](https://pytorch.org/executorch/stable/index.html) | [GitHub](https://github.com/pytorch/executorch)
191 | 
192 | ExecuTorch is an end-to-end solution for enabling on-device inference capabilities across mobile and edge devices including wearables, embedded devices and microcontrollers. It is part of the PyTorch Edge ecosystem and enables efficient deployment of PyTorch models to edge devices.
193 | 
194 | ---
195 | 
196 | # 3. Practice
197 | 
198 | ## 3.1. ONNX
199 | 
200 | ONNX is widely supported and can be found in many frameworks, tools, and hardware. Enabling interoperability between different frameworks and streamlining the path from research to production helps increase the speed of innovation in the AI community.
201 | 
202 | ### 3.1.1. Export a model from PyTorch to ONNX and run it using ONNX RUNTIME
203 | 
204 | The main functions are as follows:
205 | 
206 | ``` python
207 | def PyTorch2ONNX(torch_model, dummy_input_to_model, onnx_save_dir, check_onnx_model=True):
208 |     ''' Export the model. (PyTorch2ONNX) '''
209 |     torch.onnx.export(
210 |         torch_model,                                    # model being run.
211 |         dummy_input_to_model,                           # model input (or a tuple for multiple inputs).
212 |         onnx_save_dir,                                  # where to save the model (can be a file or file-like object).
213 |         export_params=True,                             # store the trained parameter weights inside the model file.
214 |         opset_version=10,                               # the ONNX version to export the model to.
215 |         do_constant_folding=True,                       # whether to execute constant folding for optimization.
216 |         input_names=['input'],                          # the model's input names.
217 |         output_names=['output'],                        # the model's output names.
218 |         dynamic_axes={                                  # variable length axes.
219 |             'input': {0: 'batch_size'},
220 |             'output': {0: 'batch_size'}})
221 | 
222 |     if check_onnx_model:  # Verify the model’s structure and confirm that the model has a valid schema.
223 |         onnx_model = onnx.load(onnx_save_dir)
224 |         onnx.checker.check_model(onnx_model)
225 | ```
226 | 
227 | ``` python
228 | def Run_ONNX_in_ONNX_RUNTIME(onnx_dir, img_path, img_save_path):
229 |     ''' Run the model on an image using ONNX Runtime. '''
230 |     # Take the tensor representing the greyscale resized image.
231 |     img = Image.open(img_path)
232 |     resize = transforms.Resize([224, 224])
233 |     img = resize(img)
234 |     img_ycbcr = img.convert('YCbCr')
235 |     img_y, img_cb, img_cr = img_ycbcr.split()
236 |     to_tensor = transforms.ToTensor()
237 |     img_y = to_tensor(img_y)
238 |     img_y.unsqueeze_(0)
239 | 
240 |     # Create an inference session.
241 |     ort_session = onnxruntime.InferenceSession(onnx_dir)
242 | 
243 |     # Run the ONNX model in ONNX Runtime.
244 |     ort_inputs = {ort_session.get_inputs()[0].name: torchtensor2numpy(img_y)}
245 |     ort_outs = ort_session.run(None, ort_inputs)
246 |     img_out_y = ort_outs[0]
247 | 
248 |     # Get the output image.
249 |     img_out_y = Image.fromarray(np.uint8((img_out_y[0] * 255.0).clip(0, 255)[0]), mode='L')
250 |     final_img = Image.merge(
251 |         "YCbCr", [
252 |             img_out_y,
253 |             img_cb.resize(img_out_y.size, Image.BICUBIC),
254 |             img_cr.resize(img_out_y.size, Image.BICUBIC),
255 |         ]).convert("RGB")
256 | 
257 |     # Save the image, compare this with the output image from mobile device.
258 |     final_img.save(img_save_path)
259 | ```
260 | 
261 | And see [PyTorch2ONNX_Run_in_ONNX_RUNTIME.py](./src/PyTorch2ONNX/PyTorch2ONNX_Run_in_ONNX_RUNTIME.py) for the full Python script.
262 | 
263 | ---
264 | 
265 | <font size=4><b><big> Contributing </b></big></font>
266 | 
267 | If you have any suggestions or improvements, please feel free to create issues or pull requests.
268 | 


--------------------------------------------------------------------------------