├── .gitattributes ├── .xet └── config.toml ├── LICENSE ├── ONNX_HUB_MANIFEST.json ├── README.md ├── contribute.md ├── resource ├── docs │ └── INC_code.md └── images │ ├── INC_GUI.gif │ ├── ONNX Model Zoo Graphics.png │ ├── ONNX_Model_Zoo_Graphics.png │ ├── ONNX_logo_main.png │ ├── bottom.png │ ├── mid.png │ └── top.png ├── target.md ├── text └── machine_comprehension │ ├── bert-squad │ ├── BERT-Squad.ipynb │ ├── README.md │ ├── dependencies │ │ ├── run_onnx_squad.py │ │ └── tokenization.py │ └── model │ │ ├── bertsquad-10.onnx │ │ ├── bertsquad-10.tar.gz │ │ ├── bertsquad-12-int8.onnx │ │ ├── bertsquad-12-int8.tar.gz │ │ ├── bertsquad-12.onnx │ │ ├── bertsquad-12.tar.gz │ │ ├── bertsquad-8.onnx │ │ └── bertsquad-8.tar.gz │ ├── bidirectional_attention_flow │ ├── README.md │ └── model │ │ ├── bidaf-9.onnx │ │ └── bidaf-9.tar.gz │ ├── gpt-2 │ ├── README.md │ ├── dependencies │ │ └── GPT2-export.py │ └── model │ │ ├── gpt2-10.onnx │ │ ├── gpt2-10.tar.gz │ │ ├── gpt2-lm-head-10.onnx │ │ └── gpt2-lm-head-10.tar.gz │ ├── gpt2-bs │ ├── README.md │ └── model │ │ └── gpt2-lm-head-bs-12.onnx │ ├── roberta │ ├── README.md │ ├── dependencies │ │ ├── roberta-sequence-classification-inference.ipynb │ │ └── roberta-sequence-classification-validation.ipynb │ └── model │ │ ├── roberta-base-11.onnx │ │ ├── roberta-base-11.tar.gz │ │ ├── roberta-sequence-classification-9.onnx │ │ └── roberta-sequence-classification-9.tar.gz │ └── t5 │ ├── README.md │ ├── dependencies │ ├── T5-export.py │ └── models.py │ └── model │ ├── t5-decoder-with-lm-head-12.onnx │ ├── t5-decoder-with-lm-head-12.tar.gz │ ├── t5-encoder-12.onnx │ └── t5-encoder-12.tar.gz ├── vision ├── body_analysis │ ├── age_gender │ │ ├── README.md │ │ ├── dependencies │ │ │ ├── baby.jpg │ │ │ ├── bella.jpg │ │ │ ├── bruce.jpg │ │ │ └── kid.jpg │ │ ├── levi_googlenet.py │ │ ├── models │ │ │ ├── age_googlenet.onnx │ │ │ ├── gender_googlenet.onnx │ │ │ ├── vgg_ilsvrc_16_age_chalearn_iccv2015.onnx │ │ │ ├── 
vgg_ilsvrc_16_age_imdb_wiki.onnx │ │ │ └── vgg_ilsvrc_16_gender_imdb_wiki.onnx │ │ └── rothe_vgg.py │ ├── arcface │ │ ├── README.md │ │ ├── dependencies │ │ │ ├── arcface_inference.ipynb │ │ │ ├── arcface_validation.ipynb │ │ │ ├── face_image.py │ │ │ ├── face_postprocess.py │ │ │ ├── face_preprocess.py │ │ │ ├── fresnet.py │ │ │ ├── helper.py │ │ │ ├── image_iter.py │ │ │ ├── mtcnn_detector.py │ │ │ ├── symbol_utils.py │ │ │ ├── train_arcface.ipynb │ │ │ └── verification.py │ │ └── model │ │ │ ├── arcfaceresnet100-8.onnx │ │ │ └── arcfaceresnet100-8.tar.gz │ ├── emotion_ferplus │ │ ├── README.md │ │ └── model │ │ │ ├── emotion-ferplus-2.onnx │ │ │ ├── emotion-ferplus-2.tar.gz │ │ │ ├── emotion-ferplus-7.onnx │ │ │ ├── emotion-ferplus-7.tar.gz │ │ │ ├── emotion-ferplus-8.onnx │ │ │ └── emotion-ferplus-8.tar.gz │ └── ultraface │ │ ├── README.md │ │ ├── demo.py │ │ ├── dependencies │ │ ├── 1.jpg │ │ ├── 2.jpg │ │ ├── 3.jpg │ │ └── box_utils.py │ │ └── models │ │ ├── version-RFB-320.onnx │ │ ├── version-RFB-320.tar.gz │ │ ├── version-RFB-640.onnx │ │ └── version-RFB-640.tar.gz ├── classification │ ├── alexnet │ │ ├── LICENSE │ │ ├── README.md │ │ └── model │ │ │ ├── bvlcalexnet-12-int8.onnx │ │ │ ├── bvlcalexnet-12-int8.tar.gz │ │ │ ├── bvlcalexnet-12-qdq.onnx │ │ │ ├── bvlcalexnet-12-qdq.tar.gz │ │ │ ├── bvlcalexnet-12.onnx │ │ │ ├── bvlcalexnet-12.tar.gz │ │ │ ├── bvlcalexnet-3.onnx │ │ │ ├── bvlcalexnet-3.tar.gz │ │ │ ├── bvlcalexnet-6.onnx │ │ │ ├── bvlcalexnet-6.tar.gz │ │ │ ├── bvlcalexnet-7.onnx │ │ │ ├── bvlcalexnet-7.tar.gz │ │ │ ├── bvlcalexnet-8.onnx │ │ │ ├── bvlcalexnet-8.tar.gz │ │ │ ├── bvlcalexnet-9.onnx │ │ │ └── bvlcalexnet-9.tar.gz │ ├── caffenet │ │ ├── LICENSE │ │ ├── README.md │ │ └── model │ │ │ ├── caffenet-12-int8.onnx │ │ │ ├── caffenet-12-int8.tar.gz │ │ │ ├── caffenet-12-qdq.onnx │ │ │ ├── caffenet-12-qdq.tar.gz │ │ │ ├── caffenet-12.onnx │ │ │ ├── caffenet-12.tar.gz │ │ │ ├── caffenet-3.onnx │ │ │ ├── caffenet-3.tar.gz │ │ │ ├── 
caffenet-6.onnx │ │ │ ├── caffenet-6.tar.gz │ │ │ ├── caffenet-7.onnx │ │ │ ├── caffenet-7.tar.gz │ │ │ ├── caffenet-8.onnx │ │ │ ├── caffenet-8.tar.gz │ │ │ ├── caffenet-9.onnx │ │ │ └── caffenet-9.tar.gz │ ├── densenet-121 │ │ ├── README.md │ │ └── model │ │ │ ├── densenet-12-int8.onnx │ │ │ ├── densenet-12-int8.tar.gz │ │ │ ├── densenet-12.onnx │ │ │ ├── densenet-12.tar.gz │ │ │ ├── densenet-3.onnx │ │ │ ├── densenet-3.tar.gz │ │ │ ├── densenet-6.onnx │ │ │ ├── densenet-6.tar.gz │ │ │ ├── densenet-7.onnx │ │ │ ├── densenet-7.tar.gz │ │ │ ├── densenet-8.onnx │ │ │ ├── densenet-8.tar.gz │ │ │ ├── densenet-9.onnx │ │ │ └── densenet-9.tar.gz │ ├── efficientnet-lite4 │ │ ├── README.md │ │ ├── dependencies │ │ │ └── labels_map.txt │ │ └── model │ │ │ ├── efficientnet-lite4-11-int8.onnx │ │ │ ├── efficientnet-lite4-11-int8.tar.gz │ │ │ ├── efficientnet-lite4-11-qdq.onnx │ │ │ ├── efficientnet-lite4-11-qdq.tar.gz │ │ │ ├── efficientnet-lite4-11.onnx │ │ │ └── efficientnet-lite4-11.tar.gz │ ├── extract_imagenet.py │ ├── imagenet_inference.ipynb │ ├── imagenet_postprocess.py │ ├── imagenet_prep.md │ ├── imagenet_preprocess.py │ ├── imagenet_val_maps.pklz │ ├── imagenet_validation.ipynb │ ├── inception_and_googlenet │ │ ├── googlenet │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ └── model │ │ │ │ ├── googlenet-12-int8.onnx │ │ │ │ ├── googlenet-12-int8.tar.gz │ │ │ │ ├── googlenet-12-qdq.onnx │ │ │ │ ├── googlenet-12-qdq.tar.gz │ │ │ │ ├── googlenet-12.onnx │ │ │ │ ├── googlenet-12.tar.gz │ │ │ │ ├── googlenet-3.onnx │ │ │ │ ├── googlenet-3.tar.gz │ │ │ │ ├── googlenet-6.onnx │ │ │ │ ├── googlenet-6.tar.gz │ │ │ │ ├── googlenet-7.onnx │ │ │ │ ├── googlenet-7.tar.gz │ │ │ │ ├── googlenet-8.onnx │ │ │ │ ├── googlenet-8.tar.gz │ │ │ │ ├── googlenet-9.onnx │ │ │ │ └── googlenet-9.tar.gz │ │ ├── inception_v1 │ │ │ ├── README.md │ │ │ └── model │ │ │ │ ├── inception-v1-12-int8.onnx │ │ │ │ ├── inception-v1-12-int8.tar.gz │ │ │ │ ├── inception-v1-12-qdq.onnx │ │ │ │ ├── 
inception-v1-12-qdq.tar.gz │ │ │ │ ├── inception-v1-12.onnx │ │ │ │ ├── inception-v1-12.tar.gz │ │ │ │ ├── inception-v1-3.onnx │ │ │ │ ├── inception-v1-3.tar.gz │ │ │ │ ├── inception-v1-6.onnx │ │ │ │ ├── inception-v1-6.tar.gz │ │ │ │ ├── inception-v1-7.onnx │ │ │ │ ├── inception-v1-7.tar.gz │ │ │ │ ├── inception-v1-8.onnx │ │ │ │ ├── inception-v1-8.tar.gz │ │ │ │ ├── inception-v1-9.onnx │ │ │ │ └── inception-v1-9.tar.gz │ │ └── inception_v2 │ │ │ ├── README.md │ │ │ └── model │ │ │ ├── inception-v2-3.onnx │ │ │ ├── inception-v2-3.tar.gz │ │ │ ├── inception-v2-6.onnx │ │ │ ├── inception-v2-6.tar.gz │ │ │ ├── inception-v2-7.onnx │ │ │ ├── inception-v2-7.tar.gz │ │ │ ├── inception-v2-8.onnx │ │ │ ├── inception-v2-8.tar.gz │ │ │ ├── inception-v2-9.onnx │ │ │ └── inception-v2-9.tar.gz │ ├── mnist │ │ ├── README.md │ │ └── model │ │ │ ├── mnist-1.onnx │ │ │ ├── mnist-1.tar.gz │ │ │ ├── mnist-12-int8.onnx │ │ │ ├── mnist-12-int8.tar.gz │ │ │ ├── mnist-12.onnx │ │ │ ├── mnist-12.tar.gz │ │ │ ├── mnist-7.onnx │ │ │ ├── mnist-7.tar.gz │ │ │ ├── mnist-8.onnx │ │ │ └── mnist-8.tar.gz │ ├── mobilenet │ │ ├── README.md │ │ ├── model │ │ │ ├── mobilenetv2-10.onnx │ │ │ ├── mobilenetv2-10.tar.gz │ │ │ ├── mobilenetv2-12-int8.onnx │ │ │ ├── mobilenetv2-12-int8.tar.gz │ │ │ ├── mobilenetv2-12-qdq.onnx │ │ │ ├── mobilenetv2-12-qdq.tar.gz │ │ │ ├── mobilenetv2-12.onnx │ │ │ ├── mobilenetv2-12.tar.gz │ │ │ ├── mobilenetv2-7.onnx │ │ │ └── mobilenetv2-7.tar.gz │ │ └── train_mobilenet.ipynb │ ├── onnxrt_inference.ipynb │ ├── rcnn_ilsvrc13 │ │ ├── LICENSE │ │ ├── README.md │ │ └── model │ │ │ ├── rcnn-ilsvrc13-3.onnx │ │ │ ├── rcnn-ilsvrc13-3.tar.gz │ │ │ ├── rcnn-ilsvrc13-6.onnx │ │ │ ├── rcnn-ilsvrc13-6.tar.gz │ │ │ ├── rcnn-ilsvrc13-7.onnx │ │ │ ├── rcnn-ilsvrc13-7.tar.gz │ │ │ ├── rcnn-ilsvrc13-8.onnx │ │ │ ├── rcnn-ilsvrc13-8.tar.gz │ │ │ ├── rcnn-ilsvrc13-9.onnx │ │ │ └── rcnn-ilsvrc13-9.tar.gz │ ├── resnet │ │ ├── README.md │ │ ├── model │ │ │ ├── resnet101-v1-7.onnx │ │ │ ├── 
resnet101-v1-7.tar.gz │ │ │ ├── resnet101-v2-7.onnx │ │ │ ├── resnet101-v2-7.tar.gz │ │ │ ├── resnet152-v1-7.onnx │ │ │ ├── resnet152-v1-7.tar.gz │ │ │ ├── resnet152-v2-7.onnx │ │ │ ├── resnet152-v2-7.tar.gz │ │ │ ├── resnet18-v1-7.onnx │ │ │ ├── resnet18-v1-7.tar.gz │ │ │ ├── resnet18-v2-7.onnx │ │ │ ├── resnet18-v2-7.tar.gz │ │ │ ├── resnet34-v1-7.onnx │ │ │ ├── resnet34-v1-7.tar.gz │ │ │ ├── resnet34-v2-7.onnx │ │ │ ├── resnet34-v2-7.tar.gz │ │ │ ├── resnet50-caffe2-v1-3.onnx │ │ │ ├── resnet50-caffe2-v1-3.tar.gz │ │ │ ├── resnet50-caffe2-v1-6.onnx │ │ │ ├── resnet50-caffe2-v1-6.tar.gz │ │ │ ├── resnet50-caffe2-v1-7.onnx │ │ │ ├── resnet50-caffe2-v1-7.tar.gz │ │ │ ├── resnet50-caffe2-v1-8.onnx │ │ │ ├── resnet50-caffe2-v1-8.tar.gz │ │ │ ├── resnet50-caffe2-v1-9.onnx │ │ │ ├── resnet50-caffe2-v1-9.tar.gz │ │ │ ├── resnet50-v1-12-int8.onnx │ │ │ ├── resnet50-v1-12-int8.tar.gz │ │ │ ├── resnet50-v1-12-qdq.onnx │ │ │ ├── resnet50-v1-12-qdq.tar.gz │ │ │ ├── resnet50-v1-12.onnx │ │ │ ├── resnet50-v1-12.tar.gz │ │ │ ├── resnet50-v1-7.onnx │ │ │ ├── resnet50-v1-7.tar.gz │ │ │ ├── resnet50-v2-7.onnx │ │ │ └── resnet50-v2-7.tar.gz │ │ └── train_resnet.ipynb │ ├── shufflenet │ │ ├── README.md │ │ ├── ShufflenetV2-export.py │ │ └── model │ │ │ ├── shufflenet-3.onnx │ │ │ ├── shufflenet-3.tar.gz │ │ │ ├── shufflenet-6.onnx │ │ │ ├── shufflenet-6.tar.gz │ │ │ ├── shufflenet-7.onnx │ │ │ ├── shufflenet-7.tar.gz │ │ │ ├── shufflenet-8.onnx │ │ │ ├── shufflenet-8.tar.gz │ │ │ ├── shufflenet-9.onnx │ │ │ ├── shufflenet-9.tar.gz │ │ │ ├── shufflenet-v2-10.onnx │ │ │ ├── shufflenet-v2-10.tar.gz │ │ │ ├── shufflenet-v2-12-int8.onnx │ │ │ ├── shufflenet-v2-12-int8.tar.gz │ │ │ ├── shufflenet-v2-12-qdq.onnx │ │ │ ├── shufflenet-v2-12-qdq.tar.gz │ │ │ ├── shufflenet-v2-12.onnx │ │ │ └── shufflenet-v2-12.tar.gz │ ├── squeezenet │ │ ├── README.md │ │ ├── model │ │ │ ├── squeezenet1.0-12-int8.onnx │ │ │ ├── squeezenet1.0-12-int8.tar.gz │ │ │ ├── squeezenet1.0-12.onnx │ │ │ ├── 
squeezenet1.0-12.tar.gz │ │ │ ├── squeezenet1.0-13-qdq.onnx │ │ │ ├── squeezenet1.0-13-qdq.tar.gz │ │ │ ├── squeezenet1.0-3.onnx │ │ │ ├── squeezenet1.0-3.tar.gz │ │ │ ├── squeezenet1.0-6.onnx │ │ │ ├── squeezenet1.0-6.tar.gz │ │ │ ├── squeezenet1.0-7.onnx │ │ │ ├── squeezenet1.0-7.tar.gz │ │ │ ├── squeezenet1.0-8.onnx │ │ │ ├── squeezenet1.0-8.tar.gz │ │ │ ├── squeezenet1.0-9.onnx │ │ │ ├── squeezenet1.0-9.tar.gz │ │ │ ├── squeezenet1.1-7.onnx │ │ │ └── squeezenet1.1-7.tar.gz │ │ └── train_squeezenet.ipynb │ ├── synset.txt │ ├── vgg │ │ ├── README.md │ │ ├── model │ │ │ ├── vgg16-12-int8.onnx │ │ │ ├── vgg16-12-int8.tar.gz │ │ │ ├── vgg16-12.onnx │ │ │ ├── vgg16-12.tar.gz │ │ │ ├── vgg16-7.onnx │ │ │ ├── vgg16-7.tar.gz │ │ │ ├── vgg16-bn-7.onnx │ │ │ ├── vgg16-bn-7.tar.gz │ │ │ ├── vgg19-7.onnx │ │ │ ├── vgg19-7.tar.gz │ │ │ ├── vgg19-bn-7.onnx │ │ │ ├── vgg19-bn-7.tar.gz │ │ │ ├── vgg19-caffe2-3.onnx │ │ │ ├── vgg19-caffe2-3.tar.gz │ │ │ ├── vgg19-caffe2-6.onnx │ │ │ ├── vgg19-caffe2-6.tar.gz │ │ │ ├── vgg19-caffe2-7.onnx │ │ │ ├── vgg19-caffe2-7.tar.gz │ │ │ ├── vgg19-caffe2-8.onnx │ │ │ ├── vgg19-caffe2-8.tar.gz │ │ │ ├── vgg19-caffe2-9.onnx │ │ │ └── vgg19-caffe2-9.tar.gz │ │ └── train_vgg.ipynb │ └── zfnet-512 │ │ ├── README.md │ │ └── model │ │ ├── zfnet512-12-int8.onnx │ │ ├── zfnet512-12-int8.tar.gz │ │ ├── zfnet512-12.onnx │ │ ├── zfnet512-12.tar.gz │ │ ├── zfnet512-3.onnx │ │ ├── zfnet512-3.tar.gz │ │ ├── zfnet512-6.onnx │ │ ├── zfnet512-6.tar.gz │ │ ├── zfnet512-7.onnx │ │ ├── zfnet512-7.tar.gz │ │ ├── zfnet512-8.onnx │ │ ├── zfnet512-8.tar.gz │ │ ├── zfnet512-9.onnx │ │ └── zfnet512-9.tar.gz ├── object_detection_segmentation │ ├── duc │ │ ├── README.md │ │ ├── dependencies │ │ │ ├── cityscapes_labels.py │ │ │ ├── cityscapes_loader.py │ │ │ ├── duc-inference.ipynb │ │ │ ├── duc-postprocess.py │ │ │ ├── duc-preprocess.py │ │ │ ├── duc-validation.ipynb │ │ │ └── utils.py │ │ └── model │ │ │ ├── ResNet101-DUC-12-int8.onnx │ │ │ ├── 
ResNet101-DUC-12-int8.tar.gz │ │ │ ├── ResNet101-DUC-12.onnx │ │ │ ├── ResNet101-DUC-12.tar.gz │ │ │ ├── ResNet101-DUC-7.onnx │ │ │ └── ResNet101-DUC-7.tar.gz │ ├── faster-rcnn │ │ ├── README.md │ │ ├── dependencies │ │ │ ├── coco_classes.txt │ │ │ └── demo.jpg │ │ └── model │ │ │ ├── FasterRCNN-10.onnx │ │ │ ├── FasterRCNN-10.tar.gz │ │ │ ├── FasterRCNN-12-int8.onnx │ │ │ ├── FasterRCNN-12-int8.tar.gz │ │ │ ├── FasterRCNN-12.onnx │ │ │ └── FasterRCNN-12.tar.gz │ ├── fcn │ │ ├── README.md │ │ ├── dependencies │ │ │ ├── 000000017968.jpg │ │ │ ├── 000000025205.jpg │ │ │ ├── conversion.ipynb │ │ │ ├── inference.ipynb │ │ │ ├── validation_accuracy.ipynb │ │ │ └── voc_classes.txt │ │ └── model │ │ │ ├── fcn-resnet101-11.onnx │ │ │ ├── fcn-resnet101-11.tar.gz │ │ │ ├── fcn-resnet50-11.onnx │ │ │ ├── fcn-resnet50-11.tar.gz │ │ │ ├── fcn-resnet50-12-int8.onnx │ │ │ ├── fcn-resnet50-12-int8.tar.gz │ │ │ ├── fcn-resnet50-12.onnx │ │ │ └── fcn-resnet50-12.tar.gz │ ├── mask-rcnn │ │ ├── README.md │ │ ├── dependencies │ │ │ ├── coco_classes.txt │ │ │ └── demo.jpg │ │ └── model │ │ │ ├── MaskRCNN-10.onnx │ │ │ ├── MaskRCNN-10.tar.gz │ │ │ ├── MaskRCNN-12-int8.onnx │ │ │ ├── MaskRCNN-12-int8.tar.gz │ │ │ ├── MaskRCNN-12.onnx │ │ │ └── MaskRCNN-12.tar.gz │ ├── retinanet │ │ ├── README.md │ │ ├── dependencies │ │ │ ├── demo.jpg │ │ │ └── retinanet-export.py │ │ └── model │ │ │ ├── retinanet-9.onnx │ │ │ └── retinanet-9.tar.gz │ ├── ssd-mobilenetv1 │ │ ├── README.md │ │ └── model │ │ │ ├── ssd_mobilenet_v1_10.onnx │ │ │ ├── ssd_mobilenet_v1_10.tar.gz │ │ │ ├── ssd_mobilenet_v1_12-int8.onnx │ │ │ ├── ssd_mobilenet_v1_12-int8.tar.gz │ │ │ ├── ssd_mobilenet_v1_12.onnx │ │ │ └── ssd_mobilenet_v1_12.tar.gz │ ├── ssd │ │ ├── README.md │ │ └── model │ │ │ ├── ssd-10.onnx │ │ │ ├── ssd-10.tar.gz │ │ │ ├── ssd-12-int8.onnx │ │ │ ├── ssd-12-int8.tar.gz │ │ │ ├── ssd-12.onnx │ │ │ └── ssd-12.tar.gz │ ├── tiny-yolov2 │ │ ├── README.md │ │ └── model │ │ │ ├── tinyyolov2-7.onnx │ │ │ ├── 
tinyyolov2-7.tar.gz │ │ │ ├── tinyyolov2-8.onnx │ │ │ └── tinyyolov2-8.tar.gz │ ├── tiny-yolov3 │ │ ├── README.md │ │ └── model │ │ │ ├── tiny-yolov3-11.onnx │ │ │ └── tiny-yolov3-11.tar.gz │ ├── yolov2-coco │ │ ├── README.md │ │ └── model │ │ │ ├── yolov2-coco-9.onnx │ │ │ └── yolov2-coco-9.tar.gz │ ├── yolov3 │ │ ├── README.md │ │ └── model │ │ │ ├── yolov3-10.onnx │ │ │ ├── yolov3-10.tar.gz │ │ │ ├── yolov3-12-int8.onnx │ │ │ ├── yolov3-12-int8.tar.gz │ │ │ ├── yolov3-12.onnx │ │ │ └── yolov3-12.tar.gz │ └── yolov4 │ │ ├── README.md │ │ ├── dependencies │ │ ├── Conversion.ipynb │ │ ├── coco.names │ │ ├── inference.ipynb │ │ ├── onnx-model-validation.ipynb │ │ └── yolov4_anchors.txt │ │ └── model │ │ ├── yolov4.onnx │ │ └── yolov4.tar.gz ├── style_transfer │ └── fast_neural_style │ │ ├── README.md │ │ ├── dependencies │ │ ├── conversion.ipynb │ │ ├── style-transfer-ort.ipynb │ │ └── transformer_net.py │ │ └── model │ │ ├── candy-8.onnx │ │ ├── candy-8.tar.gz │ │ ├── candy-9.onnx │ │ ├── candy-9.tar.gz │ │ ├── mosaic-8.onnx │ │ ├── mosaic-8.tar.gz │ │ ├── mosaic-9.onnx │ │ ├── mosaic-9.tar.gz │ │ ├── pointilism-8.onnx │ │ ├── pointilism-8.tar.gz │ │ ├── pointilism-9.onnx │ │ ├── pointilism-9.tar.gz │ │ ├── rain-princess-8.onnx │ │ ├── rain-princess-8.tar.gz │ │ ├── rain-princess-9.onnx │ │ ├── rain-princess-9.tar.gz │ │ ├── udnie-8.onnx │ │ ├── udnie-8.tar.gz │ │ ├── udnie-9.onnx │ │ └── udnie-9.tar.gz └── super_resolution │ └── sub_pixel_cnn_2016 │ ├── README.md │ ├── dependencies │ └── Run_Super_Resolution_Model.ipynb │ └── model │ ├── super-resolution-10.onnx │ └── super-resolution-10.tar.gz └── workflow_scripts ├── check_model.py ├── generate_onnx_hub_manifest.py ├── onnx_test_data_utils.py ├── ort_test_dir_utils.py ├── test_models.py └── test_utils.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # XET LOCK 2 | * filter=xet diff=xet merge=xet -text 3 | *.gitattributes filter= 4 | *.xet/** filter= 5 | 
-------------------------------------------------------------------------------- /.xet/config.toml: -------------------------------------------------------------------------------- 1 | [upstream] 2 | origin_type = "github" 3 | user_name = "xetdata" 4 | repo_name = "onnx-models" 5 | -------------------------------------------------------------------------------- /ONNX_HUB_MANIFEST.json: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 292959 3 | hash = '0b33ab89e95b7ae93424465a2b8734ea4d59220a432e96cbdea97acc3539d357' 4 | -------------------------------------------------------------------------------- /contribute.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # How to Contribute to the Model Zoo 4 | 5 | To contribute a new model, create a [pull request](https://github.com/onnx/models/pull/new/). A pre-defined pull request [template](.github/PULL_REQUEST_TEMPLATE.md) is loaded when creating a new pull request, which describes all the artifacts required for making the contribution. 6 | 7 | View an [example submission](vision/classification/resnet/README.md) to get a sense of the final output. 
8 | -------------------------------------------------------------------------------- /resource/docs/INC_code.md: -------------------------------------------------------------------------------- 1 | ### Intel® Neural Compressor Code-based Demo 2 | 3 | This is an example showing how to quantize an ONNX model with [Intel® Neural Compressor](https://github.com/intel/neural-compressor) step by step: 4 | 5 | - Config file 6 | 7 | ```yaml 8 | model: 9 | name: alexnet 10 | framework: onnxrt_qlinearops 11 | 12 | quantization: 13 | approach: post_training_static_quant 14 | 15 | evaluation: 16 | accuracy: 17 | metric: 18 | topk: 1 19 | 20 | tuning: 21 | accuracy_criterion: 22 | relative: 0.01 # accuracy target 23 | ``` 24 | 25 | - Launcher code 26 | 27 | ```python 28 | import numpy as np 29 | import re 30 | import os 31 | from PIL import Image 32 | 33 | # extract dataset class from inference code 34 | class dataset: 35 | def __init__(self, data_path, image_list): 36 | self.image_list = [] 37 | self.label_list = [] 38 | with open(image_list, 'r') as f: 39 | for s in f: 40 | image_name, label = re.split(r"\s+", s.strip()) 41 | src = os.path.join(data_path, image_name) 42 | if not os.path.exists(src): 43 | continue 44 | self.image_list.append(src) 45 | self.label_list.append(int(label)) 46 | 47 | def __len__(self): 48 | return len(self.image_list) 49 | 50 | def __getitem__(self, index): 51 | image_path, label = self.image_list[index], self.label_list[index] 52 | with Image.open(image_path) as image: 53 | image = np.array(image.convert('RGB').resize((224, 224))).astype(np.float32) 54 | image[:, :, 0] -= 123.68 55 | image[:, :, 1] -= 116.779 56 | image[:, :, 2] -= 103.939 57 | image[:,:,[0,1,2]] = image[:,:,[2,1,0]] 58 | image = image.transpose((2, 0, 1)) 59 | return image, label 60 | 61 | from neural_compressor.experimental import Quantization, common 62 | ds = dataset('/path/to/imagenet', '/path/to/label') 63 | quantize = Quantization('/path/to/config_file') 64 | 
quantize.calib_dataloader = common.DataLoader(ds) 65 | quantize.model = model 66 | q_model = quantize() 67 | q_model.save("int8.onnx") 68 | ``` 69 | -------------------------------------------------------------------------------- /resource/images/INC_GUI.gif: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 5661078 3 | hash = '5893604eeb6d2ce682ad2b1631dea0c4e698f048d57f360f823dc1897843b6cc' 4 | -------------------------------------------------------------------------------- /resource/images/ONNX Model Zoo Graphics.png: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 1344985 3 | hash = 'bc4609a6fe21da59c5dc340e16da6d62a216a76299c4fdad864e09bb4fafde50' 4 | -------------------------------------------------------------------------------- /resource/images/ONNX_Model_Zoo_Graphics.png: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 1344985 3 | hash = 'bc4609a6fe21da59c5dc340e16da6d62a216a76299c4fdad864e09bb4fafde50' 4 | -------------------------------------------------------------------------------- /resource/images/ONNX_logo_main.png: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 51834 3 | hash = 'e4e56f3bd75ded43953752ab275a53fba2b33833296c89c2c4b498f1e5b69035' 4 | -------------------------------------------------------------------------------- /resource/images/bottom.png: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 101594 3 | hash = 'c32205621d3ed1d37bfd12927cae829a30031225184dd7abd5b4f624f6318973' 4 | -------------------------------------------------------------------------------- /resource/images/mid.png: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 114156 
3 | hash = '982756763a2df48384e7db4ece20be4bed1feac96fd0a17553c33cc93cfe35db' 4 | -------------------------------------------------------------------------------- /resource/images/top.png: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 35121 3 | hash = 'c4867a0b115ca9d0765de2db325e67cb93407f393661b4c00f466f87f2bb02bd' 4 | -------------------------------------------------------------------------------- /target.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Target models 4 | Below is a list of models that we would like to have in the model zoo in the near future. Please refer to the [contribution guidelines](contribute.md) to contribute a model. 5 | ## CNN models 6 | 7 | |Model | Reference | 8 | |-------------|:--------------| 9 | | Gender Detection| [Age and Gender Classification using Convolutional Neural Networks](https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=7&ved=0ahUKEwjEzsev5pDaAhVI2GMKHZvjCjEQFgiLATAG&url=http%3A%2F%2Fciteseerx.ist.psu.edu%2Fviewdoc%2Fdownload%3Fdoi%3D10.1.1.722.9654%26rep%3Drep1%26type%3Dpdf&usg=AOvVaw0-c9n2_ZcsRCyc6BCb0Zdj) | 10 | | Single Shot Detector| [SSD: Single Shot Multi Detector](https://arxiv.org/abs/1512.02325) | 11 | | Super Resolution| [Image Super resolution using deep convolutional networks ](http://ieeexplore.ieee.org/document/7115171/?reload=true) | 12 | | Face Detection|[A Convolutional Neural Network Cascade for Face Detection ](https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Li_A_Convolutional_Neural_2015_CVPR_paper.pdf)| 13 | |Face Detection|[ArcFace: Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698) | 14 | |Semantic Segmentation|[Fully Convolutional Networks for Semantic Segmentation 
](https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=5&ved=0ahUKEwiI9YjW5ZDaAhVW5mMKHYifAL4QFghUMAQ&url=https%3A%2F%2Fpeople.eecs.berkeley.edu%2F~jonlong%2Flong_shelhamer_fcn.pdf&usg=AOvVaw1OZIT1dpO9NAS45hB7mhG8)| 15 | |Object Detection & Segmentation| [Faster-RCNN ](https://arxiv.org/abs/1506.01497) | 16 | |Object Detection & Segmentation| [Mask-RCNN ](https://arxiv.org/abs/1703.06870) | 17 | |Object Detection & Segmentation|[YOLO v2 ](https://arxiv.org/abs/1612.08242)/ [v3 ](https://pjreddie.com/media/files/papers/YOLOv3.pdf)| 18 | 19 | 20 | ## GAN models 21 | |Model | Reference | 22 | |-------------|:--------------| 23 | | Text to Image| [Generative Adversarial Text to image Synthesis ](https://arxiv.org/abs/1605.05396)| 24 | |Style Transfer |[Unpaired Image to Image Translation using Cycle consistent Adversarial Network ](https://arxiv.org/abs/1703.10593)| 25 | |Sound Generative models| [WaveNet: A Generative Model for Raw Audio ](https://arxiv.org/abs/1609.03499)| 26 | ## NLP models 27 | |Model | Reference | 28 | |-------------|:--------------| 29 | |Speech Recognition| [Speech recognition with deep recurrent neural networks ](https://www.cs.toronto.edu/~fritz/absps/RNN13.pdf)| 30 | | Text To Speech| [Deep voice: Real time neural text to speech ](https://arxiv.org/abs/1702.07825) | 31 | | Language Model| [Deep Neural Network Language Models ](https://pdfs.semanticscholar.org/a177/45f1d7045636577bcd5d513620df5860e9e5.pdf) | 32 | | Machine Translation| [Neural Machine Translation by jointly learning to align and translate ](https://arxiv.org/abs/1409.0473)| 33 | |Machine Translation| [Google's Neural Machine Translation System: Bridging the Gap between Human and Machine Translation ](https://arxiv.org/abs/1609.08144) | 34 | 35 | ## Models using Image as well as NLP 36 | |Model | Reference | 37 | |-------------|:--------------| 38 | |Visual Question Answering |[VQA: Visual Question Answering ](https://arxiv.org/pdf/1505.00468v6.pdf) 39 | |Visual 
Question Answering |[Yin and Yang: Balancing and Answering Binary Visual Questions ](https://arxiv.org/pdf/1511.05099.pdf) 40 | |Visual Question Answering |[Making the V in VQA Matter: Elevating the Role of Image Understanding in Visual Question Answering](https://arxiv.org/pdf/1612.00837.pdf) 41 | | Visual Dialog| [Visual Dialog ](https://arxiv.org/abs/1611.08669) 42 | 43 | 44 | ## Other interesting models 45 | |Model | Reference | 46 | |-------------|:--------------| 47 | |Time Series Forecasting| [Modeling Long- and Short-Term Temporal Patterns with Deep Neural Networks ](https://arxiv.org/pdf/1703.07015.pdf) 48 | |Recommender systems|[DropoutNet: Addressing Cold Start in Recommender Systems](http://www.cs.toronto.edu/~mvolkovs/nips2017_deepcf.pdf) 49 | |Collaborative filtering|| 50 | |Autoencoders|| 51 | 52 | Want other models in this Model Zoo? Go ahead and raise a GitHub issue outlining details and update the above list to call the community for contribution. 53 | -------------------------------------------------------------------------------- /text/machine_comprehension/bert-squad/model/bertsquad-10.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 435852734 3 | hash = '215008ef2815bf30e72bee82924720566380f1c2742b473b14560d62090c1312' 4 | -------------------------------------------------------------------------------- /text/machine_comprehension/bert-squad/model/bertsquad-10.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 403398451 3 | hash = '0ad1f50952ac6a0082d733743848cff03399983bd4d2c1db47b7bd6409fc1285' 4 | -------------------------------------------------------------------------------- /text/machine_comprehension/bert-squad/model/bertsquad-12-int8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 124565601 3 | hash = 
'7efec3f0ef4b89450c40e2e89a2739bca0a6bd8b2d04efb206d462963e7e2211' 4 | -------------------------------------------------------------------------------- /text/machine_comprehension/bert-squad/model/bertsquad-12-int8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 106044512 3 | hash = '21486774966a5fe0189fc367dcd3e27650f9cdbc6dba524c24123944f2f0132c' 4 | -------------------------------------------------------------------------------- /text/machine_comprehension/bert-squad/model/bertsquad-12.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 435852736 3 | hash = 'c39a0849406cc8b87202f25ed33ca130c556385c8914fe20ab4caad9745c8df4' 4 | -------------------------------------------------------------------------------- /text/machine_comprehension/bert-squad/model/bertsquad-12.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 403082198 3 | hash = '43d6dc69c8e05d99546cc82eebd98e6af8a2d03649572940330d4dc30bbb2834' 4 | -------------------------------------------------------------------------------- /text/machine_comprehension/bert-squad/model/bertsquad-8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 435882893 3 | hash = 'de94be61c1cd617f46e4d1de85794ef130db4a2def8788650675d166cbfbca74' 4 | -------------------------------------------------------------------------------- /text/machine_comprehension/bert-squad/model/bertsquad-8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 403400046 3 | hash = '3afd0b728e6f906b63d00b64fcabeb56bfb3c742f56277c287b107a4b78a8fb3' 4 | -------------------------------------------------------------------------------- /text/machine_comprehension/bidirectional_attention_flow/README.md: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | # BiDAF 4 | 5 | ## Description 6 | This model is a neural network for answering a query about a given context paragraph. 7 | 8 | ## Model 9 | 10 | |Model |Download |Download (with sample test data)|ONNX version|Opset version|Accuracy | 11 | |-------------|:--------------|:--------------|:--------------|:--------------|:--------------| 12 | |BiDAF |[41.5 MB](model/bidaf-9.onnx) |[37.3 MB](model/bidaf-9.tar.gz)|1.4 |ONNX 9, ONNX.ML 1 |EM of 68.1 in SQuAD v1.1 | 13 | 14 |
15 | 16 | ## Inference 17 | 18 | ### Input to model 19 | Tokenized strings of context paragraph and query. 20 | 21 | ### Preprocessing steps 22 | Tokenize words and chars in string for context and query. The tokenized words are in lower case, while chars are not. Chars of each word needs to be clamped or padded to list of length 16. Note [NLTK](https://www.nltk.org/install.html) is used in preprocess for word tokenize. 23 | 24 | * context_word: [seq, 1,] of string 25 | * context_char: [seq, 1, 1, 16] of string 26 | * query_word: [seq, 1,] of string 27 | * query_char: [seq, 1, 1, 16] of string 28 | 29 | The following code shows how to preprocess input strings: 30 | 31 | ```python 32 | import numpy as np 33 | import string 34 | from nltk import word_tokenize 35 | 36 | def preprocess(text): 37 | tokens = word_tokenize(text) 38 | # split into lower-case word tokens, in numpy array with shape of (seq, 1) 39 | words = np.asarray([w.lower() for w in tokens]).reshape(-1, 1) 40 | # split words into chars, in numpy array with shape of (seq, 1, 1, 16) 41 | chars = [[c for c in t][:16] for t in tokens] 42 | chars = [cs+['']*(16-len(cs)) for cs in chars] 43 | chars = np.asarray(chars).reshape(-1, 1, 1, 16) 44 | return words, chars 45 | 46 | # input 47 | context = 'A quick brown fox jumps over the lazy dog.' 48 | query = 'What color is the fox?' 49 | cw, cc = preprocess(context) 50 | qw, qc = preprocess(query) 51 | ``` 52 | 53 | ### Output of model 54 | The model has 2 outputs. 55 | 56 | * start_pos: the answer's start position (0-indexed) in context, 57 | * end_pos: the answer's inclusive end position (0-indexed) in context. 
58 | 59 | ### Postprocessing steps 60 | Post processing and meaning of output 61 | ``` 62 | # assuming answer contains the np arrays for start_pos/end_pos 63 | start = np.asscalar(answer[0]) 64 | end = np.asscalar(answer[1]) 65 | print([w.encode() for w in cw[start:end+1].reshape(-1)]) 66 | ``` 67 | 68 | For this testcase, it would output 69 | ``` 70 | [b'brown']. 71 | ``` 72 |
73 | 74 | ## Dataset (Train and validation) 75 | The model is trained with [SQuAD v1.1](https://rajpurkar.github.io/SQuAD-explorer/explore/1.1/dev/). 76 |
77 | 78 | ## Validation accuracy 79 | Metric is Exact Matching (EM) of 68.1, computed over SQuAD v1.1 dev data. 80 |
81 | 82 | ## Publication/Attribution 83 | Minjoon Seo, Aniruddha Kembhavi, Ali Farhadi, Hannaneh Hajishirzi. Bidirectional Attention Flow for Machine Comprehension, [paper](https://arxiv.org/abs/1611.01603) 84 | 85 |
86 | 87 | ## References 88 | This model is converted from a CNTK model trained from [this implementation](https://github.com/microsoft/CNTK/tree/nikosk/bidaf/Examples/Text/BidirectionalAttentionFlow/squad). 89 |
90 | 91 | ## License 92 | MIT License 93 |
94 | -------------------------------------------------------------------------------- /text/machine_comprehension/bidirectional_attention_flow/model/bidaf-9.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 43522228 3 | hash = '823c09ccc64de5089b0f3c6e5aba7e6edcf2360adceb277bbb1a6ed9e69f8369' 4 | -------------------------------------------------------------------------------- /text/machine_comprehension/bidirectional_attention_flow/model/bidaf-9.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 39092248 3 | hash = 'c2111c3c4c45992a326fcb57ed86d49aefa0bb522f222d3ede9d72c6630a85d2' 4 | -------------------------------------------------------------------------------- /text/machine_comprehension/gpt-2/model/gpt2-10.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 548227537 3 | hash = '8b982f4052d6d98647cc4176d443a545979016ff6ffa275cc34be1ab47e90114' 4 | -------------------------------------------------------------------------------- /text/machine_comprehension/gpt-2/model/gpt2-10.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 463131530 3 | hash = '532061460222231d35feaad74919f44349167e9c068fc2da1eb1055676bae000' 4 | -------------------------------------------------------------------------------- /text/machine_comprehension/gpt-2/model/gpt2-lm-head-10.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 664871060 3 | hash = '02a66f9bfb0cb6ee9a3653d8545e1e445b70ab03bb7e12bd8abaad717c4fab69' 4 | -------------------------------------------------------------------------------- /text/machine_comprehension/gpt-2/model/gpt2-lm-head-10.tar.gz: 
-------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 606542743 3 | hash = '6bd56f37cd6d7ef0e9065662ae43132751effacf90f275dd47cd36733c994bfa' 4 | -------------------------------------------------------------------------------- /text/machine_comprehension/gpt2-bs/README.md: -------------------------------------------------------------------------------- 1 | 2 | # GPT-2 with Beam Search Generation 3 | 4 | ## Use-cases 5 | Transformer-based language model for text generation. 6 | 7 | ## Description 8 | This GPT-2 model with generation can produce the result without any extra code or algorithm. It already embedded a beam search algorithm into the ONNX model, so there is **NO** Post-Processing code to inference. 9 | 10 | 11 | ## Model 12 | 13 | |Model |Download | Compressed |ONNX version|Opset version| 14 | |-------------|:--------------|:--------------|:--------------|:--------------| 15 | |gpt2-lm-head-bs |[635 MB](model/gpt2-lm-head-bs-12.onnx) | N/A (similiar size) | 1.7 | 12 16 | 17 | 18 | ### Source 19 | Huggingface PyTorch GPT-2-with-lm-head + conversion script ==> ONNX GPT-2-LM-HEAD-BS.onnx 20 | The full conversion script is in [onnxruntime-extentions](https://github.com/microsoft/onnxruntime-extensions/blob/main/tutorials/gpt2bs.py), and some model parameters can be changed if the number in the script was changed. 21 | 22 | ## Inference 23 | running this model is straightforward, with onnxruntime-extensions, it only contains several lines for an end-to-end inference. 
24 | ```python 25 | 26 | from onnxruntime_extensions import PyOrtFunction 27 | 28 | gpt2_all = PyOrtFunction.from_model('model/gpt2-lm-head-bs-12.onnx') 29 | encdict = tokenizer('What is the best story', padding=True, return_tensors='np') 30 | 31 | outputs = gpt2_all(encdict['input_ids'], encdict['attention_mask'].astype('float32'), 30) 32 | print(tokenizer.decode(outputs[0], skip_special_tokens=True)) 33 | ``` 34 | 35 | The tokenizer used in the code example above can be created as follows: 36 | 37 | ```python 38 | from transformers import GPT2Tokenizer 39 | 40 | tokenizer = GPT2Tokenizer.from_pretrained('gpt2') 41 | tokenizer.padding_side = "left" 42 | tokenizer.pad_token = tokenizer.eos_token 43 | ``` 44 | 45 | 46 | ## Publication/Attribution 47 | Alec Radford, Jeffrey Wu, Rewon Child, David Luan, Dario Amodei, and Ilya Sutskever. Language Models are Unsupervised Multitask Learners. 2019. 48 | 49 | ## References 50 | This model is converted directly from [huggingface/transformers](https://github.com/huggingface/transformers/blob/master/src/transformers/modeling_gpt2.py). 51 |
52 | 53 | ## Contributors 54 | Wenbing Li 55 |
56 | 57 | ## License 58 | Apache 2.0 License 59 |
-------------------------------------------------------------------------------- /text/machine_comprehension/gpt2-bs/model/gpt2-lm-head-bs-12.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 664855867 3 | hash = '93a7d37f152fd8ba084824ce98b8ad601fd208a639b63cd402b9f3ec6f6cc5e7' 4 | -------------------------------------------------------------------------------- /text/machine_comprehension/roberta/dependencies/roberta-sequence-classification-validation.ipynb: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 411428 3 | hash = 'a7d02a5e67319b9577e402bcfa1ad671dd1b7a366138261ecea3cbf0811c9ddd' 4 | -------------------------------------------------------------------------------- /text/machine_comprehension/roberta/model/roberta-base-11.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 498649858 3 | hash = 'b49854a3e1640dd97a56bb78bc2e753103cc5a526b8e8e45dc8944d9707f0f41' 4 | -------------------------------------------------------------------------------- /text/machine_comprehension/roberta/model/roberta-base-11.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 291239125 3 | hash = '0e57b63bd36769f563e37442d3724ceb68938da9dc63e2ffa3d7e3be70ee0cde' 4 | -------------------------------------------------------------------------------- /text/machine_comprehension/roberta/model/roberta-sequence-classification-9.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 498658080 3 | hash = '6a08d718e0d3b0c28d7aa4d395dc8051c25b4f62b1b036043f19370efa195170' 4 | -------------------------------------------------------------------------------- /text/machine_comprehension/roberta/model/roberta-sequence-classification-9.tar.gz: 
# SPDX-License-Identifier: Apache-2.0

from transformers import T5ForConditionalGeneration
from .models import CombinedDecoder, SimplifiedT5Encoder
import torch


def create_t5_encoder_decoder(pretrained_version='t5-base'):
    """ Generates an encoder and a decoder model with a language model head from a pretrained huggingface model

    Args:
        pretrained_version (str): Name of a pretrained model, or path to a pretrained / finetuned version of T5

    Returns:
        simplified_encoder: pytorch t5 encoder with a wrapper to output only the hidden states
        decoder_with_lm_head: pytorch t5 decoder with a language modeling head
    """

    # T5 is an encoder / decoder model with a language modeling head on top.
    # We need to separate those out for efficient language generation
    model = T5ForConditionalGeneration.from_pretrained(pretrained_version)

    return turn_model_into_encoder_decoder(model)


def turn_model_into_encoder_decoder(model):
    """ Splits a T5ForConditionalGeneration into (simplified_encoder, decoder_with_lm_head).

    Args:
        model: a loaded T5ForConditionalGeneration instance

    Returns:
        simplified_encoder: wrapper around the encoder that outputs only hidden states
        decoder_with_lm_head: decoder combined with the language modeling head
    """
    decoder_with_lm_head = CombinedDecoder(model.decoder, model.lm_head, model.config)
    simplified_encoder = SimplifiedT5Encoder(model.encoder)

    return simplified_encoder, decoder_with_lm_head


def generate_onnx_representation(pretrained_version=None, output_prefix=None, model=None):
    """ Exports a given huggingface pretrained model, or a given model and tokenizer, to onnx

    Args:
        pretrained_version (str): Name of a pretrained model, or path to a pretrained / finetuned version of T5
        output_prefix (str): Path prefix for the generated onnx files
        model: an in-memory T5ForConditionalGeneration to export instead of loading `pretrained_version`
    """
    # output_prefix is always required (it names the exported files); the model
    # can come either from `pretrained_version` or from an in-memory `model`.
    if output_prefix is None or (pretrained_version is None and model is None):
        print("You need to specify both pretrained_version (the pretrained model you wish to export) and output_prefix "
              "(the path you want to export to). Alternatively you can export a model you have in memory.")
        return
    if model is not None:
        # Transform model into encoder and decoder with lm head
        simplified_encoder, decoder_with_lm_head = turn_model_into_encoder_decoder(model)
    else:
        # Loading model_data
        simplified_encoder, decoder_with_lm_head = create_t5_encoder_decoder(pretrained_version)

    # Example sequence used only to trace the graphs
    input_ids = torch.tensor([[42] * 10])

    # Exports the decoder (with LM head) to ONNX
    torch.onnx.export(
        decoder_with_lm_head,
        (input_ids, simplified_encoder(input_ids)),
        f"{output_prefix}-decoder-with-lm-head.onnx",
        export_params=True,
        opset_version=12,
        input_names=['input_ids', 'encoder_hidden_states'],
        output_names=['hidden_states'],
        dynamic_axes={
            'input_ids': {0: 'batch', 1: 'sequence'},
            'encoder_hidden_states': {0: 'batch', 1: 'sequence'},
            'hidden_states': {0: 'batch', 1: 'sequence'},
        })

    # Exports the encoder to ONNX. Uses the public torch.onnx.export — the
    # original called the private torch.onnx._export, which is not a stable API.
    torch.onnx.export(
        simplified_encoder,
        input_ids,
        f"{output_prefix}-encoder.onnx",
        export_params=True,
        opset_version=12,
        input_names=['input_ids'],
        output_names=['hidden_states'],
        dynamic_axes={
            # 'encoder_hidden_states' is neither an input nor an output of the
            # encoder graph, so only the real tensors are listed here.
            'input_ids': {0: 'batch', 1: 'sequence'},
            'hidden_states': {0: 'batch', 1: 'sequence'},
        })
filesize = 287840759 3 | hash = 'ed9167619331879c39e64302fe7ba53ab0a999553c80bef04ba96af96d36fdad' 4 | -------------------------------------------------------------------------------- /text/machine_comprehension/t5/model/t5-encoder-12.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 438549611 3 | hash = '83060218b833e749002532d41433eba019e173d6d918fbb496956bd4d22f8fb6' 4 | -------------------------------------------------------------------------------- /text/machine_comprehension/t5/model/t5-encoder-12.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 194535656 3 | hash = '40864ef91e8a214eed78969aaa8c0a75f83d2918598cbeffe7cf7b463212bc37' 4 | -------------------------------------------------------------------------------- /vision/body_analysis/age_gender/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Age and Gender Classification using Convolutional Neural Networks 4 | 5 | ## Description 6 | Automatic age and gender classification has become relevant to an increasing amount of applications, particularly since the rise of social platforms and social media. Nevertheless, performance of existing methods on real-world images is still significantly lacking, especially when compared to the tremendous leaps in performance recently reported for the related task of face recognition. 
7 | 8 | ## Models 9 | | Model | Download | ONNX version | Opset version | Dataset | 10 | |:-------------|:--------------|:--------------|:--------------|:--------------| 11 | | [googlenet_age_adience](https://drive.google.com/drive/folders/1GeLTHzHALgTYFj2Q9o5aWdztA9WzoErx?usp=sharing) | [23 MB](models/age_googlenet.onnx) | 1.6 | 11 | Adience | 12 | | [googlenet_gender_adience](https://drive.google.com/drive/folders/1r0GroTfsF7VpLhcS3IxU-LmAh6rI6vbQ?usp=sharing) | [23 MB](models/gender_googlenet.onnx)| 1.6 | 11 | Adience | 13 | | [vgg_ilsvrc_16_age_chalearn_iccv2015](https://drive.google.com/drive/folders/1wE4_sj-UBumkjDK9mtfaO9eUan_z44cY?usp=sharing) | [513 MB](models/vgg_ilsvrc_16_age_chalearn_iccv2015.onnx) | 1.6 | 11 | ChaLearn LAP 2015 | 14 | | [vgg_ilsvrc_16_age_imdb_wiki](https://drive.google.com/drive/folders/14wckle-MbnN10xzdzgF464bMnlM-dd5-?usp=sharing) | [513 MB](models/vgg_ilsvrc_16_age_imdb_wiki.onnx)| 1.6 | 11 | IMDB-WIKI | 15 | | [vgg_ilsvrc_16_gender_imdb_wiki](https://drive.google.com/drive/folders/16Z1r7GEXCsJG_384VsjlNxOFXbxcXrqM?usp=sharing) | [512 MB](models/vgg_ilsvrc_16_gender_imdb_wiki.onnx)| 1.6 | 11 | IMDB-WIKI | 16 | 17 | ## Inference 18 | ### GoogleNet 19 | Input tensor is `1 x 3 x height x width` with mean values `104, 117, 123`. Input image have to be previously resized to `224 x 224` pixels and converted to `BGR` format. 20 | Run [levi_googlenet.py](levi_googlenet.py) python script example. 21 | 22 | ### VGG-16 23 | Input tensor is `1 x 3 x height x width`, which values are in range of `[0, 255]`. Input image have to be previously resized to `224 x 224` pixels and converted to `BGR` format. 24 | Run [rothe_vgg.py](rothe_vgg.py) python script example. 25 | 26 | ## References 27 | * Levi et al. - [Age and Gender Classification Using Convolutional Neural Networks](https://talhassner.github.io/home/publication/2015_CVPR). 28 | * Rothe et al. 
- [IMDB-WIKI – 500k+ face images with age and gender labels](https://data.vision.ee.ethz.ch/cvl/rrothe/imdb-wiki/). 29 | * Lapuschkin et al. - [Understanding and Comparing Deep Neural Networks for Age and Gender Classification](https://github.com/sebastian-lapuschkin/understanding-age-gender-deep-learning-models). 30 | * Caffe to ONNX: [unofficial converter](https://github.com/asiryan/caffe-onnx). 31 | 32 | ## Contributors 33 | Valery Asiryan ([asiryan](https://github.com/asiryan)) 34 | 35 | ## License 36 | Apache 2.0 37 | -------------------------------------------------------------------------------- /vision/body_analysis/age_gender/dependencies/baby.jpg: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 66052 3 | hash = '7b3f80b0356d469649394e963b6e175e81affac99b85bc3ee5041cefa4db1945' 4 | -------------------------------------------------------------------------------- /vision/body_analysis/age_gender/dependencies/bella.jpg: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 224540 3 | hash = '8f42bb341ac935e098d201ef2c194200ac438f6f70f071e48556ecbb5079d761' 4 | -------------------------------------------------------------------------------- /vision/body_analysis/age_gender/dependencies/bruce.jpg: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 102535 3 | hash = '0f9d736595751724ab672a5e1e4acf7bccd914e66c43acf5144f98b7ee899e51' 4 | -------------------------------------------------------------------------------- /vision/body_analysis/age_gender/dependencies/kid.jpg: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 94710 3 | hash = 'c08ad7157f758a182b65e98f94d936ecde38341dabb8d7d2177792d1174a431b' 4 | -------------------------------------------------------------------------------- 
/vision/body_analysis/age_gender/models/age_googlenet.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 23960165 3 | hash = '1f3b917f8421d4a03a1859851091134d7ae5025e013ef259b3fcb80e8a097001' 4 | -------------------------------------------------------------------------------- /vision/body_analysis/age_gender/models/gender_googlenet.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 23935566 3 | hash = '6e02c4acdb6ec615046a649ba9d3f6312cde9b33bdd6162e2a5ce1e8368da8e5' 4 | -------------------------------------------------------------------------------- /vision/body_analysis/age_gender/models/vgg_ilsvrc_16_age_chalearn_iccv2015.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 538703700 3 | hash = 'c49dce9d2166743a0b654cde4421bc7b71f1a61ed6f3cdace83389c78b6c5b2b' 4 | -------------------------------------------------------------------------------- /vision/body_analysis/age_gender/models/vgg_ilsvrc_16_age_imdb_wiki.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 538703692 3 | hash = 'bcd0a6c42e964fd5ed7163c4a13f0c95f2264a06e0b9a06a4eb6df88ec75c792' 4 | -------------------------------------------------------------------------------- /vision/body_analysis/age_gender/models/vgg_ilsvrc_16_gender_imdb_wiki.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 537081253 3 | hash = '9ee6d46d5c731952e94f631ef00b7e4a3ef201e1c936a28fcbe3084a157e96f9' 4 | -------------------------------------------------------------------------------- /vision/body_analysis/arcface/dependencies/arcface_inference.ipynb: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 313430 3 | hash 
# SPDX-License-Identifier: Apache-2.0

# NOTE: `import sklearn` alone does not make the `sklearn.preprocessing`
# submodule available; it has to be imported explicitly.
import sklearn.preprocessing

def postprocess(embedding):
    '''
    L2-normalize a face embedding and flatten it to a 1-D vector
    '''
    embedding = sklearn.preprocessing.normalize(embedding).flatten()
    return embedding

# --------------------------------------------------------------------------
# face_preprocess.py
# --------------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0


import cv2
import numpy as np
from skimage import transform as trans

def parse_lst_line(line):
    '''
    Parse one tab-separated .lst line:
        "<aligned>\t<image_path>\t<label>[\t<bbox x4>[\t<landmark x10>]]"

    Returns (image_path, label, bbox, landmark, aligned) where bbox is a
    (4,) int32 array or None and landmark is a (5, 2) array or None.
    '''
    vec = line.strip().split("\t")
    assert len(vec) >= 3
    aligned = int(vec[0])
    image_path = vec[1]
    label = int(vec[2])
    bbox = None
    landmark = None
    if len(vec) > 3:
        bbox = np.zeros((4,), dtype=np.int32)
        for i in range(3, 7):
            bbox[i-3] = int(vec[i])
    if len(vec) > 7:
        # 10 values = 5 landmark points stored as (x1..x5, y1..y5)
        _l = [float(vec[i]) for i in range(7, 17)]
        landmark = np.array(_l).reshape((2, 5)).T
    return image_path, label, bbox, landmark, aligned


def read_image(img_path, **kwargs):
    '''
    Read and transpose input image

    kwargs:
        mode:   'rgb' (default), 'gray', or anything else for raw BGR
        layout: 'HWC' (default) or 'CHW'
    '''
    mode = kwargs.get('mode', 'rgb')
    layout = kwargs.get('layout', 'HWC')
    # Read image (transpose if necessary).
    # cv2.CV_LOAD_IMAGE_GRAYSCALE / CV_LOAD_IMAGE_COLOR were removed in
    # OpenCV 3; the IMREAD_* flags are the supported equivalents.
    if mode == 'gray':
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    else:
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if mode == 'rgb':
            img = img[..., ::-1]  # OpenCV loads BGR; flip channels to RGB
        if layout == 'CHW':
            img = np.transpose(img, (2, 0, 1))
    return img

def preprocess(img, bbox=None, landmark=None, **kwargs):
    '''
    Preprocess input image - returns aligned face images

    If landmark points are given, the face is warped onto a canonical
    5-point template; otherwise the bounding box (or a center crop when no
    bbox is given) is cut out with a margin and resized.

    kwargs: image_size ('112,112' or '112,96'), margin (default 44),
            plus read_image kwargs when `img` is a path.
    '''
    if isinstance(img, str):
        img = read_image(img, **kwargs)
    M = None
    image_size = []
    str_image_size = kwargs.get('image_size', '')
    # Assert input image shape
    if len(str_image_size) > 0:
        image_size = [int(x) for x in str_image_size.split(',')]
        if len(image_size) == 1:
            image_size = [image_size[0], image_size[0]]
        assert len(image_size) == 2
        assert image_size[0] == 112
        # NOTE(review): this assert is redundant given the previous one;
        # it probably intended image_size[1] in (96, 112) — confirm upstream.
        assert image_size[0] == 112 or image_size[1] == 96
    # Do alignment using landmark points
    if landmark is not None:
        assert len(image_size) == 2
        # Canonical 5-point landmark template for a 112x96 crop
        src = np.array([
            [30.2946, 51.6963],
            [65.5318, 51.5014],
            [48.0252, 71.7366],
            [33.5493, 92.3655],
            [62.7299, 92.2041]], dtype=np.float32)
        if image_size[1] == 112:
            # shift the template horizontally for the wider 112x112 crop
            src[:, 0] += 8.0
        dst = landmark.astype(np.float32)

        tform = trans.SimilarityTransform()
        tform.estimate(dst, src)
        M = tform.params[0:2, :]

    # If no landmark points available, do alignment using bounding box.
    # If no bounding box available use center crop
    if M is None:
        if bbox is None:  # use center crop
            det = np.zeros(4, dtype=np.int32)
            det[0] = int(img.shape[1] * 0.0625)
            det[1] = int(img.shape[0] * 0.0625)
            det[2] = img.shape[1] - det[0]
            det[3] = img.shape[0] - det[1]
        else:
            det = bbox
        margin = kwargs.get('margin', 44)
        bb = np.zeros(4, dtype=np.int32)
        bb[0] = np.maximum(det[0] - margin/2, 0)
        bb[1] = np.maximum(det[1] - margin/2, 0)
        bb[2] = np.minimum(det[2] + margin/2, img.shape[1])
        bb[3] = np.minimum(det[3] + margin/2, img.shape[0])
        ret = img[bb[1]:bb[3], bb[0]:bb[2], :]
        if len(image_size) > 0:
            ret = cv2.resize(ret, (image_size[1], image_size[0]))
        return ret
    else:  # do align using landmark
        assert len(image_size) == 2

        warped = cv2.warpAffine(img, M, (image_size[1], image_size[0]), borderValue=0.0)

        return warped
7 | 8 | ## Model 9 | 10 | | Model | Download | Download (with sample test data) | ONNX version | Opset version | 11 | |----------------|:-----------|:-----------|:--------|:-------------| 12 | |Emotion FERPlus |[34 MB](model/emotion-ferplus-2.onnx)|[31 MB](model/emotion-ferplus-2.tar.gz)|1.0|2| 13 | |Emotion FERPlus |[34 MB](model/emotion-ferplus-7.onnx)|[31 MB](model/emotion-ferplus-7.tar.gz)|1.2|7| 14 | |Emotion FERPlus |[34 MB](model/emotion-ferplus-8.onnx)|[31 MB](model/emotion-ferplus-8.tar.gz)|1.3|8| 15 | 16 | ### Paper 17 | "Training Deep Networks for Facial Expression Recognition with Crowd-Sourced Label Distribution" [arXiv:1608.01041](https://arxiv.org/abs/1608.01041) 18 | 19 | ### Dataset 20 | The model is trained on the FER+ annotations for the standard Emotion FER [dataset](https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/data), as described in the above paper. 21 | 22 | ### Source 23 | The model is trained in CNTK, using the cross entropy training mode. You can find the source code [here](https://github.com/ebarsoum/FERPlus). 24 | 25 | ### Demo 26 | [Run Emotion_FERPlus in browser](https://microsoft.github.io/onnxjs-demo/#/emotion_ferplus) - implemented by ONNX.js with Emotion_FERPlus version 1.2 27 | 28 | ## Inference 29 | ### Input 30 | The model expects input of the shape `(Nx1x64x64)`, where `N` is the batch size. 
31 | ### Preprocessing 32 | Given a path `image_path` to the image you would like to score: 33 | ```python 34 | import numpy as np 35 | from PIL import Image 36 | 37 | def preprocess(image_path): 38 | input_shape = (1, 1, 64, 64) 39 | img = Image.open(image_path) 40 | img = img.resize((64, 64), Image.LANCZOS)  # Image.ANTIALIAS was removed in Pillow 10 41 | img_data = np.array(img) 42 | img_data = np.resize(img_data, input_shape) 43 | return img_data 44 | ``` 45 | 46 | ### Output 47 | The model outputs a `(1x8)` array of scores corresponding to the 8 emotion classes, where the labels map as follows: 48 | `emotion_table = {'neutral':0, 'happiness':1, 'surprise':2, 'sadness':3, 'anger':4, 'disgust':5, 'fear':6, 'contempt':7}` 49 | ### Postprocessing 50 | Route the model output through a softmax function to map the aggregated activations across the network to probabilities across the 8 classes. 51 | 52 | ```python 53 | import numpy as np 54 | 55 | def softmax(scores): 56 | # your softmax function 57 | 58 | def postprocess(scores): 59 | ''' 60 | This function takes the scores generated by the network and returns the class IDs in decreasing 61 | order of probability. 62 | ''' 63 | prob = softmax(scores) 64 | prob = np.squeeze(prob) 65 | classes = np.argsort(prob)[::-1] 66 | return classes 67 | ``` 68 | ### Sample test data 69 | Sets of sample input and output files are provided in 70 | * serialized protobuf TensorProtos (`.pb`), which are stored in the folders `test_data_set_*/`.
71 | 72 | ## License 73 | MIT 74 | -------------------------------------------------------------------------------- /vision/body_analysis/emotion_ferplus/model/emotion-ferplus-2.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 35041945 3 | hash = '9bb6a3d1fe9f48e3c3e1c800ac832b49202396deba985a1c936667299e88a70d' 4 | -------------------------------------------------------------------------------- /vision/body_analysis/emotion_ferplus/model/emotion-ferplus-2.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 32380999 3 | hash = '9e59aa186d7371331eb2991b0ac4fb468f985fc61b37ab6c65f3d2eeeb52a2eb' 4 | -------------------------------------------------------------------------------- /vision/body_analysis/emotion_ferplus/model/emotion-ferplus-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 35040571 3 | hash = '0d843c0a9efe4fe745cd6c2480f99a33f15d01dc2ebe021356f5adf9ee599782' 4 | -------------------------------------------------------------------------------- /vision/body_analysis/emotion_ferplus/model/emotion-ferplus-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 32384236 3 | hash = '01079e4235f927950ca0e24a27918cc96b5fcf2e57a86e2b541a7690055db389' 4 | -------------------------------------------------------------------------------- /vision/body_analysis/emotion_ferplus/model/emotion-ferplus-8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 35040571 3 | hash = 'c26ad006e24b68f25ab1601b91a343964cda376758688882d2606dc2bdf10174' 4 | -------------------------------------------------------------------------------- /vision/body_analysis/emotion_ferplus/model/emotion-ferplus-8.tar.gz: 
-------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 32384240 3 | hash = 'af291d1cba022235cce720f18b0878611076b425d516e1d4e77443af35fefeb2' 4 | -------------------------------------------------------------------------------- /vision/body_analysis/ultraface/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Ultra-lightweight face detection model 4 | 5 | ## Description 6 | This model is a lightweight facedetection model designed for edge computing devices. 7 | 8 | ## Model 9 | | Model | Download | Download (with sample test data) | ONNX version | Opset version | 10 | | ------------- | ------------- | ------------- | ------------- | ------------- | 11 | |version-RFB-320| [1.21 MB](models/version-RFB-320.onnx) | [1.92 MB](models/version-RFB-320.tar.gz) | 1.4 | 9 | 12 | |version-RFB-640| [1.51 MB](models/version-RFB-640.onnx) | [4.59 MB](models/version-RFB-640.tar.gz) | 1.4 | 9 | 13 | 14 | ### Dataset 15 | The training set is the VOC format data set generated by using the cleaned widerface labels provided by [Retinaface](https://arxiv.org/pdf/1905.00641.pdf) in conjunction with the widerface [dataset](http://shuoyang1213.me/WIDERFACE/). 16 | 17 | ### Source 18 | You can find the source code [here](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB). 19 | 20 | ### Demo 21 | Run [demo.py](demo.py) python scripts example. 22 | 23 | ## Inference 24 | 25 | ### Input 26 | Input tensor is `1 x 3 x height x width` with mean values `127, 127, 127` and scale factor `1.0 / 128`. Input image have to be previously converted to `RGB` format and resized to `320 x 240` pixels for **version-RFB-320** model (or `640 x 480` for **version-RFB-640** model). 
27 | 28 | ### Preprocessing 29 | Given a path `image_path` to the image you would like to score: 30 | ```python 31 | image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB) 32 | image = cv2.resize(image, (320, 240)) 33 | image_mean = np.array([127, 127, 127]) 34 | image = (image - image_mean) / 128 35 | image = np.transpose(image, [2, 0, 1]) 36 | image = np.expand_dims(image, axis=0) 37 | image = image.astype(np.float32) 38 | ``` 39 | 40 | ### Output 41 | The model outputs two arrays `(1 x 4420 x 2)` and `(1 x 4420 x 4)` of scores and boxes. 42 | 43 | ### Postprocessing 44 | In postprocessing, threshold filtration and [non-max suppression](dependencies/box_utils.py) are applied to the scores and boxes arrays. 45 | 46 | ## Contributors 47 | Valery Asiryan ([asiryan](https://github.com/asiryan)) 48 | 49 | ## License 50 | MIT 51 | -------------------------------------------------------------------------------- /vision/body_analysis/ultraface/demo.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | 3 | import cv2 4 | import onnxruntime as ort 5 | import argparse 6 | import numpy as np 7 | from dependencies.box_utils import predict 8 | 9 | # ------------------------------------------------------------------------------------------------------------------------------------------------ 10 | # Face detection using UltraFace-320 onnx model 11 | face_detector_onnx = "../ultraface/models/version-RFB-320.onnx" 12 | 13 | # Start from ORT 1.10, ORT requires explicitly setting the providers parameter if you want to use execution providers 14 | # other than the default CPU provider (as opposed to the previous behavior of providers getting set/registered by default 15 | # based on the build flags) when instantiating InferenceSession. 
# For example, if NVIDIA GPU is available and ORT Python package is built with CUDA, then call API as following:
# ort.InferenceSession(path/to/model, providers=['CUDAExecutionProvider'])
# Providers are passed explicitly: since ORT 1.10 execution providers are no
# longer registered implicitly (see comment above).
face_detector = ort.InferenceSession(face_detector_onnx, providers=["CPUExecutionProvider"])

# scale current rectangle to box
def scale(box):
    """Expand a (x1, y1, x2, y2) rectangle into a square centred on it.

    The longer side is kept and the shorter side is grown symmetrically.
    Any leftover pixel from an odd difference is assigned to the far edge so
    the result is exactly square (the previous version dropped it, yielding a
    box one pixel short on odd differences).
    """
    width = box[2] - box[0]
    height = box[3] - box[1]
    maximum = max(width, height)
    dx = int((maximum - width) / 2)
    dy = int((maximum - height) / 2)

    bboxes = [box[0] - dx, box[1] - dy,
              box[2] + (maximum - width - dx), box[3] + (maximum - height - dy)]
    return bboxes

# crop image
def cropImage(image, box):
    """Return the region of `image` inside box = (x1, y1, x2, y2).

    NOTE(review): this is a numpy view, not a copy — mutating the result
    mutates the original image.
    """
    num = image[box[1]:box[3], box[0]:box[2]]
    return num

# face detection method
def faceDetector(orig_image, threshold = 0.7):
    """Run the UltraFace-320 model on a BGR image.

    Returns (boxes, labels, probs) with boxes already mapped back to the
    coordinate system of `orig_image` by `predict`.
    """
    image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (320, 240))  # model's fixed input resolution
    image_mean = np.array([127, 127, 127])
    image = (image - image_mean) / 128     # normalize: mean 127, scale 1/128
    image = np.transpose(image, [2, 0, 1]) # HWC -> CHW
    image = np.expand_dims(image, axis=0)  # add batch dimension
    image = image.astype(np.float32)

    input_name = face_detector.get_inputs()[0].name
    confidences, boxes = face_detector.run(None, {input_name: image})
    boxes, labels, probs = predict(orig_image.shape[1], orig_image.shape[0], confidences, boxes, threshold)
    return boxes, labels, probs

# ------------------------------------------------------------------------------------------------------------------------------------------------
# Main void

parser=argparse.ArgumentParser()
parser.add_argument("-i", "--image", type=str, required=False, help="input image")
args=parser.parse_args()

img_path = args.image if args.image else "dependencies/1.jpg"
color = (255, 128, 0)

orig_image = cv2.imread(img_path)
if orig_image is None:
    # cv2.imread signals a missing/unreadable file by returning None rather
    # than raising; fail early with a clear message instead of a cryptic
    # cvtColor error inside faceDetector.
    raise FileNotFoundError("Could not read image: " + img_path)
boxes, labels, probs = faceDetector(orig_image)

for i in range(boxes.shape[0]):
    box = scale(boxes[i, :])
    cv2.rectangle(orig_image, (box[0], box[1]), (box[2], box[3]), color, 4)
cv2.imshow('', orig_image)
cv2.waitKey(0)  # without this the window is torn down before it is ever rendered
cv2.destroyAllWindows()
-------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 1588012 3 | hash = '5c88ea9835a7e1ec0e129888bf62eb787f4494999d3f563de4ec1f81b8a0d769' 4 | -------------------------------------------------------------------------------- /vision/body_analysis/ultraface/models/version-RFB-640.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 4818743 3 | hash = 'b5146fa33ab99a467d884d83c7d408bade9c3c17c8c86194f81238782d46ccaa' 4 | -------------------------------------------------------------------------------- /vision/classification/alexnet/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017-present, Facebook Inc., Microsoft Corporation. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL FACEBOOK INC. 
AND MICROSOFT CORPORATION BE LIABLE 19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /vision/classification/alexnet/model/bvlcalexnet-12-int8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 60984008 3 | hash = '59e051c751dd8ed1411c62a87b5514e8da18847d57b2a5adb89cc2f1179bc6ea' 4 | -------------------------------------------------------------------------------- /vision/classification/alexnet/model/bvlcalexnet-12-int8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 40683138 3 | hash = 'b9ceb47f552c9a968998096b1bb89325550f36d61d0de37a7649b0906640cb37' 4 | -------------------------------------------------------------------------------- /vision/classification/alexnet/model/bvlcalexnet-12-qdq.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 61009072 3 | hash = '90d37b39f3118567d8d810ea898260b6cda8cc020c5849c5f3e72e0a4f10c3ed' 4 | -------------------------------------------------------------------------------- /vision/classification/alexnet/model/bvlcalexnet-12-qdq.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 45565042 3 | hash = '74549d341b770b4928e4f406f10392a3ecea9f97eee8bcef586c2812acbbfd94' 4 | 
-------------------------------------------------------------------------------- /vision/classification/alexnet/model/bvlcalexnet-12.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 243863787 3 | hash = 'd3ecc6bd1818126c4eb0e4196e61c6b87826bafb7f6993faf31f210f4bc70368' 4 | -------------------------------------------------------------------------------- /vision/classification/alexnet/model/bvlcalexnet-12.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 226657165 3 | hash = '9ad0c775f03bd9fe5328474be5d59e50638e2381ff57227cf9776f70a3d059fb' 4 | -------------------------------------------------------------------------------- /vision/classification/alexnet/model/bvlcalexnet-3.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 243863514 3 | hash = 'c22b8b1aae670318dc45f76887c4e14ae1be0e76d51d626f515e20bf8164de2e' 4 | -------------------------------------------------------------------------------- /vision/classification/alexnet/model/bvlcalexnet-3.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 229606515 3 | hash = 'bf21d68f607faecbe60308ef57fa9085dd9019ccd2bf76f1396c3e941be5f754' 4 | -------------------------------------------------------------------------------- /vision/classification/alexnet/model/bvlcalexnet-6.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 243863628 3 | hash = '4700e64220c3c6107fb5e6a940351fe760aa74f5f72e129769fc51d2bae25187' 4 | -------------------------------------------------------------------------------- /vision/classification/alexnet/model/bvlcalexnet-6.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 
229606527 3 | hash = '214635af29ae3f060dd01fdf9beabe8942c58eaab7424820dfc4389d2f1f48ef' 4 | -------------------------------------------------------------------------------- /vision/classification/alexnet/model/bvlcalexnet-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 243863542 3 | hash = '41a3c6b70d44419eb9dd55de98ec7e64c200f8dce483cdeb255f96dc4a929d81' 4 | -------------------------------------------------------------------------------- /vision/classification/alexnet/model/bvlcalexnet-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 226848956 3 | hash = 'b8440831e9d1071862a86fc507e3da87e369413350886b5527f01802c9a73140' 4 | -------------------------------------------------------------------------------- /vision/classification/alexnet/model/bvlcalexnet-8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 243863542 3 | hash = 'c6ef4850bbdaf7ff0bae18ab0111392c292164ab3fe3046d59d996e6258a65ab' 4 | -------------------------------------------------------------------------------- /vision/classification/alexnet/model/bvlcalexnet-8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 226849065 3 | hash = 'c92581036a233dcb66c67c12279ca84a27ace9d28c063d314acd58eeebfb8e26' 4 | -------------------------------------------------------------------------------- /vision/classification/alexnet/model/bvlcalexnet-9.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 243863542 3 | hash = '8d869bd2a7f9cc0f71110684f78e0bbc30ca3f2816f5bc7ee4b86392b04acadf' 4 | -------------------------------------------------------------------------------- /vision/classification/alexnet/model/bvlcalexnet-9.tar.gz: 
-------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 226848864 3 | hash = '44013f05db0bf223b42e19e5e8d524cca5afc39e0c8a59908d212046d1158e93' 4 | -------------------------------------------------------------------------------- /vision/classification/caffenet/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017-present, Facebook Inc., Microsoft Corporation. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL FACEBOOK INC. 
AND MICROSOFT CORPORATION BE LIABLE 19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /vision/classification/caffenet/model/caffenet-12-int8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 60984071 3 | hash = '857207fe1bdab0351140b90fd48b6a698cfeeb4f9f39e112c37b07f7c01cfc17' 4 | -------------------------------------------------------------------------------- /vision/classification/caffenet/model/caffenet-12-int8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 40718510 3 | hash = '51144fd1f88c4e4c7c3ece4ea5773f903cf343064873779f587ee65c6de6004b' 4 | -------------------------------------------------------------------------------- /vision/classification/caffenet/model/caffenet-12-qdq.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 61009797 3 | hash = 'e669ba047db87ad066070c6b138380c7f5f716b0d4c744b040c066f5a154a9f8' 4 | -------------------------------------------------------------------------------- /vision/classification/caffenet/model/caffenet-12-qdq.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 45685092 3 | hash = '8f3f30213513e411a14316e129e11d023c937f5272daf5b6fc71fe69199d1470' 4 | -------------------------------------------------------------------------------- 
/vision/classification/caffenet/model/caffenet-12.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 243863798 3 | hash = '2f8a5a67693992d6c9f691e9d58a68e27565a7a6ad461825e0ac96e32b665a68' 4 | -------------------------------------------------------------------------------- /vision/classification/caffenet/model/caffenet-12.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 226654456 3 | hash = '2c4556a33ccd6519d98d23af42034999161fb7da78ce6afaf8228dcf50b80efa' 4 | -------------------------------------------------------------------------------- /vision/classification/caffenet/model/caffenet-3.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 243863525 3 | hash = 'c467a0fc25b65d02f43b79e6ff8f5c7575763e9cfb7bdc5fe979ba39efe70fb2' 4 | -------------------------------------------------------------------------------- /vision/classification/caffenet/model/caffenet-3.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 229550734 3 | hash = '22d53af4bc92feb20be1790d1e3a7cd4ed8cdcbf1e1a3a2cc50019612d31bc87' 4 | -------------------------------------------------------------------------------- /vision/classification/caffenet/model/caffenet-6.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 243863639 3 | hash = '6c1b2a41b76374cf0786f7c1f081cc39881bd53368ffd7b1fa0b2bb9326b2c1f' 4 | -------------------------------------------------------------------------------- /vision/classification/caffenet/model/caffenet-6.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 229550795 3 | hash = 'd2ab8024f803093a438e2d5e704d07f3f818f649a400caaa7123876960440d01' 4 | 
-------------------------------------------------------------------------------- /vision/classification/caffenet/model/caffenet-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 243863553 3 | hash = 'd406c2807254cf623d33212486ee10c92a9128f85ea8235998885ffdd0c6a3dc' 4 | -------------------------------------------------------------------------------- /vision/classification/caffenet/model/caffenet-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 226844168 3 | hash = '39dee24290b449a2abb7faea0310ffd4be3450841322db1231eaf2517c3036b1' 4 | -------------------------------------------------------------------------------- /vision/classification/caffenet/model/caffenet-8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 243863553 3 | hash = '9bae3eab151a5ae33ddb95401a1f8edac54864f949c12c05ef31b2b6f286ecda' 4 | -------------------------------------------------------------------------------- /vision/classification/caffenet/model/caffenet-8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 226844337 3 | hash = '1c115f9ec9f221e1973a63814033a2ab5589edc6d176baa9ac2239720ce2d301' 4 | -------------------------------------------------------------------------------- /vision/classification/caffenet/model/caffenet-9.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 243863553 3 | hash = 'cec468dbb2ce14dc3ba3f10612334cc00d6286654a892e67bf6b1bfe8e0d60ba' 4 | -------------------------------------------------------------------------------- /vision/classification/caffenet/model/caffenet-9.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 226844131 3 | hash = 
'1a2a96ddf83a28c9054024ba226bb5f726417e061218bb5c1e05ce78f9c9e56e' 4 | -------------------------------------------------------------------------------- /vision/classification/densenet-121/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # DenseNet-121 4 | 5 | |Model |Download |Download (with sample test data)| ONNX version |Opset version|Top-1 accuracy (%)| 6 | | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | 7 | |DenseNet-121| [32 MB](model/densenet-3.onnx) | [33 MB](model/densenet-3.tar.gz) | 1.1 | 3| | 8 | |DenseNet-121| [32 MB](model/densenet-6.onnx) | [33 MB](model/densenet-6.tar.gz) | 1.1.2 | 6| | 9 | |DenseNet-121| [32 MB](model/densenet-7.onnx) | [33 MB](model/densenet-7.tar.gz) | 1.2 | 7| | 10 | |DenseNet-121| [32 MB](model/densenet-8.onnx) | [33 MB](model/densenet-8.tar.gz) | 1.3 | 8| | 11 | |DenseNet-121| [32 MB](model/densenet-9.onnx) | [33 MB](model/densenet-9.tar.gz) | 1.4 | 9| | 12 | |DenseNet-121-12| [32 MB](model/densenet-12.onnx) | [30 MB](model/densenet-12.tar.gz) | 1.9 | 12| 60.96 | 13 | |DenseNet-121-12-int8| [9 MB](model/densenet-12-int8.onnx) | [6 MB](model/densenet-12-int8.tar.gz) | 1.9 | 12| 60.20 | 14 | > Compared with the DenseNet-121-12, DenseNet-121-12-int8's op-1 accuracy drop ratio is 1.25% and performance improvement is 1.18x. 15 | > 16 | > Note the performance depends on the test hardware. 17 | > 18 | > Performance data here is collected with Intel® Xeon® Platinum 8280 Processor, 1s 4c per instance, CentOS Linux 8.3, data batch size is 1. 19 | 20 | ## Description 21 | DenseNet-121 is a convolutional neural network for classification. 
22 | 23 | ### Paper 24 | [Densely Connected Convolutional Networks](https://arxiv.org/abs/1608.06993) 25 | 26 | ### Dataset 27 | [ILSVRC2012](http://www.image-net.org/challenges/LSVRC/2012/) 28 | 29 | ## Source 30 | Caffe2 DenseNet-121 ==> ONNX DenseNet 31 | 32 | ## Model input and output 33 | ### Input 34 | ``` 35 | data_0: float[1, 3, 224, 224] 36 | ``` 37 | ### Output 38 | ``` 39 | fc6_1: float[1, 1000, 1, 1] 40 | ``` 41 | ### Pre-processing steps 42 | ### Post-processing steps 43 | ### Sample test data 44 | randomly generated sample test data: 45 | - test_data_0.npz 46 | - test_data_1.npz 47 | - test_data_2.npz 48 | - test_data_set_0 49 | - test_data_set_1 50 | - test_data_set_2 51 | 52 | ## Results/accuracy on test set 53 | 54 | ## Quantization 55 | DenseNet-121-12-int8 is obtained by quantizing the DenseNet-121-12 fp32 model. We use [Intel® Neural Compressor](https://github.com/intel/neural-compressor) with onnxruntime backend to perform quantization. View the [instructions](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/image_recognition/onnx_model_zoo/densenet/quantization/ptq/README.md) to understand how to use Intel® Neural Compressor for quantization. 
56 | 57 | ### Environment 58 | onnx: 1.9.0 59 | onnxruntime: 1.10.0 60 | 61 | ### Prepare model 62 | ```shell 63 | wget https://github.com/onnx/models/raw/main/vision/classification/densenet-121/model/densenet-12.onnx 64 | ``` 65 | 66 | ### Model quantize 67 | ```bash 68 | bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx 69 | --config=densenet.yaml \ 70 | --output_model=path/to/save 71 | ``` 72 | 73 | ## References 74 | * [Intel® Neural Compressor](https://github.com/intel/neural-compressor) 75 | 76 | ## Contributors 77 | * [mengniwang95](https://github.com/mengniwang95) (Intel) 78 | * [airMeng](https://github.com/airMeng) (Intel) 79 | * [ftian1](https://github.com/ftian1) (Intel) 80 | * [hshen14](https://github.com/hshen14) (Intel) 81 | 82 | ## License 83 | MIT 84 | -------------------------------------------------------------------------------- /vision/classification/densenet-121/model/densenet-12-int8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 8901304 3 | hash = '43160661e34e1e79d382f800e960b47ee21c590efaa16e83ca2cfc11d4ef590b' 4 | -------------------------------------------------------------------------------- /vision/classification/densenet-121/model/densenet-12-int8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 6329146 3 | hash = '5e5b96021678948549e706109c383d31a2a7fcb2d82b3fbf569431fd675320fc' 4 | -------------------------------------------------------------------------------- /vision/classification/densenet-121/model/densenet-12.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 32726877 3 | hash = '45df6c5df3bbe35fc01b00c0b5d2d24e4ada27f3b4273dbba93b65e05334e948' 4 | -------------------------------------------------------------------------------- /vision/classification/densenet-121/model/densenet-12.tar.gz: 
-------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 30882195 3 | hash = '36426fceb24d100c61e18ea4a982883ae34813494e3509ee87272c54652b1b65' 4 | -------------------------------------------------------------------------------- /vision/classification/densenet-121/model/densenet-3.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 32718955 3 | hash = '0a3da70b2cb447e513ec7007e69ebcfb278f59d659047bc98f11631080a0bd4c' 4 | -------------------------------------------------------------------------------- /vision/classification/densenet-121/model/densenet-3.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 33658961 3 | hash = '8209d440b17fdadab1a066d7a0f2475b5a45cd4715b64744fa62d2df7852f036' 4 | -------------------------------------------------------------------------------- /vision/classification/densenet-121/model/densenet-6.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 32719461 3 | hash = '05a53493166d4b12d1155af6ad0043f4f9331eca52a782c7cac9759e01998300' 4 | -------------------------------------------------------------------------------- /vision/classification/densenet-121/model/densenet-6.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 30902760 3 | hash = '020ed378f69cf4188ee17a3a48ce71699a16fa15ad0ce055bb2d7dcb29e64695' 4 | -------------------------------------------------------------------------------- /vision/classification/densenet-121/model/densenet-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 32719461 3 | hash = '07382960be4f12fc4b4c4199b031cbc0bf9323263eca178dfe8cc1a8060417bb' 4 | 
-------------------------------------------------------------------------------- /vision/classification/densenet-121/model/densenet-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 30902681 3 | hash = '6eafd0092492fa994e0fd880024766a9373a1d5b5df1ecce45615e72cd6eb85f' 4 | -------------------------------------------------------------------------------- /vision/classification/densenet-121/model/densenet-8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 32719461 3 | hash = '21a3c292b312c392aed2de66e1016cbe0aaa9faf6c9eddf97de333908414652d' 4 | -------------------------------------------------------------------------------- /vision/classification/densenet-121/model/densenet-8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 30902606 3 | hash = '5a5e27bb04f7d8353ec1d86179b38b29cfee6f031e902c69303bb7444a2e2599' 4 | -------------------------------------------------------------------------------- /vision/classification/densenet-121/model/densenet-9.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 32719461 3 | hash = '05a53493166d4b12d1155af6ad0043f4f9331eca52a782c7cac9759e01998300' 4 | -------------------------------------------------------------------------------- /vision/classification/densenet-121/model/densenet-9.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 30902653 3 | hash = '8cad5e8c915c08bdc729b7b6bf44340fde381b535ab2fae1d0e3b41cf3f9c279' 4 | -------------------------------------------------------------------------------- /vision/classification/efficientnet-lite4/model/efficientnet-lite4-11-int8.onnx: -------------------------------------------------------------------------------- 1 | # xet 
version 0 2 | filesize = 13585963 3 | hash = '728a34f3df5a1afe36ebaffde2240ffe169f3291517fe1c0d051c124053b6dbb' 4 | -------------------------------------------------------------------------------- /vision/classification/efficientnet-lite4/model/efficientnet-lite4-11-int8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 12789970 3 | hash = '8dfbb2a8d4231ac779b65330184ba794dde1adebbc41fbd8837bf96a22ff2d7b' 4 | -------------------------------------------------------------------------------- /vision/classification/efficientnet-lite4/model/efficientnet-lite4-11-qdq.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 13469992 3 | hash = '162f6741b5990eb99a4587e2c375244f30655e6b44ddf4d4d9f41862aaff58df' 4 | -------------------------------------------------------------------------------- /vision/classification/efficientnet-lite4/model/efficientnet-lite4-11-qdq.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 10194220 3 | hash = 'aa9c12f63edaf95f73166284b2016f016d0d05a9b3d50afe47980dd12411d3be' 4 | -------------------------------------------------------------------------------- /vision/classification/efficientnet-lite4/model/efficientnet-lite4-11.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 51946641 3 | hash = '80b74828ab36ee502a39b9e6f2f9826d781d3c2161056c651b916badd0056fcf' 4 | -------------------------------------------------------------------------------- /vision/classification/efficientnet-lite4/model/efficientnet-lite4-11.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 48592826 3 | hash = 'e368eb8ff7ff5d23598fbe133596403163717160eec8ab8effb90b55ad8f22ad' 4 | 
# SPDX-License-Identifier: Apache-2.0

"""Prepare the ImageNet dataset"""
import os
import argparse
import tarfile
import pickle
import gzip
from tqdm import tqdm
from mxnet.gluon.utils import check_sha1

_TARGET_DIR = os.path.expanduser('~/.onnx/datasets/imagenet')
_TRAIN_TAR = 'ILSVRC2012_img_train.tar'
_TRAIN_TAR_SHA1 = '43eda4fe35c1705d6606a6a7a633bc965d194284'
_VAL_TAR = 'ILSVRC2012_img_val.tar'
_VAL_TAR_SHA1 = '5f3f73da3395154b60528b2b2a2caf2374f5f178'

def parse_args():
    """Parse command-line options for the extraction script."""
    parser = argparse.ArgumentParser(
        description='Setup the ImageNet dataset.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--download-dir', required=True,
                        help="The directory that contains downloaded tar files")
    parser.add_argument('--target-dir', default=_TARGET_DIR,
                        help="The directory to store extracted images")
    parser.add_argument('--checksum', action='store_true',
                        help="If check integrity before extracting.")
    args = parser.parse_args()
    return args

def check_file(filename, checksum, sha1):
    """Verify that `filename` exists and, if `checksum`, matches `sha1`."""
    if not os.path.exists(filename):
        raise ValueError('File not found: '+filename)
    if checksum and not check_sha1(filename, sha1):
        raise ValueError('Corrupted file: '+filename)

def _check_members(tar, target_dir):
    """Reject tar members that would extract outside `target_dir`.

    tarfile.extract/extractall trust archive member names; a crafted archive
    with '..' or absolute paths could otherwise write outside the target
    directory (path traversal — see the tarfile security documentation).
    """
    base = os.path.realpath(target_dir)
    for member in tar.getmembers():
        dest = os.path.realpath(os.path.join(target_dir, member.name))
        if os.path.commonpath([base, dest]) != base:
            raise ValueError('Unsafe path in tar archive: ' + member.name)

def extract_train(tar_fname, target_dir):
    """Extract the train tar; each inner per-class tar becomes a subfolder."""
    os.makedirs(target_dir)
    with tarfile.open(tar_fname) as tar:
        print("Extracting "+tar_fname+"...")
        _check_members(tar, target_dir)
        # extract each class one-by-one
        pbar = tqdm(total=len(tar.getnames()))
        for class_tar in tar:
            pbar.set_description('Extract '+class_tar.name)
            tar.extract(class_tar, target_dir)
            class_fname = os.path.join(target_dir, class_tar.name)
            class_dir = os.path.splitext(class_fname)[0]
            os.mkdir(class_dir)
            with tarfile.open(class_fname) as f:
                _check_members(f, class_dir)
                f.extractall(class_dir)
            os.remove(class_fname)
            pbar.update(1)
        pbar.close()

def extract_val(tar_fname, target_dir):
    """Extract the val tar and sort images into class subfolders."""
    os.makedirs(target_dir)
    print('Extracting ' + tar_fname)
    with tarfile.open(tar_fname) as tar:
        _check_members(tar, target_dir)
        tar.extractall(target_dir)
    # move images to proper subfolders using the shipped validation label map
    val_maps_file = os.path.join(os.path.dirname(__file__), 'imagenet_val_maps.pklz')
    with gzip.open(val_maps_file, 'rb') as f:
        dirs, mappings = pickle.load(f)
    for d in dirs:
        os.makedirs(os.path.join(target_dir, d))
    for m in mappings:
        os.rename(os.path.join(target_dir, m[0]), os.path.join(target_dir, m[1], m[0]))

def main():
    """Validate the downloaded tars and extract both splits under --target-dir."""
    args = parse_args()

    target_dir = os.path.expanduser(args.target_dir)
    if os.path.exists(target_dir):
        raise ValueError('Target dir ['+target_dir+'] exists. Remove it first')

    tar_dir = os.path.expanduser(args.download_dir)
    train_tar_fname = os.path.join(tar_dir, _TRAIN_TAR)
    check_file(train_tar_fname, args.checksum, _TRAIN_TAR_SHA1)
    val_tar_fname = os.path.join(tar_dir, _VAL_TAR)
    check_file(val_tar_fname, args.checksum, _VAL_TAR_SHA1)

    extract_train(train_tar_fname, os.path.join(target_dir, 'train'))
    extract_val(val_tar_fname, os.path.join(target_dir, 'val'))

if __name__ == '__main__':
    main()
# SPDX-License-Identifier: Apache-2.0

import mxnet as mx
import numpy as np

# Post-processing function for ImageNet models
def postprocess(scores):
    """Rank ImageNet class IDs by predicted probability.

    ``scores`` is an mxnet ndarray of raw network outputs (logits).
    Returns a numpy array of class indices ordered from the most to the
    least probable class.
    """
    probabilities = np.squeeze(mx.ndarray.softmax(scores).asnumpy())
    ranked = np.argsort(probabilities)
    # argsort is ascending; reverse it so index 0 is the top class
    return ranked[::-1]
# SPDX-License-Identifier: Apache-2.0

import mxnet
from mxnet.gluon.data.vision import transforms

# Pre-processing function for ImageNet models
def preprocess(img):
    """Apply the standard ImageNet evaluation transform to an image.

    The image is resized and center-cropped to 224x224, converted to a
    CHW float tensor, normalized with the ImageNet mean/std, and given a
    leading batch dimension of size 1.
    """
    pipeline = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225]),
    ])
    batched = pipeline(img).expand_dims(axis=0)  # batchify
    return batched
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL FACEBOOK INC. AND MICROSOFT CORPORATION BE LIABLE 19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/googlenet/model/googlenet-12-int8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 7122858 3 | hash = 'c7014101d4c922e72ef11dc9e61eea467ffbf284aee1f0cbb8966f5bb2c9e1c8' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/googlenet/model/googlenet-12-int8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 5724344 3 | hash = '5f855f949602e86eebe071b53c95ad8bdbb76670a365ed6a007b1fe6d3a4ff73' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/googlenet/model/googlenet-12-qdq.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 7135204 3 | hash = 'dc62b12787351470946930f5d7211204611157b69d9e9c460c6c33ab99cee708' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/googlenet/model/googlenet-12-qdq.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 5562451 3 | hash = '1731ca557834ccba2292faa9392573e09de649ac3594640a16e9b26e9679dfa4' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/googlenet/model/googlenet-12.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 28021836 3 | hash = '080f6eedc56fcebfdbea0f6b73462622f812e74a2a6e00c3e4741610e0e161ee' 4 | -------------------------------------------------------------------------------- 
/vision/classification/inception_and_googlenet/googlenet/model/googlenet-12.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 26545491 3 | hash = '9b7225e1ec6c1576db41222310555db02abb71a7f59836a910cc7ff065f285a3' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/googlenet/model/googlenet-3.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 28020232 3 | hash = 'dd1b37f923c5fc27a035fb4e2633aeb1fedd80bd12b24a6cd6042d52672edc75' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/googlenet/model/googlenet-3.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 26565157 3 | hash = '7579dfb577d4653e481ec040001c0cff0e8a37b74e0ed918c667eb84ddcfdb83' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/googlenet/model/googlenet-6.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 28020232 3 | hash = '1b560768c2706facd3ad766205a17b4d3744d0d2e5c4e9fd27d2bfdbf5ec7a56' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/googlenet/model/googlenet-6.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 26565143 3 | hash = 'f189e19e47d16d7517f79be401a4cf598c6321c0f88d59080721a0fd79f2b96f' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/googlenet/model/googlenet-7.onnx: 
-------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 28020232 3 | hash = 'dd1b37f923c5fc27a035fb4e2633aeb1fedd80bd12b24a6cd6042d52672edc75' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/googlenet/model/googlenet-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 26565243 3 | hash = '59fde6960246c08fe29efc8ce6516af62f3b2cd7ff40a442344f94dad681c34e' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/googlenet/model/googlenet-8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 28020232 3 | hash = 'e0d89b8d9d03dc277ba66094dfa802a7b350d0e785882ecf0070023eafb28c48' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/googlenet/model/googlenet-8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 26565142 3 | hash = 'aa151ab062fe1d274fcda48aef4e102b799c254d5901591ff604a23c6d1e386b' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/googlenet/model/googlenet-9.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 28020232 3 | hash = '1b560768c2706facd3ad766205a17b4d3744d0d2e5c4e9fd27d2bfdbf5ec7a56' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/googlenet/model/googlenet-9.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 26565214 3 | hash = 
'48ffbed6278fd6ddc099913a35c4724dc2fecd45b106e9b5d0914015fc38c3fb' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/inception_v1/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Inception v1 4 | 5 | |Model |Download |Download (with sample test data)| ONNX version |Opset version| Top-1 accuracy (%)| 6 | | ------------- | ------------- | ------------- | ------------- | ------------- |------------- | 7 | |Inception-1| [28 MB](model/inception-v1-3.onnx) | [29 MB](model/inception-v1-3.tar.gz) | 1.1 | 3| | 8 | |Inception-1| [28 MB](model/inception-v1-6.onnx) | [29 MB](model/inception-v1-6.tar.gz) | 1.1.2 | 6| | 9 | |Inception-1| [28 MB](model/inception-v1-7.onnx) | [29 MB](model/inception-v1-7.tar.gz) | 1.2 | 7| | 10 | |Inception-1| [28 MB](model/inception-v1-8.onnx) | [29 MB](model/inception-v1-8.tar.gz) | 1.3 | 8| | 11 | |Inception-1| [28 MB](model/inception-v1-9.onnx) | [29 MB](model/inception-v1-9.tar.gz) | 1.4 | 9| | 12 | |Inception-1| [27 MB](model/inception-v1-12.onnx) | [25 MB](model/inception-v1-12.tar.gz) | 1.9 | 12| 67.23| 13 | |Inception-1-int8| [10 MB](model/inception-v1-12-int8.onnx) | [9 MB](model/inception-v1-12-int8.tar.gz) | 1.9 | 12| 67.24| 14 | |Inception-1-qdq| [7 MB](model/inception-v1-12-qdq.onnx) | [5 MB](model/inception-v1-12-qdq.tar.gz) | 1.12 | 12 | 67.21 | 15 | > Compared with the fp32 Inception-1, int8 Inception-1's Top-1 accuracy drop ratio is -0.01% and performance improvement is 1.26x. 16 | > 17 | > **Note** 18 | > 19 | > The performance depends on the test hardware. Performance data here is collected with Intel® Xeon® Platinum 8280 Processor, 1s 4c per instance, CentOS Linux 8.3, data batch size is 1. 20 | 21 | 22 | ## Description 23 | Inception v1 is a reproduction of GoogLeNet. 
random generated sample test data:
68 | ```bash 69 | bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx 70 | --config=inception_v1.yaml \ 71 | --data_path=/path/to/imagenet \ 72 | --label_path=/path/to/imagenet/label \ 73 | --output_model=path/to/save 74 | ``` 75 | 76 | ## References 77 | * [Going deeper with convolutions](https://arxiv.org/abs/1409.4842) 78 | 79 | * [Intel® Neural Compressor](https://github.com/intel/neural-compressor) 80 | 81 | 82 | ## Contributors 83 | * [mengniwang95](https://github.com/mengniwang95) (Intel) 84 | * [airMeng](https://github.com/airMeng) (Intel) 85 | * [ftian1](https://github.com/ftian1) (Intel) 86 | * [hshen14](https://github.com/hshen14) (Intel) 87 | 88 | ## License 89 | MIT 90 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/inception_v1/model/inception-v1-12-int8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 10191535 3 | hash = '0a499c875deb2f0c15cb37113456ca330db612fdab08b2e6cd6ca174d3a2262b' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/inception_v1/model/inception-v1-12-int8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 9474526 3 | hash = 'a08b7713f8c1d8773d28fd6c534521ba9b5c278a2e35d2bf46f80b9089137be4' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/inception_v1/model/inception-v1-12-qdq.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 7135424 3 | hash = 'd33ad6f35ac87fd47a18753c3444985a8b01bd57a3179e2d38adaf8573262bd9' 4 | -------------------------------------------------------------------------------- 
/vision/classification/inception_and_googlenet/inception_v1/model/inception-v1-12-qdq.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 5559367 3 | hash = '6d6dd5c857a6d4abccb86911add46f710fd377de9856ec8769e0f81c93b2ea66' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/inception_v1/model/inception-v1-12.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 28021958 3 | hash = '11712643ac570331b32d0c6511910a414d2e18bfa3933b284e5a0ee775a39562' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/inception_v1/model/inception-v1-12.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 26545542 3 | hash = '45b8edc3ef8a02dcd4fb91611d2c48b8e208450e0dc9d8e03aac84bd7f33825f' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/inception_v1/model/inception-v1-3.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 28020217 3 | hash = '577aab9eb35445e5cfd8aa1a22ae91161d26a9e598163fbc59f9520129344806' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/inception_v1/model/inception-v1-3.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 29324028 3 | hash = '33a68dbf5678f07bc0167024dbdb969d84028b77700d06cc8c60833c05025f64' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/inception_v1/model/inception-v1-6.onnx: 
-------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 28020390 3 | hash = '79490bb89484daa5bf6604ce63b8a94200193fc4299216dbf2285f94939c6dea' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/inception_v1/model/inception-v1-6.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 29324092 3 | hash = 'db20794ab43a6ed8e7e1f9e585748c85d4a373f31ca8584c0152de729fb54470' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/inception_v1/model/inception-v1-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 28020356 3 | hash = '7f1ea539e71e7a70e6aaef30dcc774488a71455c26a79109e564077505d86e32' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/inception_v1/model/inception-v1-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 26565189 3 | hash = '109641eb268f18057c3945d1c0d91f7d1fe11a951828cad520c359eb56530004' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/inception_v1/model/inception-v1-8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 28020356 3 | hash = 'e4f89d8c06a8cd5dde2df76892b8ccdf10f387ae71ecc82ad5c4196f8f72d3e0' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/inception_v1/model/inception-v1-8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 
Inception v2 is a deep convolutional network for classification.
random generated sample test data:
/vision/classification/inception_and_googlenet/inception_v2/model/inception-v2-6.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 45015338 3 | hash = '2550273340b08333b94c12a96c1c7433b29971b57298c68cf8c4f459fa067998' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/inception_v2/model/inception-v2-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 45042799 3 | hash = 'f7556b8d7a57b85540519b65c9b2dcf8181457670b321c624525d8f25737f857' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/inception_v2/model/inception-v2-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 42257087 3 | hash = '7ddc8dfc62d8e258c5a660e67df180de632e054b51162eca18530c21356d4273' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/inception_v2/model/inception-v2-8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 45042799 3 | hash = '74ea13cdbdde5b343ca5e0c7e10e68144ff92628e8ef956fd84c06e837edea92' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/inception_v2/model/inception-v2-8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 42257042 3 | hash = '4f61f8a95d83674ba0c2adc8472d1198bef534cb87a8f2fb05862db2c0145be3' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/inception_v2/model/inception-v2-9.onnx: 
-------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 45042799 3 | hash = '25d6dd7f503b2b0be8f1eb09f8d7d14687886e5f9989526b2bbce397865b4693' 4 | -------------------------------------------------------------------------------- /vision/classification/inception_and_googlenet/inception_v2/model/inception-v2-9.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 42256996 3 | hash = '4fc49629f7631c5f2e38716b07ff8e6131385f5e2c10e6447501636e4ec8653c' 4 | -------------------------------------------------------------------------------- /vision/classification/mnist/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # MNIST - Handwritten Digit Recognition 4 | 5 | ## Description 6 | This model predicts handwritten digits using a convolutional neural network (CNN). 7 | 8 | ## Model 9 | |Model|Download|Download (with sample test data)| ONNX version |Opset version|TOP-1 ERROR| 10 | |-----|:-------|:-------------------------------|:-------------|:------------|:------------| 11 | |MNIST|[27 kB](model/mnist-1.onnx)|[26 kB](model/mnist-1.tar.gz) |1.0 |1 |1.1% | 12 | |MNIST|[26 kB](model/mnist-7.onnx)|[26 kB](model/mnist-7.tar.gz) |1.2 |7 |1.1% | 13 | |MNIST|[26 kB](model/mnist-8.onnx)|[26 kB](model/mnist-8.tar.gz) |1.3 |8 |1.1% | 14 | |MNIST-12|[26 kB](model/mnist-12.onnx)|[26 kB](model/mnist-12.tar.gz) |1.9 |12 |1.1% | 15 | |MNIST-12-int8|[11 kB](model/mnist-12-int8.onnx)|[10 kB](model/mnist-12-int8.tar.gz) |1.9 |12 |1.1% | 16 | 17 | ### Dataset 18 | The model has been trained on the popular [MNIST dataset](http://yann.lecun.com/exdb/mnist/). 19 | 20 | ### Source 21 | The model is trained in CNTK following the tutorial [CNTK 103D: Convolutional Neural Network with MNIST](https://github.com/Microsoft/CNTK/blob/master/Tutorials/CNTK_103D_MNIST_ConvolutionalNeuralNetwork.ipynb). 
input = np.reshape(gray, (1,1,28,28))
View the [instructions](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq/README.md) to understand how to use Intel® Neural Compressor for quantization. 59 | 60 | ### Environment 61 | onnx: 1.9.0 62 | onnxruntime: 1.10.0 63 | 64 | ### Prepare model 65 | ```shell 66 | wget https://github.com/onnx/models/raw/main/vision/classification/mnist/model/mnist-12.onnx 67 | ``` 68 | 69 | ### Model quantize 70 | ```bash 71 | bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx 72 | --config=mnist.yaml \ 73 | --output_model=path/to/save 74 | ``` 75 | 76 | ## References 77 | * [Intel® Neural Compressor](https://github.com/intel/neural-compressor) 78 | 79 | ## Contributors 80 | * [mengniwang95](https://github.com/mengniwang95) (Intel) 81 | * [airMeng](https://github.com/airMeng) (Intel) 82 | * [ftian1](https://github.com/ftian1) (Intel) 83 | * [hshen14](https://github.com/hshen14) (Intel) 84 | 85 | ## License 86 | MIT 87 | -------------------------------------------------------------------------------- /vision/classification/mnist/model/mnist-1.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 27266 3 | hash = '49abf22a007c97b740ea1b94168fb0fb32044298e199d625ede4f9133056e412' 4 | -------------------------------------------------------------------------------- /vision/classification/mnist/model/mnist-1.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 26518 3 | hash = '924c25688f4ec15be0741ee988cb15dbf4a5f4723805c2f4e3e0b4421fdbffb3' 4 | -------------------------------------------------------------------------------- /vision/classification/mnist/model/mnist-12-int8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 10969 3 | hash = 
'07722839d9c5534c7c10adf155682a4785214fcedd04de0fb9e1eedfb7c705af' 4 | -------------------------------------------------------------------------------- /vision/classification/mnist/model/mnist-12-int8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 10668 3 | hash = '6b9c80c82ee7b72dd2c0cf1de558700a650464f30fc69dd1655513ef3c4f106d' 4 | -------------------------------------------------------------------------------- /vision/classification/mnist/model/mnist-12.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 26143 3 | hash = 'b44c0901c98c07227cd2f0949100bad5f09889be4ee7e14541df1b2d7ea6faee' 4 | -------------------------------------------------------------------------------- /vision/classification/mnist/model/mnist-12.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 26741 3 | hash = '67ab845cc479f0776b75589af08283b701fe86fafc964ce4e3417322999ad93e' 4 | -------------------------------------------------------------------------------- /vision/classification/mnist/model/mnist-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 26454 3 | hash = '1b56e40fd2907f77aeeaad5a9d55430cf05ddf3e589ea24276c96548c1550b64' 4 | -------------------------------------------------------------------------------- /vision/classification/mnist/model/mnist-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 26757 3 | hash = 'ebe9e7d2a076e079c37d11932bad67360b761111b31207b527e72cc7841a7ef1' 4 | -------------------------------------------------------------------------------- /vision/classification/mnist/model/mnist-8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize 
= 26454 3 | hash = '9f9d9a7b0758b2a5559893f308e3e49d37bd93d0c1b80bae5947cc7c3f143862' 4 | -------------------------------------------------------------------------------- /vision/classification/mnist/model/mnist-8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 26751 3 | hash = 'ca2e183feeed9f5b3cb24d1bbfb93d20fa3236d8b44bbee85f5c815b3aee92d7' 4 | -------------------------------------------------------------------------------- /vision/classification/mobilenet/model/mobilenetv2-10.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 13963115 3 | hash = '24d4e6ff0492680af2bc15b1efce09f2f63cd1d06e51c0e4ed42632a48df86f4' 4 | -------------------------------------------------------------------------------- /vision/classification/mobilenet/model/mobilenetv2-10.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 13504391 3 | hash = '49a0e36e8a39c45fdd3b1cf9eff52a30b92d148c9e2d969749a0bdf7c3f1fa8b' 4 | -------------------------------------------------------------------------------- /vision/classification/mobilenet/model/mobilenetv2-12-int8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 3655033 3 | hash = '2d6b2edea400b62aa04a0f9840b81e2e20dd2fe6dc8d1adcb5333a3f09a2e3c7' 4 | -------------------------------------------------------------------------------- /vision/classification/mobilenet/model/mobilenetv2-12-int8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 3914892 3 | hash = '69b1f2d26b1763cd93c98aa96d35bf8ef1420c329ea64fbfcf5a10bac61aee95' 4 | -------------------------------------------------------------------------------- /vision/classification/mobilenet/model/mobilenetv2-12-qdq.onnx: 
-------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 3593903 3 | hash = 'e882a28f211df2b13570ada7ac8df1ec4ce7a689068334813ed1d133f489c8b6' 4 | -------------------------------------------------------------------------------- /vision/classification/mobilenet/model/mobilenetv2-12-qdq.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 3434401 3 | hash = '6997bbed287f34e143a1364064be4edbdfddb7cc1e16b078aff5359d34a4162c' 4 | -------------------------------------------------------------------------------- /vision/classification/mobilenet/model/mobilenetv2-12.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 13964571 3 | hash = 'b82d6d0bb1a435b606622d752a023b904e5e30ec0843204b441922168f16dc40' 4 | -------------------------------------------------------------------------------- /vision/classification/mobilenet/model/mobilenetv2-12.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 13498787 3 | hash = '5c9f4c208484ec1454d697ecbb050efe5ff700007c07215dc2901da91a384e3a' 4 | -------------------------------------------------------------------------------- /vision/classification/mobilenet/model/mobilenetv2-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 14246826 3 | hash = 'c7fd814f255c9a74f53b07defb0c21238a67339e130ee676c630b41269195236' 4 | -------------------------------------------------------------------------------- /vision/classification/mobilenet/model/mobilenetv2-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 13682646 3 | hash = '9f97bab05fe913e230392e2bde951d529368646a9e01b0ed91de1c317789933a' 4 | 
-------------------------------------------------------------------------------- /vision/classification/rcnn_ilsvrc13/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017-present, Facebook Inc., Microsoft Corporation. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL FACEBOOK INC. AND MICROSOFT CORPORATION BE LIABLE 19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | -------------------------------------------------------------------------------- /vision/classification/rcnn_ilsvrc13/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # R-CNN ILSVRC13 4 | 5 | |Model |Download |Download (with sample test data)| ONNX version |Opset version| 6 | | ------------- | ------------- | ------------- | ------------- | ------------- | 7 | |R-CNN ILSVRC13| [32 MB](model/rcnn-ilsvrc13-3.onnx) | [231 MB](model/rcnn-ilsvrc13-3.tar.gz) | 1.1 | 3| 8 | |R-CNN ILSVRC13| [32 MB](model/rcnn-ilsvrc13-6.onnx) | [231 MB](model/rcnn-ilsvrc13-6.tar.gz) | 1.1.2 | 6| 9 | |R-CNN ILSVRC13| [32 MB](model/rcnn-ilsvrc13-7.onnx) | [231 MB](model/rcnn-ilsvrc13-7.tar.gz) | 1.2 | 7| 10 | |R-CNN ILSVRC13| [32 MB](model/rcnn-ilsvrc13-8.onnx) | [231 MB](model/rcnn-ilsvrc13-8.tar.gz) | 1.3 | 8| 11 | |R-CNN ILSVRC13| [32 MB](model/rcnn-ilsvrc13-9.onnx) | [231 MB](model/rcnn-ilsvrc13-9.tar.gz) | 1.4 | 9| 12 | 13 | 14 | ## Description 15 | R-CNN is a convolutional neural network for detection. 16 | This model was made by transplanting the R-CNN SVM classifiers into a fc-rcnn classification layer. 
17 | 18 | ### Paper 19 | [Rich feature hierarchies for accurate object detection and semantic segmentation](https://arxiv.org/abs/1311.2524) 20 | 21 | ### Dataset 22 | [ILSVRC2013](http://www.image-net.org/challenges/LSVRC/2013/) 23 | 24 | ## Source 25 | Caffe BVLC R-CNN ILSVRC13 ==> Caffe2 R-CNN ILSVRC13 ==> ONNX R-CNN ILSVRC13 26 | 27 | ## Model input and output 28 | ### Input 29 | ``` 30 | data_0: float[1, 3, 224, 224] 31 | ``` 32 | ### Output 33 | ``` 34 | fc-rcnn_1: float[1, 200] 35 | ``` 36 | ### Pre-processing steps 37 | ### Post-processing steps 38 | ### Sample test data 39 | random generated sampe test data: 40 | - test_data_set_0 41 | - test_data_set_1 42 | - test_data_set_2 43 | - test_data_set_3 44 | - test_data_set_4 45 | - test_data_set_5 46 | 47 | ## Results/accuracy on test set 48 | On the 200-class ILSVRC2013 detection dataset, R-CNN’s mAP is 31.4%. 49 | 50 | ## License 51 | [BSD-3](LICENSE) 52 | -------------------------------------------------------------------------------- /vision/classification/rcnn_ilsvrc13/model/rcnn-ilsvrc13-3.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 230753133 3 | hash = 'c5af8036d3f68e621b908d1749e194cd54cf2ec5bc9c67efd62562fe2cdc6806' 4 | -------------------------------------------------------------------------------- /vision/classification/rcnn_ilsvrc13/model/rcnn-ilsvrc13-3.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 217332838 3 | hash = '82f3d719b3cb604372d927b51e655e25fe0fad57d6dbf010af9b93ef3536cd34' 4 | -------------------------------------------------------------------------------- /vision/classification/rcnn_ilsvrc13/model/rcnn-ilsvrc13-6.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 230753247 3 | hash = 'a7599761561edec448f215fa4739ace65d50a7de13fe9ebcc35d5b8058522fd9' 4 | 
-------------------------------------------------------------------------------- /vision/classification/rcnn_ilsvrc13/model/rcnn-ilsvrc13-6.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 217332967 3 | hash = '6898114073e5ccd54c9e86a58332a3e115936ae25c322b00e4f752948dbf21fc' 4 | -------------------------------------------------------------------------------- /vision/classification/rcnn_ilsvrc13/model/rcnn-ilsvrc13-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 230753161 3 | hash = 'bad54b904e3f91516f36e1eb673873ca3a3b1ebf945058d5b0c150e1c36053a1' 4 | -------------------------------------------------------------------------------- /vision/classification/rcnn_ilsvrc13/model/rcnn-ilsvrc13-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 214642762 3 | hash = '720844444f7f8b096e2ebd5e8b488a11fbced26aac87558bc789a75538fe9cc9' 4 | -------------------------------------------------------------------------------- /vision/classification/rcnn_ilsvrc13/model/rcnn-ilsvrc13-8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 230753161 3 | hash = '6edcf5da82b70a32d22a4c032f465b98481302b846fe5f086a6fc5a27836810f' 4 | -------------------------------------------------------------------------------- /vision/classification/rcnn_ilsvrc13/model/rcnn-ilsvrc13-8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 214642707 3 | hash = '56a3046979c5cdb3e0d7858f14c9f3d0306300bcca5909476b5412f58a8db6b4' 4 | -------------------------------------------------------------------------------- /vision/classification/rcnn_ilsvrc13/model/rcnn-ilsvrc13-9.onnx: 
-------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 230753161 3 | hash = '10d8caff3df8a50ef35566a9af07ec33c6ec1a900004cb0993c68960d373dba5' 4 | -------------------------------------------------------------------------------- /vision/classification/rcnn_ilsvrc13/model/rcnn-ilsvrc13-9.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 214642550 3 | hash = '2620563c8a266d599b42551b56d039977986f33fc6890fec2fc630abc7ca9624' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet101-v1-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 178914043 3 | hash = '5c5b0d98db836b531e2c4167cb33121db436e6dba6cdad369ce08fe98d7963c6' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet101-v1-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 166222423 3 | hash = '85cb413d4a7d90b0f56334ff76ec3c1bbee8655227dd4854332dfe7a5edb8d52' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet101-v2-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 178682301 3 | hash = '317a8795696ea3546818a4111d812b1bd821f70fd25e5eef22c7c2641b9ec600' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet101-v2-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 165872227 3 | hash = '265bd2e74e594e592e0f1caf5126c230e5dd4af4ab8861ea8c4654401212134e' 4 | 
-------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet152-v1-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 241816206 3 | hash = 'b5fce8bbeb08f065b73103638f6ac39e6d2954dd46bb4860fef1fd2fae977ef9' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet152-v1-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 226450954 3 | hash = 'b26de49d993812a328c35e7b4c10b2576c8fee52bdde0d66270c42008c82d58d' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet152-v2-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 241503848 3 | hash = '116104978fb4be42ad3a76fdce601313d181a09491cb6e7489d75f50cc0def59' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet152-v2-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 225131715 3 | hash = 'a6d2208175b6592c991e47a995c46d656c08fbd1f5b1bd180530a8770a3cbef7' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet18-v1-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 46820737 3 | hash = '8e081cac2608cb7d8313f80e2ef82bd6309ea014705bc97e16803ee1716c85b7' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet18-v1-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 43835620 3 | 
hash = 'eaa5d79c0ca967d6b986b2c45b30e168e9dfbbdbb5ae7151fbda7c6fe48751ba' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet18-v2-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 46806737 3 | hash = '9a1fb5016ba194b5ba1f8dba117b40816e4433d65af1321d3ca868ca163f8570' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet18-v2-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 43808776 3 | hash = '1097f7ee7c7c54ff41a771b8d26b34e65ffc4b02b0b1f05de22c03f3f74492b2' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet34-v1-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 87302588 3 | hash = '3f5a084512d54c645110402658bfbd649496bfdab1a35080de5a77757ff25d24' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet34-v1-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 81220173 3 | hash = '096d7438285e15d9bd6a17dbf1e50a6ab2c5f29c674da44503cc8dd51694d378' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet34-v2-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 87288587 3 | hash = 'a72be8ce0ed28d0b8307c083afe55f80b1c0884f42cba03fc03a5b167e2ac1a5' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet34-v2-7.tar.gz: 
-------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 81189514 3 | hash = '01c55f021847c098062cd0a7352ab17726f16185580f1ab02d839059772abd05' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet50-caffe2-v1-3.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 102491873 3 | hash = 'e327335ed3e645350437209da73f40cdfa70cbe52f1c0bebe6186984851e60d4' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet50-caffe2-v1-3.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 96754305 3 | hash = 'ebed131efb701518bbd48e71a1296355677d2ec2c83a68acd6065d68b4ac7abf' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet50-caffe2-v1-6.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 102490291 3 | hash = 'c1ea52efce04f30111bfdffee7a3ce78fc22a6a68d9eab8b1e903bcdbeb1d774' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet50-caffe2-v1-6.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 98379563 3 | hash = 'f7b72ae567862cf3eac8b5845910f66bd4717a36bcf6899121791ab72991395b' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet50-caffe2-v1-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 102489425 3 | hash = '2ef860106b1e6012a09c029015e812dd8ce3510b3657c5821413740cd74f1a96' 4 | 
-------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet50-caffe2-v1-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 95636370 3 | hash = '599cd01fba8e83be3f81fb338e6a3d1a2b8afce4218a65cfc0804c33fc5a1bfa' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet50-caffe2-v1-8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 102489425 3 | hash = 'c7e448520fd7f479d25f25b75418b758f774dccdbbcad12cc4a781ee2cc48b46' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet50-caffe2-v1-8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 95636234 3 | hash = '4481d2bb1b596ce836b7026a57356f246a76fe4f244ed237be3e628fea2f756b' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet50-caffe2-v1-9.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 102489425 3 | hash = 'c105fb51e21cd31ade39ff4c0aeedcfe4c7a45968558b873ff14d2423fb6b154' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet50-caffe2-v1-9.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 95636091 3 | hash = '3ee637632e7cf9483a3eeedcc1af84c0af139f1f1e466c2d8db1ff63356dd846' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet50-v1-12-int8.onnx: -------------------------------------------------------------------------------- 1 | # xet 
version 0 2 | filesize = 25816052 3 | hash = '128df815e7f769e4e11869b22834363ed555fa4eaf36f782a35218f555635d3b' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet50-v1-12-int8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 22355322 3 | hash = '1685840333e85ad60ab52ff977265cc66c7f8a6e0109728a498bfdc38e209f66' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet50-v1-12-qdq.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 25753167 3 | hash = '6d1bf01a744dd6c9e7bef468c48186ab2c4d0992930de5f24c14787057f5f1bd' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet50-v1-12-qdq.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 17049336 3 | hash = 'b1f2ed75540078609648137a19e80c653f63fa2a1c15b37c90ff5b087653f7b5' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet50-v1-12.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 102576593 3 | hash = 'eec67d6cea3542fd79233ec31b41fbe7f041a07fe5b14b0c18a8fe9f431c4df7' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet50-v1-12.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 96559469 3 | hash = '8b14015192544acc8d92e35520ecbe0e76bdb7e72104ba22f17aff9704401c08' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet50-v1-7.onnx: 
-------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 102583340 3 | hash = 'fc9dd40541f6041c4f37a01cf2e5e117a94a49a62b7a9151e017514caf7270b4' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet50-v1-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 95435189 3 | hash = 'ba6e4e45ff5c71343c3c91e8bafac80f6cb8cf118733899305810ffec736fc93' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet50-v2-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 102442452 3 | hash = 'f24b7e03721eebb53bcb49c8a6247669a78e41cc36ba65ce52bb70c74b822da7' 4 | -------------------------------------------------------------------------------- /vision/classification/resnet/model/resnet50-v2-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 95237476 3 | hash = 'b4bdfd37a1f791e5132f11b3ed3d02d240c83f0eb853395f937e32a45d52d609' 4 | -------------------------------------------------------------------------------- /vision/classification/shufflenet/model/shufflenet-3.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 5723516 3 | hash = '81c1a2723226ccc4e167f21ca4ab96f8b082fff160de82b6af838a203d00a443' 4 | -------------------------------------------------------------------------------- /vision/classification/shufflenet/model/shufflenet-3.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 6980244 3 | hash = 'c6ae3aae81b09a86442f03fbc6eb71bafb106b3e3cc1453e2c5670cd0cca01e8' 4 | 
-------------------------------------------------------------------------------- /vision/classification/shufflenet/model/shufflenet-6.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 5724572 3 | hash = 'f816a72c8fb85ec8ce4a45234caea1ce3f61e34cf8e4ceb9fe528902ed8535f2' 4 | -------------------------------------------------------------------------------- /vision/classification/shufflenet/model/shufflenet-6.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 8604055 3 | hash = '0f3759840bd620193a49116db68dc7216319874bcf3e67e954cc507991c7e074' 4 | -------------------------------------------------------------------------------- /vision/classification/shufflenet/model/shufflenet-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 5723770 3 | hash = 'e8b6370d1f4d81540e7d2bf5a05060891562108b54b2feeba82636b53df6496f' 4 | -------------------------------------------------------------------------------- /vision/classification/shufflenet/model/shufflenet-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 5858831 3 | hash = 'ccfa8cf3c523c9a88ae1c1e1c218a1b7927d55d4c646da327cdcdc262599c98c' 4 | -------------------------------------------------------------------------------- /vision/classification/shufflenet/model/shufflenet-8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 5723770 3 | hash = 'fcba25c8363fad9aee9d2365c08e7c1d9e2757b0ee40f68e1dccf025f969c503' 4 | -------------------------------------------------------------------------------- /vision/classification/shufflenet/model/shufflenet-8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 5858851 
3 | hash = '1eb42465091ea909f118b9d892bc9ea2399b525abac747af6f45b880455fcf43' 4 | -------------------------------------------------------------------------------- /vision/classification/shufflenet/model/shufflenet-9.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 5723770 3 | hash = '0d7363200f0980b359fab7b5d118cef84cdf2ad5fa0a38e7fa8a0c342ec0b275' 4 | -------------------------------------------------------------------------------- /vision/classification/shufflenet/model/shufflenet-9.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 5858835 3 | hash = 'a83231faed395b1a1dbcb1b97427b9bd9ad8c833ba12c6b869b8114bf4b1512c' 4 | -------------------------------------------------------------------------------- /vision/classification/shufflenet/model/shufflenet-v2-10.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 9218554 3 | hash = '98fb60cb881eec0b999129f7425752b0caa1e1a0eb64118472ec7b7d3a3a86fb' 4 | -------------------------------------------------------------------------------- /vision/classification/shufflenet/model/shufflenet-v2-10.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 8721925 3 | hash = '39860aef0d2d9400ebe313f36fcc8a1bade105aa6a738e5f020f4ca31eba6a0d' 4 | -------------------------------------------------------------------------------- /vision/classification/shufflenet/model/shufflenet-v2-12-int8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 2388912 3 | hash = '588298e9cae8fb13178d7aa8106ccdb0e95492068a28c2596f0b7db962a2e914' 4 | -------------------------------------------------------------------------------- /vision/classification/shufflenet/model/shufflenet-v2-12-int8.tar.gz: 
-------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 2488137 3 | hash = '137a70be6a675b1c10a017c2c1a6f57f2eaee90fc9d97373de68527205d073ad' 4 | -------------------------------------------------------------------------------- /vision/classification/shufflenet/model/shufflenet-v2-12-qdq.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 2415805 3 | hash = 'baee899f27e29aedc23e89379a92668fd2a0aa381a0caf993199fdeac52f3142' 4 | -------------------------------------------------------------------------------- /vision/classification/shufflenet/model/shufflenet-v2-12-qdq.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 2245304 3 | hash = '900d6321da79c88860e45189ade145538b02f44e3e99a745d7487d9a27b11ccf' 4 | -------------------------------------------------------------------------------- /vision/classification/shufflenet/model/shufflenet-v2-12.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 9218556 3 | hash = 'b30fd184ed7f8e6a9b7fce604f44bec69f91647f37a5fce17ba3d1f940dc9b1e' 4 | -------------------------------------------------------------------------------- /vision/classification/shufflenet/model/shufflenet-v2-12.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 9113274 3 | hash = 'a378d858c0bdb126d7087e92481b0428b1742469d64ffef6b11d461ecae2461f' 4 | -------------------------------------------------------------------------------- /vision/classification/squeezenet/model/squeezenet1.0-12-int8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 1293388 3 | hash = 'f11cd90f2918a8e2ff12852f5afa2fa7b1a9fd5e12afe194ece8371f952a92aa' 4 | 
-------------------------------------------------------------------------------- /vision/classification/squeezenet/model/squeezenet1.0-12-int8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 1562359 3 | hash = '09df69128b292067410d928e1c57cb47b2cac3dbb83209d51f1cb2d5e4aeda4e' 4 | -------------------------------------------------------------------------------- /vision/classification/squeezenet/model/squeezenet1.0-12.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 4952956 3 | hash = 'ebce858691d04e01524312cbd2e36950a7c13f72b3e72be20a098a37055f8a3e' 4 | -------------------------------------------------------------------------------- /vision/classification/squeezenet/model/squeezenet1.0-12.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 5151210 3 | hash = 'cabbb66a5cb18577717b6c74266ece34bf581e83ed2e4d83c39533bcf27323d3' 4 | -------------------------------------------------------------------------------- /vision/classification/squeezenet/model/squeezenet1.0-13-qdq.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 1345213 3 | hash = '9bf548c0740267d8bd0f05c765735dd18015b6312e1bcc95dff7f0067b7ec42c' 4 | -------------------------------------------------------------------------------- /vision/classification/squeezenet/model/squeezenet1.0-13-qdq.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 1565787 3 | hash = 'e07876e9a378cca7af3e679ea6ef7e8a90b51a595588ab3f0ca1f51014e777a6' 4 | -------------------------------------------------------------------------------- /vision/classification/squeezenet/model/squeezenet1.0-3.onnx: -------------------------------------------------------------------------------- 1 | # 
xet version 0 2 | filesize = 4952238 3 | hash = 'eb42fd46108ea5789df5254b6c88ab02f6ca4371b03f12c83ef8eb1e5617ff86' 4 | -------------------------------------------------------------------------------- /vision/classification/squeezenet/model/squeezenet1.0-3.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 6226586 3 | hash = '5548b7321baa9e490451e5ef9fdc765c33d1266ca17769aa5229e5ee904123cf' 4 | -------------------------------------------------------------------------------- /vision/classification/squeezenet/model/squeezenet1.0-6.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 4952238 3 | hash = '5263a77e1c83545a96f1f114e160e6a04fd09925ea17bcf2b2942d4735b3e672' 4 | -------------------------------------------------------------------------------- /vision/classification/squeezenet/model/squeezenet1.0-6.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 6226584 3 | hash = '070dc6724dbe5c27a8d60dc2c08e7a2ca20390f9b6bb44cdb48dc7d0194d082a' 4 | -------------------------------------------------------------------------------- /vision/classification/squeezenet/model/squeezenet1.0-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 4952222 3 | hash = 'dd340dfe502fc87a4f87a83a14d3517ebc747c5b249ca3a7a0678f824ee7f7aa' 4 | -------------------------------------------------------------------------------- /vision/classification/squeezenet/model/squeezenet1.0-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 5154452 3 | hash = '0888fd85383dac0d5c6065f77ef2a2cc7b61bc731f780de9f57cf70cee2e5daf' 4 | -------------------------------------------------------------------------------- 
/vision/classification/squeezenet/model/squeezenet1.0-8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 4952222 3 | hash = '6212f0647aa6424087ff950b673a3fc356d3b4566f80b166c9c28761b54bd366' 4 | -------------------------------------------------------------------------------- /vision/classification/squeezenet/model/squeezenet1.0-8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 5154407 3 | hash = '96e3f4c2b855afe234b6622d4fce9e95cfe9d66df3138e711ae097ac953cdb68' 4 | -------------------------------------------------------------------------------- /vision/classification/squeezenet/model/squeezenet1.0-9.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 4952222 3 | hash = 'fe3eecaafe142a2bd79454ef677d5f46219781531c3d22577fd63ac061a7ead9' 4 | -------------------------------------------------------------------------------- /vision/classification/squeezenet/model/squeezenet1.0-9.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 5154458 3 | hash = '0b241c35e63e63b099e4249cf5ef66c43f5f0e2f492a0229e15d03a990862f0d' 4 | -------------------------------------------------------------------------------- /vision/classification/squeezenet/model/squeezenet1.1-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 4956208 3 | hash = '1a20abace0677c97338f4a2997e5fe594a56a08c41860fe8902d5fbb55896951' 4 | -------------------------------------------------------------------------------- /vision/classification/squeezenet/model/squeezenet1.1-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 5156484 3 | hash = 
'185647ae7a056d78512fd4c53abc7e90d87e7eeef4d89748e1e23a7bed268edd' 4 | -------------------------------------------------------------------------------- /vision/classification/vgg/model/vgg16-12-int8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 138407238 3 | hash = 'e8873e02ecf5a1fb7c66a31b49b8a0f05e826f288632dd9a9e2362efba09a1db' 4 | -------------------------------------------------------------------------------- /vision/classification/vgg/model/vgg16-12-int8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 106112057 3 | hash = 'cfc33df6a9f5edb5e0ad4d9a80bae17234cb0aff9e4c98d2bff76208155dc367' 4 | -------------------------------------------------------------------------------- /vision/classification/vgg/model/vgg16-12.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 553437752 3 | hash = '890896cd14c489a22dbc239b449a501a4980be6a8e44abcbd2a0405616b19609' 4 | -------------------------------------------------------------------------------- /vision/classification/vgg/model/vgg16-12.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 511942031 3 | hash = 'd2b0bf831b4b548baddefbbbf02b611d2c6b1685891af8604597634c09279b7d' 4 | -------------------------------------------------------------------------------- /vision/classification/vgg/model/vgg16-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 553437328 3 | hash = 'bc6251203abab50361af03696414afcb3d9285123ea91489547917bf3a984d57' 4 | -------------------------------------------------------------------------------- /vision/classification/vgg/model/vgg16-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 
2 | filesize = 512679010 3 | hash = '9b3369a2e14154c0d358ac5e3c29c0b05da213b8056eb79d5e5a590b2903801f' 4 | -------------------------------------------------------------------------------- /vision/classification/vgg/model/vgg16-bn-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 553512191 3 | hash = '92a16fb2d5a055c5f0b9a44bc460ac2f968cb2909f998bba0b4ded1f2fc2fc14' 4 | -------------------------------------------------------------------------------- /vision/classification/vgg/model/vgg16-bn-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 512894836 3 | hash = '87fa5fe31b8d50890549339d06de281c41ad13717ef91ad5c6355fd0ad97e758' 4 | -------------------------------------------------------------------------------- /vision/classification/vgg/model/vgg19-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 574677321 3 | hash = '45fc83bdc4acb2d692569865126cb96cb17eae7c4b43b14e28c3e94e29eba8d7' 4 | -------------------------------------------------------------------------------- /vision/classification/vgg/model/vgg19-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 532124692 3 | hash = '3da29c36a2d33d5b2e9cdc6efa71e7f6261e75588ab1e56d4d1a923b7afc236d' 4 | -------------------------------------------------------------------------------- /vision/classification/vgg/model/vgg19-bn-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 574774380 3 | hash = 'e7279382760bd330c9f8edb709d9820646e723f817c267fc0da4d93a90ee457f' 4 | -------------------------------------------------------------------------------- /vision/classification/vgg/model/vgg19-bn-7.tar.gz: 
-------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 532271265 3 | hash = '5dca595dcb218ef764b368444cdcbcf62f55e2cc5ac8aaffd3c56be55f2315d8' 4 | -------------------------------------------------------------------------------- /vision/classification/vgg/model/vgg19-caffe2-3.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 574674684 3 | hash = '56b4df5430f62d255c4d990061758b8c66f84d4f47296bfd9966e4c158570648' 4 | -------------------------------------------------------------------------------- /vision/classification/vgg/model/vgg19-caffe2-3.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 536839176 3 | hash = 'dd9e217c6edac6ff05d4d1bce73783d6d9a39e50d3ddbe617397fd18d5686d3c' 4 | -------------------------------------------------------------------------------- /vision/classification/vgg/model/vgg19-caffe2-6.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 574674798 3 | hash = 'daae714b35991cde8750b0c1a7073c632c727a2a4831515a611b6313e9b9365c' 4 | -------------------------------------------------------------------------------- /vision/classification/vgg/model/vgg19-caffe2-6.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 536839354 3 | hash = '0d160b802310391bf11554410607d8e70d0b043069140769b690f525e6d01e41' 4 | -------------------------------------------------------------------------------- /vision/classification/vgg/model/vgg19-caffe2-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 574674712 3 | hash = '0b7f519ce9ef7299fdd2b87d95c5f1a4c80aa5e03946b35fe4826d4b5e4545c9' 4 | 
-------------------------------------------------------------------------------- /vision/classification/vgg/model/vgg19-caffe2-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 534082025 3 | hash = '04ac558095317c716e4eebfbc039f8f1180dbffeb1ad4c0f02c4bc1697e209f4' 4 | -------------------------------------------------------------------------------- /vision/classification/vgg/model/vgg19-caffe2-8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 574674712 3 | hash = 'edfa18386f960af4d0c20e737237af28defaf2419480e721259bafbaf7a97276' 4 | -------------------------------------------------------------------------------- /vision/classification/vgg/model/vgg19-caffe2-8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 534082067 3 | hash = '70287ef171d7742d5a31abe9aeba8c5cea292f3c2ea08bac027aaf49941d2c85' 4 | -------------------------------------------------------------------------------- /vision/classification/vgg/model/vgg19-caffe2-9.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 574674712 3 | hash = 'b07cfab7df0ff1ceec7d2c92bfce596943a8b003b939d1ff4bf1764260057271' 4 | -------------------------------------------------------------------------------- /vision/classification/vgg/model/vgg19-caffe2-9.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 534081932 3 | hash = 'a9affeba74c5ea4653b3f6966673c8b3e8aeaa9d8ccd2c1336a61e9ea37821ba' 4 | -------------------------------------------------------------------------------- /vision/classification/zfnet-512/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # ZFNet-512 4 | 5 | |Model |Download |Download (with sample 
test data)| ONNX version |Opset version|Top-1 accuracy (%)|Top-5 accuracy (%)| 6 | | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | 7 | |ZFNet-512| [341 MB](model/zfnet512-3.onnx) | [320 MB](model/zfnet512-3.tar.gz) | 1.1 | 3| | | 8 | |ZFNet-512| [341 MB](model/zfnet512-6.onnx) | [320 MB](model/zfnet512-6.tar.gz) | 1.1.2 | 6| | | 9 | |ZFNet-512| [341 MB](model/zfnet512-7.onnx) | [320 MB](model/zfnet512-7.tar.gz) | 1.2 | 7| | | 10 | |ZFNet-512| [341 MB](model/zfnet512-8.onnx) | [318 MB](model/zfnet512-8.tar.gz) | 1.3 | 8| | | 11 | |ZFNet-512| [341 MB](model/zfnet512-9.onnx) | [318 MB](model/zfnet512-9.tar.gz) | 1.4 | 9| | | 12 | |ZFNet-512| [333 MB](model/zfnet512-12.onnx) | [309 MB](model/zfnet512-12.tar.gz) | 1.9 | 12|55.97|79.41| 13 | |ZFNet-512-int8| [83 MB](model/zfnet512-12-int8.onnx) | [48 MB](model/zfnet512-12-int8.tar.gz) | 1.9 | 12|55.84|79.33| 14 | > Compared with the fp32 ZFNet-512, int8 ZFNet-512's Top-1 accuracy drop ratio is 0.23%, Top-5 accuracy drop ratio is 0.10% and performance improvement is 1.78x. 15 | > 16 | > **Note** 17 | > 18 | > Different preprocess methods will lead to different accuracies, the accuracy in table depends on this specific [preprocess method](https://github.com/intel-innersource/frameworks.ai.lpot.intel-lpot/blob/master/examples/onnxrt/onnx_model_zoo/zfnet/main.py). 19 | > 20 | > The performance depends on the test hardware. Performance data here is collected with Intel® Xeon® Platinum 8280 Processor, 1s 4c per instance, CentOS Linux 8.3, data batch size is 1. 21 | 22 | ## Description 23 | ZFNet-512 is a deep convolutional networks for classification. 24 | This model's 4th layer has 512 maps instead of 1024 maps mentioned in the paper. 
25 | 26 | ### Dataset 27 | [ILSVRC2013](http://www.image-net.org/challenges/LSVRC/2013/) 28 | 29 | ## Source 30 | Caffe2 ZFNet-512 ==> ONNX ZFNet-512 31 | 32 | ## Model input and output 33 | ### Input 34 | ``` 35 | gpu_0/data_0: float[1, 3, 224, 224] 36 | ``` 37 | ### Output 38 | ``` 39 | gpu_0/softmax_1: float[1, 1000] 40 | ``` 41 | ### Pre-processing steps 42 | ### Post-processing steps 43 | ### Sample test data 44 | random generated sampe test data: 45 | - test_data_set_0 46 | - test_data_set_1 47 | - test_data_set_2 48 | - test_data_set_3 49 | - test_data_set_4 50 | - test_data_set_5 51 | 52 | ## Results/accuracy on test set 53 | 54 | ## Quantization 55 | ZFNet-512-int8 is obtained by quantizing fp32 ZFNet-512 model. We use [Intel® Neural Compressor](https://github.com/intel/neural-compressor) with onnxruntime backend to perform quantization. View the [instructions](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/image_recognition/onnx_model_zoo/zfnet/quantization/ptq/README.md) to understand how to use Intel® Neural Compressor for quantization. 56 | 57 | ### Environment 58 | onnx: 1.9.0 59 | onnxruntime: 1.8.0 60 | 61 | ### Prepare model 62 | ```shell 63 | wget https://github.com/onnx/models/raw/main/vision/classification/zfnet-512/model/zfnet512-12.onnx 64 | ``` 65 | 66 | ### Model quantize 67 | Make sure to specify the appropriate dataset path in the configuration file. 
68 | ```bash 69 | bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx 70 | --config=zfnet512.yaml \ 71 | --data_path=/path/to/imagenet \ 72 | --label_path=/path/to/imagenet/label \ 73 | --output_model=path/to/save 74 | ``` 75 | 76 | ## References 77 | * [Visualizing and Understanding Convolutional Networks](https://arxiv.org/abs/1311.2901) 78 | 79 | * [Intel® Neural Compressor](https://github.com/intel/neural-compressor) 80 | 81 | ## Contributors 82 | * [mengniwang95](https://github.com/mengniwang95) (Intel) 83 | * [airMeng](https://github.com/airMeng) (Intel) 84 | * [ftian1](https://github.com/ftian1) (Intel) 85 | * [hshen14](https://github.com/hshen14) (Intel) 86 | 87 | ## License 88 | MIT 89 | -------------------------------------------------------------------------------- /vision/classification/zfnet-512/model/zfnet512-12-int8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 87274426 3 | hash = '4aba896ba8ad12660c6e76e26ee281a2ecfd17a507fc12f24dc385230cda96d2' 4 | -------------------------------------------------------------------------------- /vision/classification/zfnet-512/model/zfnet512-12-int8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 50270897 3 | hash = '93de6e80a5d4a8078733ec2ce7cc1a382cc99b6f6c9fcd72c36d692ca20b076c' 4 | -------------------------------------------------------------------------------- /vision/classification/zfnet-512/model/zfnet512-12.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 349005501 3 | hash = '2291a623f434ab3bee7dc9b804db2b8eb6c2650543668ecaaa4ec7ce48355dca' 4 | -------------------------------------------------------------------------------- /vision/classification/zfnet-512/model/zfnet512-12.tar.gz: -------------------------------------------------------------------------------- 1 | 
# xet version 0 2 | filesize = 323924791 3 | hash = '25002d0749310a3426bd57493a970bc77aad9c1c4c96f1924b07eec8cbecfc7f' 4 | -------------------------------------------------------------------------------- /vision/classification/zfnet-512/model/zfnet512-3.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 349005312 3 | hash = '909b5a7fe3ef4271e9a09d39764f862ba5e7c4311cd26b300300055713ba2397' 4 | -------------------------------------------------------------------------------- /vision/classification/zfnet-512/model/zfnet512-3.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 325256523 3 | hash = 'a5c673018b6f29bed2a175667c618c36c1e0c61c464b122c72ffa0fd4f34d311' 4 | -------------------------------------------------------------------------------- /vision/classification/zfnet-512/model/zfnet512-6.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 349005426 3 | hash = '7bf0aec48a658d2b8618f56909a3a6936e3176f204f15aaf73cc5a873527095d' 4 | -------------------------------------------------------------------------------- /vision/classification/zfnet-512/model/zfnet512-6.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 326891392 3 | hash = 'e2cfe808e64c6e2c1f486099357dc92324756f4bcb9838f41b93fa7a08c8292c' 4 | -------------------------------------------------------------------------------- /vision/classification/zfnet-512/model/zfnet512-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 349005372 3 | hash = '088a304cd6457109a78dd74fdc509ac209fbef8333c6a008dfd67f6fe4805588' 4 | -------------------------------------------------------------------------------- /vision/classification/zfnet-512/model/zfnet512-7.tar.gz: 
-------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 324181951 3 | hash = 'f2cb13b051489d2012844fd463ffb510329ddde10f9aede3c12d5e8ce7c9a0c9' 4 | -------------------------------------------------------------------------------- /vision/classification/zfnet-512/model/zfnet512-8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 349005372 3 | hash = '6a85a0c5cff7b7ddb795f5bb068c42b475598ef91ad4b1cca68a2a786a8cca16' 4 | -------------------------------------------------------------------------------- /vision/classification/zfnet-512/model/zfnet512-8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 324182001 3 | hash = '0d0e1e3e2c2adcffaddb5baa4172aa9ce0c3dd90c4319e941a86f4058d404c0e' 4 | -------------------------------------------------------------------------------- /vision/classification/zfnet-512/model/zfnet512-9.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 349005372 3 | hash = '8ff99918974461ea3426d383f462bbc91be8d3dedc49baca44fce42e0c62e27c' 4 | -------------------------------------------------------------------------------- /vision/classification/zfnet-512/model/zfnet512-9.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 324181833 3 | hash = '49a78792fdf8f8b0c58985e09147548343134b4ea57488bb4ecd3b5033eb9dda' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/duc/dependencies/duc-inference.ipynb: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 5374724 3 | hash = 'a186aba112aec2521ffed7c10876776c42edfe68629021880aaade75eeb12d3d' 4 | 
# SPDX-License-Identifier: Apache-2.0
# duc-postprocess.py / duc-preprocess.py — DUC segmentation pre/post-processing helpers.

def get_palette():
    """Return a 768-entry flat RGB palette mapping Cityscapes train IDs to colors.

    Train id 255 (the "ignore" label) is forced to black.
    """
    # train id -> color mapping from the cityscapes label definitions
    trainId2colors = {label.trainId: label.color for label in cityscapes_labels.labels}
    # prepare and return palette (256 entries x 3 channels, flat list)
    palette = [0] * 256 * 3
    for trainId, colors in trainId2colors.items():
        if trainId == 255:
            colors = (0, 0, 0)
        for i in range(3):
            palette[trainId * 3 + i] = colors[i]
    return palette


def colorize(labels):
    """Render a 2-D uint8 label map as an RGB numpy array using the palette."""
    result_img = Image.fromarray(labels).convert('P')
    result_img.putpalette(get_palette())
    return np.array(result_img.convert('RGB'))


def postprocess(labels, img_shape, result_shape, im=None):
    '''
    Postprocessing function for DUC.

    input : output labels from the network as numpy array, input image shape,
            desired output image shape, and optionally the original BGR input
            image (OpenCV order) used to produce a blended overlay.
    output : confidence score, segmented image, blended image (None when `im`
             is not supplied), raw segmentation labels

    NOTE(review): the original implementation referenced an undefined name `im`
    when building the blended image (guaranteed NameError); it is now an
    explicit, optional, backward-compatible parameter.
    '''
    ds_rate = 8          # network downsampling rate
    label_num = 19       # number of cityscapes train classes
    cell_width = 2       # DUC cell width
    img_height, img_width = img_shape
    result_height, result_width = result_shape

    # re-arrange output: undo the DUC pixel-shuffle layout
    test_width = int((int(img_width) / ds_rate) * ds_rate)
    test_height = int((int(img_height) / ds_rate) * ds_rate)
    feat_width = int(test_width / ds_rate)
    feat_height = int(test_height / ds_rate)
    labels = labels.reshape((label_num, 4, 4, feat_height, feat_width))
    labels = np.transpose(labels, (0, 3, 1, 4, 2))
    labels = labels.reshape((label_num, int(test_height / cell_width), int(test_width / cell_width)))

    labels = labels[:, :int(img_height / cell_width), :int(img_width / cell_width)]
    labels = np.transpose(labels, [1, 2, 0])
    labels = cv.resize(labels, (result_width, result_height), interpolation=cv.INTER_LINEAR)
    labels = np.transpose(labels, [2, 0, 1])

    # get softmax output
    softmax = labels

    # get classification labels
    results = np.argmax(labels, axis=0).astype(np.uint8)
    raw_labels = results

    # compute confidence score (mean of the per-pixel max activation)
    confidence = float(np.max(softmax, axis=0).mean())

    # generate segmented image
    result_img = Image.fromarray(colorize(raw_labels)).resize(result_shape[::-1])

    # generate blended image only when the original image is available;
    # `im` is assumed BGR (OpenCV), hence the [:, :, ::-1] flip to RGB
    blended_img = None
    if im is not None:
        blended_img = Image.fromarray(cv.addWeighted(im[:, :, ::-1], 0.5, np.array(result_img), 0.5, 0))

    return confidence, result_img, blended_img, raw_labels


def preprocess(im, rgb_mean):
    '''
    Preprocessing function for DUC.
    input : input image (HWC) and per-channel rgb mean
    output : MXNet ndarray of shape (1, C, H', W') with the mean subtracted
    '''
    # convert to float32
    test_img = im.astype(np.float32)
    # pad the image with a small border so both dimensions are multiples of 8,
    # in order to obtain an accurately reshaped image after the DUC layer
    test_shape = [im.shape[0], im.shape[1]]
    cell_shapes = [math.ceil(l / 8) * 8 for l in test_shape]
    test_img = cv.copyMakeBorder(test_img,
                                 0, max(0, int(cell_shapes[0]) - im.shape[0]),
                                 0, max(0, int(cell_shapes[1]) - im.shape[1]),
                                 cv.BORDER_CONSTANT, value=rgb_mean)
    test_img = np.transpose(test_img, (2, 0, 1))
    # subtract rgb mean per channel
    for i in range(3):
        test_img[i] -= rgb_mean[i]
    test_img = np.expand_dims(test_img, axis=0)
    # convert to MXNet ndarray
    test_img = mx.ndarray.array(test_img)
    return test_img
/vision/object_detection_segmentation/duc/model/ResNet101-DUC-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 259587240 3 | hash = 'bba1dfc01b0e10ab5e5e5b069653c0f6492e772afc7d27e7e136439815d5a326' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/faster-rcnn/dependencies/coco_classes.txt: -------------------------------------------------------------------------------- 1 | __background 2 | person 3 | bicycle 4 | car 5 | motorcycle 6 | airplane 7 | bus 8 | train 9 | truck 10 | boat 11 | traffic light 12 | fire hydrant 13 | stop sign 14 | parking meter 15 | bench 16 | bird 17 | cat 18 | dog 19 | horse 20 | sheep 21 | cow 22 | elephant 23 | bear 24 | zebra 25 | giraffe 26 | backpack 27 | umbrella 28 | handbag 29 | tie 30 | suitcase 31 | frisbee 32 | skis 33 | snowboard 34 | sports ball 35 | kite 36 | baseball bat 37 | baseball glove 38 | skateboard 39 | surfboard 40 | tennis racket 41 | bottle 42 | wine glass 43 | cup 44 | fork 45 | knife 46 | spoon 47 | bowl 48 | banana 49 | apple 50 | sandwich 51 | orange 52 | broccoli 53 | carrot 54 | hot dog 55 | pizza 56 | donut 57 | cake 58 | chair 59 | couch 60 | potted plant 61 | bed 62 | dining table 63 | toilet 64 | tv 65 | laptop 66 | mouse 67 | remote 68 | keyboard 69 | cell phone 70 | microwave 71 | oven 72 | toaster 73 | sink 74 | refrigerator 75 | book 76 | clock 77 | vase 78 | scissors 79 | teddy bear 80 | hair drier 81 | toothbrush 82 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/faster-rcnn/dependencies/demo.jpg: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 171271 3 | hash = '5f26e44d6872066f7f5cab507055eabaac5ffd75d839f03017438c8285d55c31' 4 | -------------------------------------------------------------------------------- 
/vision/object_detection_segmentation/faster-rcnn/model/FasterRCNN-10.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 167330019 3 | hash = '3884dc7770ae4817504db4de4e107d2d2b46330a23938a07244b05ae1e2e9dc5' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/faster-rcnn/model/FasterRCNN-10.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 155190684 3 | hash = 'c7b1b7c1f234de882c95fb47cd3f11026e6f10c4380b6bf2041d2169ac897379' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/faster-rcnn/model/FasterRCNN-12-int8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 44626453 3 | hash = '53d55c9158ea24251859738ac53c3de832a48d893a67ecd828f87080c0ed13b7' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/faster-rcnn/model/FasterRCNN-12-int8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 38019008 3 | hash = '0b4d05e3f1d34e77f56ed5237366b3e679acea86bc91498390a0cc1560e67593' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/faster-rcnn/model/FasterRCNN-12.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 176713194 3 | hash = '8010781d0849523b0e9b1aeacd365e4fc851f6a8406c47e7f6706ba8026c702b' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/faster-rcnn/model/FasterRCNN-12.tar.gz: -------------------------------------------------------------------------------- 1 | 
# xet version 0 2 | filesize = 163814449 3 | hash = 'c8ffc0f70affabb090950f1ee7362abe726ac86c9884f7f289366e07cb38c331' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/fcn/dependencies/000000017968.jpg: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 99881 3 | hash = '969c3913c03c60b6ba77c7983420e641532f474c9f6713050e97c641bab8943e' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/fcn/dependencies/000000025205.jpg: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 232443 3 | hash = 'dbbaf4e0ade6bfff82f68521f6891da94b3292e472d9f580170aa81a7c7cfd91' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/fcn/dependencies/inference.ipynb: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 495536 3 | hash = '425d90e0994a84ada2bf419a7927a2253f065afd6171cae1793dc46d7a61955c' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/fcn/dependencies/voc_classes.txt: -------------------------------------------------------------------------------- 1 | __background__ 2 | aeroplane 3 | bicycle 4 | bird 5 | boat 6 | bottle 7 | bus 8 | car 9 | cat 10 | chair 11 | cow 12 | diningtable 13 | dog 14 | horse 15 | motorbike 16 | person 17 | pottedplant 18 | sheep 19 | sofa 20 | train 21 | tvmonitor -------------------------------------------------------------------------------- /vision/object_detection_segmentation/fcn/model/fcn-resnet101-11.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 217069155 3 | hash = 
'7f64aa34e7cfb3eef361a03034e57824403d2c84859a13a62461953e93078f41' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/fcn/model/fcn-resnet101-11.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 294729465 3 | hash = '28ef9c5eba177579470210d88f9d7c6bd9cc62775137984398212b90dd532105' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/fcn/model/fcn-resnet50-11.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 141193553 3 | hash = '395dfc429f590d170905b2e8c294e91ee235026a6235ee875e69bee328f0b7b8' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/fcn/model/fcn-resnet50-11.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 223950703 3 | hash = '5eb71553e309966b1e1c271ffb7c94c8f50ca66331637413288e2c46a6e36f72' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/fcn/model/fcn-resnet50-12-int8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 35545058 3 | hash = '505e1c9727a716aa5ecdcebf3e7fa3e78c0dcbd95f3e50f79b0922f0888175e1' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/fcn/model/fcn-resnet50-12-int8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 30117697 3 | hash = '7ac0ee65f8f4b91a144882ff1e4e863bbce7fab5a953f980f25767c1f4097a63' 4 | -------------------------------------------------------------------------------- 
/vision/object_detection_segmentation/fcn/model/fcn-resnet50-12.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 141193555 3 | hash = '648d897c6f9f5d316c042036e470ff2ed0b4ce7794cd042c5f9e051438891b9e' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/fcn/model/fcn-resnet50-12.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 131124463 3 | hash = '395c9878c662f862f248b0b4de5ba03448e07a1ff7d363b58937f553d803b1e2' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/mask-rcnn/dependencies/coco_classes.txt: -------------------------------------------------------------------------------- 1 | __background 2 | person 3 | bicycle 4 | car 5 | motorcycle 6 | airplane 7 | bus 8 | train 9 | truck 10 | boat 11 | traffic light 12 | fire hydrant 13 | stop sign 14 | parking meter 15 | bench 16 | bird 17 | cat 18 | dog 19 | horse 20 | sheep 21 | cow 22 | elephant 23 | bear 24 | zebra 25 | giraffe 26 | backpack 27 | umbrella 28 | handbag 29 | tie 30 | suitcase 31 | frisbee 32 | skis 33 | snowboard 34 | sports ball 35 | kite 36 | baseball bat 37 | baseball glove 38 | skateboard 39 | surfboard 40 | tennis racket 41 | bottle 42 | wine glass 43 | cup 44 | fork 45 | knife 46 | spoon 47 | bowl 48 | banana 49 | apple 50 | sandwich 51 | orange 52 | broccoli 53 | carrot 54 | hot dog 55 | pizza 56 | donut 57 | cake 58 | chair 59 | couch 60 | potted plant 61 | bed 62 | dining table 63 | toilet 64 | tv 65 | laptop 66 | mouse 67 | remote 68 | keyboard 69 | cell phone 70 | microwave 71 | oven 72 | toaster 73 | sink 74 | refrigerator 75 | book 76 | clock 77 | vase 78 | scissors 79 | teddy bear 80 | hair drier 81 | toothbrush 82 | -------------------------------------------------------------------------------- 
/vision/object_detection_segmentation/mask-rcnn/dependencies/demo.jpg: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 171271 3 | hash = '5f26e44d6872066f7f5cab507055eabaac5ffd75d839f03017438c8285d55c31' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/mask-rcnn/model/MaskRCNN-10.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 177925424 3 | hash = '2815cecfc360873f3eee4bdcb76c234bcbb1689949ff6d3e324b6b4a68de31ad' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/mask-rcnn/model/MaskRCNN-10.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 165013602 3 | hash = '7fc9435b681b0a05720c7fbeffcaf70c1c204e767042b333cb55f1b1e7641077' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/mask-rcnn/model/MaskRCNN-12-int8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 48137510 3 | hash = '8b220718135008351a1b2331b3c9bb6b5db2d4c2f2e73a1fb8fafc8ad01d4a5e' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/mask-rcnn/model/MaskRCNN-12-int8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 35901397 3 | hash = '01b38b54c0632c90532a22bccf3f4962349f03ead5c96c183b4ad6c93c1862bc' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/mask-rcnn/model/MaskRCNN-12.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize 
= 177970010 3 | hash = '1c4607acaae6e8241b1cc1753311a98e60af9827b1ea149ce57e14b043981b8e' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/mask-rcnn/model/MaskRCNN-12.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 164901207 3 | hash = '29561b43a3c4b811002c668626c4e912750fa116fa243bcd4a7c3275901f36ab' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/retinanet/dependencies/demo.jpg: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 170591 3 | hash = 'd51b9103977c8a0ef21dfdbf0686f83819ffdc3ea665bc690c18497d8f4b65b7' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/retinanet/model/retinanet-9.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 228369343 3 | hash = '26462d602632663535ddf58851715857343b5f4bc48907d49a39a527895ed241' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/retinanet/model/retinanet-9.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 153330008 3 | hash = '09e087009643a288f9727762ad997e972af1ab541441f4ed19fd6c74f2320b49' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/ssd-mobilenetv1/model/ssd_mobilenet_v1_10.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 29275103 3 | hash = 'fd91c590b4f3c8aac5b5a38d37b563a62a23a8209790ecd67f6e40f76e332704' 4 | -------------------------------------------------------------------------------- 
/vision/object_detection_segmentation/ssd-mobilenetv1/model/ssd_mobilenet_v1_10.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 25464564 3 | hash = 'f835cf94db29cca0642d6a5ecd384529ea048ee5f4a3b992378e25085bd12046' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/ssd-mobilenetv1/model/ssd_mobilenet_v1_12-int8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 8958931 3 | hash = 'b181710d4cc5a3455c3c67b82f39d99ab81a4d5603f2cfd3a5606be03d5627cf' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/ssd-mobilenetv1/model/ssd_mobilenet_v1_12-int8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 5778852 3 | hash = '660bdbd1943542946d116a16f9dd2b461c48fff5ddf7fd8e6cb1d7ff8924fcde' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/ssd-mobilenetv1/model/ssd_mobilenet_v1_12.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 29461455 3 | hash = 'e1bb393b38fda368bab6271087bfc59bc0cd08eca948e1ef405e5436ccd78d47' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/ssd-mobilenetv1/model/ssd_mobilenet_v1_12.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 25475584 3 | hash = 'f255f65fa573bb66562786b2456b5cd4c80dc52c2808cf4d2ab91dcb3475a322' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/ssd/model/ssd-10.onnx: 
-------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 80363696 3 | hash = '6e7d37f2fd5b65ac805ee4f5efac48b1ce8be299b869dc9089f1d719481be4ca' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/ssd/model/ssd-10.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 78515627 3 | hash = '9bde368c311ddb11ebf88225ea125da46aa9ae5b03d977a1afa72b649a1ca32a' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/ssd/model/ssd-12-int8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 20484185 3 | hash = '0655cc6baec63fd5bfbf19df2a0081a01a875572e1eb1fe1e1e0d90b2e4dad74' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/ssd/model/ssd-12-int8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 31845320 3 | hash = '703f18e532afa5bbcd701f979a68127a18968d6eb01d2bb8021595682e6b3302' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/ssd/model/ssd-12.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 80366315 3 | hash = 'bcb86cbf35c9f0edde8e103915500958038163e236a54fa4da7418df78a91fc0' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/ssd/model/ssd-12.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 90592544 3 | hash = 'a1940efcc5e59b387daf4756abbb67b5ad8bab64ef52bca0e0fcc42e2436dcf7' 4 | 
-------------------------------------------------------------------------------- /vision/object_detection_segmentation/tiny-yolov2/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Tiny YOLOv2 4 | 5 | ## Description 6 | This model is a real-time neural network for object detection that detects 20 different classes. It is made up of 9 convolutional layers and 6 max-pooling layers and is a smaller version of the more complex full [YOLOv2](https://pjreddie.com/darknet/yolov2/) network. 7 | 8 | CoreML TinyYoloV2 ==> ONNX TinyYoloV2 9 | 10 | ## Model 11 | |Model|Download|Download (with sample test data)| ONNX version |Opset version| 12 | |-----|:-------|:-------------------------------|:-------------|:------------| 13 | |Tiny YOLOv2|[62 MB](model/tinyyolov2-7.onnx)|[59 MB](model/tinyyolov2-7.tar.gz) |1.2 |7 | 14 | |Tiny YOLOv2|[62 MB](model/tinyyolov2-8.onnx)|[59 MB](model/tinyyolov2-8.tar.gz) |1.3 |8 | 15 | 16 | ### Paper 17 | "YOLO9000: Better, Faster, Stronger" [arXiv:1612.08242](https://arxiv.org/pdf/1612.08242.pdf) 18 | 19 | ### Dataset 20 | The Tiny YOLO model was trained on the [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/) dataset. 21 | 22 | ### Source 23 | The model was converted from a Core ML version of Tiny YOLO using [ONNXMLTools](https://github.com/onnx/onnxmltools). The source code can be found [here](https://github.com/hollance/YOLO-CoreML-MPSNNGraph). The Core ML model in turn was converted from the [original network](https://pjreddie.com/darknet/yolov2/) implemented in Darknet (via intermediate conversion through Keras). 24 | 25 | ## Inference 26 | ### Input 27 | shape `(1x3x416x416)` 28 | ### Preprocessing 29 | ### Output 30 | shape `(1x125x13x13)` 31 | ### Postprocessing 32 | The output is a `(125x13x13)` tensor where 13x13 is the number of grid cells that the image gets divided into. 
Each grid cell corresponds to 125 channels, made up of the 5 bounding boxes predicted by the grid cell and the 25 data elements that describe each bounding box (`5x25=125`). For more information on how to derive the final bounding boxes and their corresponding confidence scores, refer to this [post](http://machinethink.net/blog/object-detection-with-yolo/). 33 | ### Sample test data 34 | Sets of sample input and output files are provided in 35 | * serialized protobuf TensorProtos (`.pb`), which are stored in the folders `test_data_set_*/`. 36 | 37 | ## License 38 | MIT 39 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/tiny-yolov2/model/tinyyolov2-7.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 63480982 3 | hash = '60cd28804c778be558ae9eb17b4e39abaf2b76214c74c34c15cbdac21e6089d9' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/tiny-yolov2/model/tinyyolov2-7.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 60865248 3 | hash = '38d941de30613ae5b3097580d7670f2ad9d84b51c5835ff747623577956bca98' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/tiny-yolov2/model/tinyyolov2-8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 63480982 3 | hash = 'bd23b3325cd2bbb229a47eb637453302dd3b9ad38527781af526893f706ba88d' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/tiny-yolov2/model/tinyyolov2-8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 60864927 3 | hash = 
'3cef389a701f52037f97d7b7622826d228c8020b1b7f3c3fc063d98982142719' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/tiny-yolov3/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Tiny YOLOv3 4 | 5 | ## Description 6 | This model is a neural network for real-time object detection that detects 80 different classes. It is very fast and accurate. It is a smaller version of YOLOv3 model. 7 | 8 | ## Model 9 | 10 | |Model |Download |Download (with sample test data)|ONNX version|Opset version|Accuracy | 11 | |-------------|:--------------|:--------------|:--------------|:--------------|:--------------| 12 | |Tiny YOLOv3 |[34 MB](model/tiny-yolov3-11.onnx) |[33 MB](model/tiny-yolov3-11.tar.gz)|1.6 |11 |mAP of 0.331 | 13 | 14 | 15 | 16 |
17 | 18 | ## Inference 19 | 20 | ### Input to model 21 | Resized image `(1x3x416x416)` 22 | Original image size `(1x2)` which is `[image.size[1], image.size[0]]` 23 | 24 | ### Preprocessing steps 25 | The images have to be loaded into a range of [0, 1]. The transformation should preferably happen at preprocessing. 26 | 27 | The following code shows how to preprocess an NCHW tensor: 28 | 29 | ```python 30 | import numpy as np 31 | from PIL import Image 32 | 33 | # this function is from yolo3.utils.letterbox_image 34 | def letterbox_image(image, size): 35 | '''resize image with unchanged aspect ratio using padding''' 36 | iw, ih = image.size 37 | w, h = size 38 | scale = min(w/iw, h/ih) 39 | nw = int(iw*scale) 40 | nh = int(ih*scale) 41 | 42 | image = image.resize((nw,nh), Image.BICUBIC) 43 | new_image = Image.new('RGB', size, (128,128,128)) 44 | new_image.paste(image, ((w-nw)//2, (h-nh)//2)) 45 | return new_image 46 | 47 | def preprocess(img): 48 | model_image_size = (416, 416) 49 | boxed_image = letterbox_image(img, tuple(reversed(model_image_size))) 50 | image_data = np.array(boxed_image, dtype='float32') 51 | image_data /= 255. 52 | image_data = np.transpose(image_data, [2, 0, 1]) 53 | image_data = np.expand_dims(image_data, 0) 54 | return image_data 55 | 56 | image = Image.open(img_path) 57 | # input 58 | image_data = preprocess(image) 59 | image_size = np.array([image.size[1], image.size[0]], dtype=np.float32).reshape(1, 2) 60 | ``` 61 | 62 | ### Output of model 63 | The model has 3 outputs. 64 | boxes: `(1x'n_candidates'x4)`, the coordinates of all anchor boxes, 65 | scores: `(1x80x'n_candidates')`, the scores of all anchor boxes per class, 66 | indices: `('nbox'x3)`, selected indices from the boxes tensor. The selected index format is (batch_index, class_index, box_index). 
The class list is [here](https://github.com/qqwweee/keras-yolo3/blob/master/model_data/coco_classes.txt) 67 | 68 | ### Postprocessing steps 69 | Post processing and meaning of output 70 | ```python 71 | out_boxes, out_scores, out_classes = [], [], [] 72 | for idx_ in indices[0]: 73 | out_classes.append(idx_[1]) 74 | out_scores.append(scores[tuple(idx_)]) 75 | idx_1 = (idx_[0], idx_[2]) 76 | out_boxes.append(boxes[idx_1]) 77 | ``` 78 | out_boxes, out_scores, out_classes are list of resulting boxes, scores, and classes. 79 |
80 | 81 | ## Dataset (Train and validation) 82 | We use pretrained weights from pjreddie.com [here](https://pjreddie.com/media/files/yolov3-tiny.weights). 83 |
84 | 85 | ## Validation accuracy 86 | Metric is COCO box mAP (averaged over IoU of 0.5:0.95), computed over 2017 COCO val data. 87 | mAP of 0.331 based on original tiny Yolov3 model [here](https://pjreddie.com/darknet/yolo/) 88 |
89 | 90 | ## Publication/Attribution 91 | Joseph Redmon, Ali Farhadi. YOLOv3: An Incremental Improvement, [paper](https://arxiv.org/pdf/1804.02767.pdf) 92 | 93 |
94 | 95 | ## References 96 | This model is converted from a keras model [repository](https://github.com/qqwweee/keras-yolo3) using keras2onnx converter [repository](https://github.com/onnx/keras-onnx). 97 |
98 | 99 | ## License 100 | MIT License 101 |
102 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/tiny-yolov3/model/tiny-yolov3-11.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 35511756 3 | hash = 'b12c25289e81c7961a45a10f1eb34a4da775515efffc2c193e9494fb71b91d42' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/tiny-yolov3/model/tiny-yolov3-11.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 34058193 3 | hash = '0a32d472bb17efc77d4355cd6e3f23aa1467dd4256256cfa316d94b2fd910097' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/yolov2-coco/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # YOLOv2-COCO 4 | 5 | ## Description 6 | This model aims to detect objects in real time. It detects 80 different classes from the [COCO Datasets](http://cocodataset.org/#home). For information on network architecture, see the [author's page](https://pjreddie.com/darknet/yolov2/) and [white paper](https://arxiv.org/pdf/1612.08242.pdf). 7 | 8 | ## Model 9 | The model was converted to ONNX from PyTorch version of YOLOv2 using [PyTorch-Yolo2](https://github.com/marvis/pytorch-yolo2). The output is fully verified by generating bounding boxes under PyTorch and onnxruntime. 
10 | 11 | | Model | Download | Download (with sample test data) | ONNX version | Opset version | 12 | | ----- | -------- | -------------------------------- | ------------ | ------------- | 13 | | YOLOv2 | [203.9 MB](model/yolov2-coco-9.onnx) | [182.6 MB](model/yolov2-coco-9.tar.gz) | 1.5 | 9 | 14 | 15 | ## Inference 16 | ### Input to model 17 | shape `(1x3x416x416)` 18 | 19 | ### Output of model 20 | shape `(1x425x13x13)` 21 | 22 | ### Postprocessing steps 23 | The output is a `(1x425x13x13)` tensor where 13x13 is the number of grid cells that the image gets divided into. Each grid cell corresponds to 5 anchors, made up of the 5 bounding boxes predicted by the grid cell and the 80 classes that describe each bounding box (`5 x (80 classes + 5) = 425`). For more information on how to derive the final bounding boxes and their corresponding confidence scores, refer to this [post](https://docs.microsoft.com/en-us/dotnet/machine-learning/tutorials/object-detection-onnx) and [PyTorch source code](https://github.com/marvis/pytorch-yolo2/blob/master/detect.py). 24 | 25 | ## Dataset (Train and validation) 26 | The YOLOv2 model was trained on the [COCO](http://cocodataset.org/#home) datasets and was sourced from the original yolov2-voc `.cfg` and `.weights` files from [link](https://pjreddie.com/darknet/yolov2/). 
27 | 28 | ## References 29 | "YOLO9000: Better, Faster, Stronger" [arXiv:1612.08242](https://arxiv.org/pdf/1612.08242.pdf) 30 | 31 | ## License 32 | MIT License 33 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/yolov2-coco/model/yolov2-coco-9.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 203948401 3 | hash = 'e5afefa44b0d9bd2d95dce310bead16bcd4b7585dd3d1afc16ef18a6add0f41f' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/yolov2-coco/model/yolov2-coco-9.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 191439022 3 | hash = 'a9a8c75e3185e7a8a7a347ca0357765e394958711e801a1089628509a4e3c3f7' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/yolov3/model/yolov3-10.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 247908721 3 | hash = 'ae521eb8e8be19514b950911131a32c00ea86fde3764fbf198ffffc67b4164c2' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/yolov3/model/yolov3-10.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 232571698 3 | hash = 'fecd3fb20d109b2e29dfb9542cc7128745c78a3a78cf3fd6ae05cbf9df00f642' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/yolov3/model/yolov3-12-int8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 63200797 3 | hash = 'fd291299b28f79366cda4cdfe64df3710241e4783a8965780255fe510e4ecbfa' 4 | 
-------------------------------------------------------------------------------- /vision/object_detection_segmentation/yolov3/model/yolov3-12-int8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 48719693 3 | hash = '69dc31197864495712d594418ff0eb271ba536ce5a88a58da2983e65602c12b3' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/yolov3/model/yolov3-12.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 247942267 3 | hash = '4757880ab358c95d0193ee77a5ede569e4af0dca3c3fa2a0bf927b672835aaac' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/yolov3/model/yolov3-12.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 232651839 3 | hash = '66de0f28615a630747b6f31fb91f2c17dcecfe2cbdf65324c733c160cc7532a0' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/yolov4/dependencies/Conversion.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "The model below is converted from tensorflow. The original model can be found [here](https://github.com/hunglc007/tensorflow-yolov4-tflite). The conversion process follows the basic outline provided by the [tensorflow-onnx repo](https://github.com/onnx/tensorflow-onnx)." 
8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Step 1: Cloning the repository" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "!git clone https://github.com/hunglc007/tensorflow-yolov4-tflite\n", 24 | "!cd tensorflow-yolov4-tflite" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "## Step 2: Downloading the weights\n", 32 | "Download yolov4.weights file: https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT\n", 33 | "\n", 34 | "Place the downloaded weights in the tensorflow-yolov4-tflite/data/folder" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "## Step 3: Saving the tf model in preparation for ONNX conversion" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "!python save_model.py --weights ./data/yolov4.weights --output ./checkpoints/yolov4.tf --input_size 416 --model yolov4" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "## Step 4: Conversion\n", 58 | "1. Follow the installation process listed on the tensorflow-onnx repo, which can be found [here](https://github.com/onnx/tensorflow-onnx#prerequisites).\n", 59 | "2. 
Run `tf2onnx.convert`" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "# installations\n", 69 | "!pip install onnxruntime\n", 70 | "!pip install git+https://github.com/onnx/tensorflow-onnx\n", 71 | " \n", 72 | "# Conversion\n", 73 | "python -m tf2onnx.convert --saved-model ./checkpoints/yolov4.tf --output model.onnx --opset 11 --verbose " 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "## Step 5: Validation\n", 81 | "Validation outputs can be found in the [onnx-model-validation](./onnx-model-validation.ipynb) notebook." 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "import onnx\n", 91 | "import os\n", 92 | "\n", 93 | "# Load the ONNX model\n", 94 | "model = onnx.load(os.path.join('model.onnx'))\n", 95 | "onnx.checker.check_model(model) # Check that the IR is well formed\n", 96 | "print(onnx.helper.printable_graph(model.graph)) # Print a human readable representation of the graph" 97 | ] 98 | } 99 | ], 100 | "metadata": { 101 | "kernelspec": { 102 | "display_name": "Python 3", 103 | "language": "python", 104 | "name": "python3" 105 | }, 106 | "language_info": { 107 | "codemirror_mode": { 108 | "name": "ipython", 109 | "version": 3 110 | }, 111 | "file_extension": ".py", 112 | "mimetype": "text/x-python", 113 | "name": "python", 114 | "nbconvert_exporter": "python", 115 | "pygments_lexer": "ipython3", 116 | "version": "3.7.7" 117 | } 118 | }, 119 | "nbformat": 4, 120 | "nbformat_minor": 4 121 | } 122 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/yolov4/dependencies/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | 
traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | potted plant 60 | bed 61 | dining table 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/yolov4/dependencies/inference.ipynb: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 345275 3 | hash = '199082554d51d723abe39041b7bbf141abeef427a27d9e514c65eaf4b5a8947e' 4 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/yolov4/dependencies/yolov4_anchors.txt: -------------------------------------------------------------------------------- 1 | 12,16, 19,36, 40,28, 36,75, 76,55, 72,146, 142,110, 192,243, 459,401 2 | -------------------------------------------------------------------------------- /vision/object_detection_segmentation/yolov4/model/yolov4.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 257470589 3 | hash = '9ab372e4326fc2fd03fd3f96951771881fc17f3876bea960c6a5c7500dde935b' 4 | 
-------------------------------------------------------------------------------- /vision/object_detection_segmentation/yolov4/model/yolov4.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 243453644 3 | hash = '4804edf8924de2622216a41e97dcf4c3f2242ea14d894d3fef5bf3bd93bf1c25' 4 | -------------------------------------------------------------------------------- /vision/style_transfer/fast_neural_style/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Fast Neural Style Transfer 4 | 5 | ## Use-cases 6 | This artistic style transfer model mixes the content of an image with the style of another image. Examples of the styles can be seen [in this PyTorch example](https://github.com/pytorch/examples/tree/master/fast_neural_style#models). 7 | 8 | 9 | ## Description 10 | The model uses the method described in [Perceptual Losses for Real-Time Style Transfer and Super-Resolution](https://arxiv.org/abs/1603.08155) along with [Instance Normalization](https://arxiv.org/pdf/1607.08022.pdf). 
11 | 12 | 13 | ## Model 14 | |Model |Download |Download (with sample test data)|ONNX version|Opset version| 15 | |-------------|:--------------|:--------------|:--------------|:--------------| 16 | |Mosaic|[6.6 MB](model/mosaic-9.onnx) | [7.2 MB](model/mosaic-9.tar.gz)|1.4|9| 17 | |Candy|[6.6 MB](model/candy-9.onnx) | [7.2 MB](model/candy-9.tar.gz)|1.4|9| 18 | |Rain Princess|[6.6 MB](model/rain-princess-9.onnx) |[7.2 MB](model/rain-princess-9.tar.gz)|1.4|9| 19 | |Udnie|[6.6 MB](model/udnie-9.onnx) | [7.2 MB](model/udnie-9.tar.gz)|1.4|9| 20 | |Pointilism|[6.6 MB](model/pointilism-9.onnx) | [7.2 MB](model/pointilism-9.tar.gz)|1.4|9| 21 | |Mosaic|[6.6 MB](model/mosaic-8.onnx) | [7.2 MB](model/mosaic-8.tar.gz)|1.4|8| 22 | |Candy|[6.6 MB](model/candy-8.onnx) | [7.2 MB](model/candy-8.tar.gz)|1.4|8| 23 | |Rain Princess|[6.6 MB](model/rain-princess-8.onnx) |[7.2 MB](model/rain-princess-8.tar.gz)|1.4|8| 24 | |Udnie|[6.6 MB](model/udnie-8.onnx) | [7.2 MB](model/udnie-8.tar.gz)|1.4|8| 25 | |Pointilism|[6.6 MB](model/pointilism-8.onnx) | [7.2 MB](model/pointilism-8.tar.gz)|1.4|8| 26 |
27 | 28 | ## Inference 29 | Refer to [style-transfer-ort.ipynb](dependencies/style-transfer-ort.ipynb) for detailed preprocessing and postprocessing. 30 | 31 | ### Input to model 32 | The inputs to the model are 3-channel RGB images. The images have to be loaded in a range of [0, 255]. If running into memory issues, try resizing the image by increasing the scale number. 33 | 34 | ### Preprocessing steps 35 | ``` 36 | from PIL import Image 37 | import numpy as np 38 | 39 | # loading input and resize if needed 40 | image = Image.open("PATH TO IMAGE") 41 | size_reduction_factor = 1 42 | image = image.resize((int(image.size[0] / size_reduction_factor), int(image.size[1] / size_reduction_factor)), Image.LANCZOS) 43 | 44 | # Preprocess image 45 | x = np.array(image).astype('float32') 46 | x = np.transpose(x, [2, 0, 1]) 47 | x = np.expand_dims(x, axis=0) 48 | ``` 49 | 50 | ### Output of model 51 | The converted ONNX model outputs a NumPy float32 array of shape [1, 3, ‘height’, ‘width’]. The height and width of the output image are the same as the height and width of the input image. 52 | 53 | ### Postprocessing steps 54 | ``` 55 | result = np.clip(result, 0, 255) 56 | result = result.transpose(1,2,0).astype("uint8") 57 | img = Image.fromarray(result) 58 | ``` 59 |
60 | 61 | ## Dataset (Train and validation) 62 | The original fast neural style model is from [pytorch/examples/fast_neural_style](https://github.com/pytorch/examples/tree/master/fast_neural_style). All models are trained using the [COCO 2014 Training images dataset](http://cocodataset.org/#download) [80K/13GB]. 63 |
64 | 65 | ## Training 66 | Refer to [pytorch/examples/fast_neural_style](https://github.com/pytorch/examples/tree/master/fast_neural_style) for training details in PyTorch. Refer to [conversion.ipynb](dependencies/conversion.ipynb) to learn how the PyTorch models are converted to ONNX format. 67 |
68 | 69 | 70 | ## References 71 | Original style transfer model in PyTorch: [pytorch/examples/fast_neural_style](https://github.com/pytorch/examples/tree/master/fast_neural_style) 72 |
73 | 74 | ## Contributors 75 | [Jennifer Wang](https://github.com/jennifererwangg) 76 |
77 | 78 | ## License 79 | BSD-3-Clause 80 |
81 | -------------------------------------------------------------------------------- /vision/style_transfer/fast_neural_style/dependencies/conversion.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from onnx import numpy_helper\n", 10 | "\n", 11 | "def f(t):\n", 12 | " return [f(i) for i in t] if isinstance(t, (list, tuple)) else t\n", 13 | "\n", 14 | "def g(t, res):\n", 15 | " for i in t:\n", 16 | " res.append(i) if not isinstance(i, (list, tuple)) else g(i, res)\n", 17 | " return res\n", 18 | "\n", 19 | "def SaveData(test_data_dir, prefix, data_list):\n", 20 | " if isinstance(data_list, torch.autograd.Variable) or isinstance(data_list, torch.Tensor):\n", 21 | " data_list = [data_list]\n", 22 | " for i, d in enumerate(data_list):\n", 23 | " d = d.data.cpu().numpy()\n", 24 | " SaveTensorProto(os.path.join(test_data_dir, '{0}_{1}.pb'.format(prefix, i)), prefix + str(i+1), d)\n", 25 | " \n", 26 | "def SaveTensorProto(file_path, name, data):\n", 27 | " tp = numpy_helper.from_array(data)\n", 28 | " tp.name = name\n", 29 | "\n", 30 | " with open(file_path, 'wb') as f:\n", 31 | " f.write(tp.SerializeToString())" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "import torch\n", 41 | "import re\n", 42 | "import os\n", 43 | "import onnxruntime as rt\n", 44 | "from transformer_net import TransformerNet\n", 45 | "\n", 46 | "input = torch.randn(1, 3, 224, 224)\n", 47 | "with torch.no_grad():\n", 48 | " model = TransformerNet()\n", 49 | " model_dict = torch.load(\"PATH TO PYTORCH MODEL\")\n", 50 | " for k in list(model_dict.keys()):\n", 51 | " if re.search(r'in\\d+\\.running_(mean|var)$', k):\n", 52 | " del model_dict[k]\n", 53 | " model.load_state_dict(model_dict)\n", 54 | " output = model(input)\n", 55 | " 
\n", 56 | "input_names = ['input1']\n", 57 | "output_names = ['output1']\n", 58 | "dir = \"PATH TO CONVERTED ONNX MODEL\"\n", 59 | "if not os.path.exists(dir):\n", 60 | " os.makedirs(dir)\n", 61 | "data_dir = os.path.join(dir, \"data_set\")\n", 62 | "if not os.path.exists(data_dir):\n", 63 | " os.makedirs(data_dir)\n", 64 | "\n", 65 | "if isinstance(model, torch.jit.ScriptModule):\n", 66 | " torch.onnx._export(model, tuple((input,)), os.path.join(dir, 'model.onnx'), verbose=True, input_names=input_names, output_names=output_names, example_outputs=(output,))\n", 67 | "else:\n", 68 | " torch.onnx.export(model, tuple((input,)), os.path.join(dir, 'model.onnx'), verbose=True, input_names=input_names, output_names=output_names)\n", 69 | "\n", 70 | "input = f(input)\n", 71 | "input = g(input, [])\n", 72 | "output = f(output)\n", 73 | "output = g(output, [])\n", 74 | " \n", 75 | "SaveData(data_dir, 'input', input)\n", 76 | "SaveData(data_dir, 'output', output)" 77 | ] 78 | } 79 | ], 80 | "metadata": { 81 | "kernelspec": { 82 | "display_name": "Python 3", 83 | "language": "python", 84 | "name": "python3" 85 | }, 86 | "language_info": { 87 | "codemirror_mode": { 88 | "name": "ipython", 89 | "version": 3 90 | }, 91 | "file_extension": ".py", 92 | "mimetype": "text/x-python", 93 | "name": "python", 94 | "nbconvert_exporter": "python", 95 | "pygments_lexer": "ipython3", 96 | "version": "3.6.5" 97 | } 98 | }, 99 | "nbformat": 4, 100 | "nbformat_minor": 2 101 | } 102 | -------------------------------------------------------------------------------- /vision/style_transfer/fast_neural_style/dependencies/style-transfer-ort.ipynb: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 2954016 3 | hash = '7303a3bab1ab8574aa0924481e4abf498dbcbec188de36ba9893d06d9dbb4ee1' 4 | -------------------------------------------------------------------------------- /vision/style_transfer/fast_neural_style/model/candy-8.onnx: 
-------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 6726529 3 | hash = '55fcf4a3e1b049657489a014fa68625f87d82ca27b881c7ddb4c3ed0c255bd4f' 4 | -------------------------------------------------------------------------------- /vision/style_transfer/fast_neural_style/model/candy-8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 7338783 3 | hash = '7282139d7a7ec11817060932e58d7f33ed8b67bff8076a337824491e3301bcb3' 4 | -------------------------------------------------------------------------------- /vision/style_transfer/fast_neural_style/model/candy-9.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 6728029 3 | hash = '2e92255cedb3856e151b4c07a1e6cec557979af2f5a8c7ddfa3fc979e81a6232' 4 | -------------------------------------------------------------------------------- /vision/style_transfer/fast_neural_style/model/candy-9.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 7338825 3 | hash = '9c3edda686c6f843268ee17d092b1aac452f2ae5bcd464c0bbce389628e2a481' 4 | -------------------------------------------------------------------------------- /vision/style_transfer/fast_neural_style/model/mosaic-8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 6726529 3 | hash = '12933ae38f0ae127fdc288abac4772c4da09efcd6d3a47e22340eb22b4ff44bd' 4 | -------------------------------------------------------------------------------- /vision/style_transfer/fast_neural_style/model/mosaic-8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 7333067 3 | hash = 'db27f5fe7575a70b3fdcf48f084f2564dc71eece05b00ca74f891a37277d2300' 4 | 
-------------------------------------------------------------------------------- /vision/style_transfer/fast_neural_style/model/mosaic-9.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 6728029 3 | hash = 'eb760de2a1e26da0d2faa45bfc13476aed07d0495c5d8e66f246a0dcbf30952d' 4 | -------------------------------------------------------------------------------- /vision/style_transfer/fast_neural_style/model/mosaic-9.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 7333449 3 | hash = '2cb44086c11568fee0361f655c40c3145aed33e6ea5f5c347f5f1ca38412acf4' 4 | -------------------------------------------------------------------------------- /vision/style_transfer/fast_neural_style/model/pointilism-8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 6726529 3 | hash = '73f1177437d57552776324991bf55516fd063e920a21c2e697fb92da14cb9a40' 4 | -------------------------------------------------------------------------------- /vision/style_transfer/fast_neural_style/model/pointilism-8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 7333381 3 | hash = 'bb0d37df0c061e23ee52cc2e514a3487eb47773955bcdbc95fb6929888032a84' 4 | -------------------------------------------------------------------------------- /vision/style_transfer/fast_neural_style/model/pointilism-9.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 6728029 3 | hash = '70aa86c08cb4c379549de22df2aff337525056ffe339b03f1fa0dbc6479ca386' 4 | -------------------------------------------------------------------------------- /vision/style_transfer/fast_neural_style/model/pointilism-9.tar.gz: -------------------------------------------------------------------------------- 1 | # 
xet version 0 2 | filesize = 7335554 3 | hash = 'ce70300115423f973c22914b3f3d3113f6d766a21912db5b33c9cbc4c9abcbd7' 4 | -------------------------------------------------------------------------------- /vision/style_transfer/fast_neural_style/model/rain-princess-8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 6726529 3 | hash = '79133084869bb6f4e421e4e998a1adeb0f4b0917847ea6ff0b1c2c63e188507c' 4 | -------------------------------------------------------------------------------- /vision/style_transfer/fast_neural_style/model/rain-princess-8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 7344452 3 | hash = '11b99127176ed762816592fc6fb1d8f9b25d2a908a241293cf0ec99fe36482dc' 4 | -------------------------------------------------------------------------------- /vision/style_transfer/fast_neural_style/model/rain-princess-9.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 6728029 3 | hash = '98f33bb66ab857e39f935fa5da57b1a47daf6b5854e3f533466079789c0146f4' 4 | -------------------------------------------------------------------------------- /vision/style_transfer/fast_neural_style/model/rain-princess-9.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 7344974 3 | hash = 'e82859548f4d7ad09388ae3d680830d6e30ab846600af2a9b4961b3ef082c867' 4 | -------------------------------------------------------------------------------- /vision/style_transfer/fast_neural_style/model/udnie-8.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 6726529 3 | hash = 'f1a18ca9f7e50354ff8d5cf83aa20252cbaf7c4e81eede207efb30356b58874b' 4 | -------------------------------------------------------------------------------- 
/vision/style_transfer/fast_neural_style/model/udnie-8.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 7339429 3 | hash = '732d3376c6275177b26b5dc553f3378b1c80fdf4da85dbb01ae260116be7ff35' 4 | -------------------------------------------------------------------------------- /vision/style_transfer/fast_neural_style/model/udnie-9.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 6728029 3 | hash = 'e6e319b8f3f2c206db1f934bc2117aedf59c2cf1ef39985c80a8996c0a53ea50' 4 | -------------------------------------------------------------------------------- /vision/style_transfer/fast_neural_style/model/udnie-9.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 7338543 3 | hash = '44e9f5a07ae39a96a0c13ad7182e2ce654dca5aef105c5c79e2dc5f0848ca09f' 4 | -------------------------------------------------------------------------------- /vision/super_resolution/sub_pixel_cnn_2016/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Super Resolution 4 | 5 | ## Use cases 6 | The Super Resolution machine learning model sharpens and upscales the input image to refine the details and improve quality. 7 | 8 | ## Description 9 | Super Resolution uses efficient [Sub-pixel convolutional layer](https://arxiv.org/abs/1609.05158) described for increasing spatial resolution within network tasks. By increasing pixel count, images are then clarified, sharpened, and upscaled without losing the input image’s content and characteristics. 
10 | 11 | ## Model 12 | 13 | |Model |Download |Download (with sample test data)| ONNX version | Opset Version| 14 | |-------------|:--------------|:--------------|:--------------| :------------| 15 | |Super_Resolution| [240 KB](model/super-resolution-10.onnx) | [7.6 MB](model/super-resolution-10.tar.gz) | 1.5.0 | 10| 16 | 17 | ## Inference 18 | Get started with this model by running through the [included inference notebook](dependencies/Run_Super_Resolution_Model.ipynb) for Super Resolution or following the steps below. 19 | 20 | ### Input 21 | Image input sizes are dynamic. The inference was done using jpg image. 22 | 23 | ### Preprocessing 24 | Images are resized into (224x224). The image format is changed into YCbCr with color components: greyscale ‘Y’, blue-difference ‘Cb’, and red-difference ‘Cr’. Once the greyscale Y component is extracted, it is then passed through the super resolution model and upscaled. 25 | 26 | from PIL import Image 27 | from resizeimage import resizeimage 28 | import numpy as np 29 | orig_img = Image.open('IMAGE_FILE_PATH') 30 | img = resizeimage.resize_cover(orig_img, [224,224], validate=False) 31 | img_ycbcr = img.convert('YCbCr') 32 | img_y_0, img_cb, img_cr = img_ycbcr.split() 33 | img_ndarray = np.asarray(img_y_0) 34 | img_4 = np.expand_dims(np.expand_dims(img_ndarray, axis=0), axis=0) 35 | img_5 = img_4.astype(np.float32) / 255.0 36 | img_5 37 | 38 | 39 | ### Output 40 | The model outputs a multidimensional array of pixels that are upscaled. Output shape is [batch_size,1,672,672]. The second dimension is one because only the (Y) intensity channel was passed into the super resolution model and upscaled. 41 | 42 | ### Postprocessing 43 | Postprocessing involves converting the array of pixels into an image that is scaled to a higher resolution. The color channels (Cb, Cr) are also scaled to a higher resolution using bicubic interpolation. 
Then the color channels are combined and converted back to RGB format, producing the final output image. 44 | 45 | final_img = Image.merge( 46 | "YCbCr", [ 47 | img_out_y, 48 | img_cb.resize(img_out_y.size, Image.BICUBIC), 49 | img_cr.resize(img_out_y.size, Image.BICUBIC), 50 | ]).convert("RGB") 51 | plt.imshow(final_img) 52 | 53 | 54 | ## Dataset 55 | This model is trained on the [BSD300 Dataset](https://github.com/pytorch/examples/tree/master/super_resolution), using crops from the 200 training images. 56 | 57 | ## Training 58 | View the [training notebook](https://github.com/pytorch/examples/tree/master/super_resolution) to understand details for parameters and network for SuperResolution. 59 | -------------------------------------------------------------------------------- /vision/super_resolution/sub_pixel_cnn_2016/model/super-resolution-10.onnx: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 240078 3 | hash = '6c13e9b06178ce98d476f06130b55a51b96bcb916f898a4e7304e7281ad98f27' 4 | -------------------------------------------------------------------------------- /vision/super_resolution/sub_pixel_cnn_2016/model/super-resolution-10.tar.gz: -------------------------------------------------------------------------------- 1 | # xet version 0 2 | filesize = 2079037 3 | hash = '1bbca87e2d0b0f3b5c33bb59d798f1bcd37a94f762d24d2bbc7704c06316e1a2' 4 | -------------------------------------------------------------------------------- /workflow_scripts/check_model.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Helpers for validating model-zoo ONNX files with onnx.checker and onnxruntime. 3 | from cpuinfo import get_cpu_info 4 | import ort_test_dir_utils 5 | import onnxruntime 6 | import onnx 7 | import os 8 | from shutil import rmtree 9 | import tarfile 10 | import test_utils 11 | 12 | 13 | # True when the CPU reports the avx512vnni flag; used to decide whether quantized-model reference outputs (produced on VNNI hardware) can be reproduced on this machine. def has_vnni_support(): 14 | return "avx512vnni" in set(get_cpu_info()["flags"]) 15 | 16 | 17 | def 
run_onnx_checker(model_path): # Structural validation: load the model and let onnx.checker raise if it is malformed. 18 | model = onnx.load(model_path) 19 | onnx.checker.check_model(model) 20 | 21 | 22 | # Return a human-readable reason to skip the onnxruntime test for model_path, or None if it should run. def ort_skip_reason(model_path): 23 | if (model_path.endswith("-int8.onnx") or model_path.endswith("-qdq.onnx")) and not has_vnni_support(): 24 | # At least run InferenceSession to test shape inference 25 | onnxruntime.InferenceSession(model_path) 26 | return f"Skip ORT test for {model_path} because this machine lacks avx512vnni support and the output.pb was produced with avx512vnni support." 27 | model = onnx.load(model_path) 28 | if model.opset_import[0].version < 7: 29 | return f"Skip ORT test for {model_path} because ORT only supports opset version >= 7" 30 | return None 31 | 32 | 33 | # Bundle source_dir into a gzipped tar at output_filename (GNU format, matching the zoo's archives). def make_tarfile(output_filename, source_dir): 34 | with tarfile.open(output_filename, "w:gz", format=tarfile.GNU_FORMAT) as tar: 35 | tar.add(source_dir, arcname=os.path.basename(source_dir)) 36 | 37 | 38 | # Run model_path through onnxruntime. With no test_data_set, generate fresh test data and pack it into tar_gz_path; otherwise replay the supplied test_data_set_N directories. def run_backend_ort(model_path, test_data_set=None, tar_gz_path=None): 39 | skip_reason = ort_skip_reason(model_path) 40 | if skip_reason: 41 | print(skip_reason) 42 | return 43 | # if "test_data_set_N" doesn't exist, create test_dir 44 | if not test_data_set: 45 | # Start from ORT 1.10, ORT requires explicitly setting the providers parameter if you want to use execution providers 46 | # other than the default CPU provider (as opposed to the previous behavior of providers getting set/registered by default 47 | # based on the build flags) when instantiating InferenceSession. 
48 | # For example, if NVIDIA GPU is available and ORT Python package is built with CUDA, then call API as following: 49 | # onnxruntime.InferenceSession(path/to/model, providers=["CUDAExecutionProvider"]) 50 | onnxruntime.InferenceSession(model_path) 51 | # Get model name without .onnx 52 | model_name = os.path.basename(os.path.splitext(model_path)[0]) 53 | if model_name is None: 54 | print(f"The model path {model_path} is invalid") 55 | return 56 | ort_test_dir_utils.create_test_dir(model_path, "./", test_utils.TEST_ORT_DIR) 57 | ort_test_dir_utils.run_test_dir(test_utils.TEST_ORT_DIR) 58 | if os.path.exists(model_name) and os.path.isdir(model_name): 59 | rmtree(model_name) 60 | os.rename(test_utils.TEST_ORT_DIR, model_name) 61 | make_tarfile(tar_gz_path, model_name) 62 | rmtree(model_name) 63 | # otherwise use the existing "test_data_set_N" as test data 64 | else: 65 | test_dir_from_tar = test_utils.get_model_directory(model_path) 66 | ort_test_dir_utils.run_test_dir(test_dir_from_tar) 67 | # remove the produced test_dir from ORT 68 | test_utils.remove_onnxruntime_test_dir() 69 | -------------------------------------------------------------------------------- /workflow_scripts/test_utils.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | 3 | from pathlib import Path 4 | import subprocess 5 | import tarfile 6 | import os 7 | from shutil import rmtree 8 | 9 | TEST_ORT_DIR = 'ci_test_dir' 10 | TEST_TAR_DIR = 'ci_test_tar_dir' 11 | cwd_path = Path.cwd() 12 | 13 | 14 | def get_model_directory(model_path): 15 | return os.path.dirname(model_path) 16 | 17 | 18 | def run_lfs_install(): 19 | result = subprocess.run(['git', 'lfs', 'install'], cwd=cwd_path, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 20 | print(f'Git LFS install completed with return code= {result.returncode}') 21 | 22 | 23 | def pull_lfs_file(file_name): 24 | result = subprocess.run(['git', 'lfs', 'pull', '--include', 
file_name, '--exclude', '\'\''], cwd=cwd_path, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 25 | print(f'LFS pull completed for {file_name} with return code= {result.returncode}') 26 | 27 | 28 | def run_lfs_prune(): 29 | result = subprocess.run(['git', 'lfs', 'prune'], cwd=cwd_path, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 30 | print(f'LFS prune completed with return code= {result.returncode}') 31 | 32 | 33 | def extract_test_data(file_path): 34 | tar = tarfile.open(file_path, "r:gz") 35 | tar.extractall(TEST_TAR_DIR) 36 | tar.close() 37 | return get_model_and_test_data(TEST_TAR_DIR) 38 | 39 | 40 | def get_model_and_test_data(directory_path): 41 | onnx_model = None 42 | test_data_set = [] 43 | for root, dirs, files in os.walk(directory_path): 44 | for file in files: 45 | if file.endswith('.onnx'): 46 | file_path = os.path.join(root, file) 47 | assert onnx_model is None, "More than one ONNX model detected" 48 | onnx_model = file_path 49 | for subdir in dirs: 50 | # detect any test_data_set 51 | if subdir.startswith('test_data_set_'): 52 | subdir_path = os.path.join(root, subdir) 53 | test_data_set.append(subdir_path) 54 | return onnx_model, test_data_set 55 | 56 | 57 | def remove_tar_dir(): 58 | if os.path.exists(TEST_TAR_DIR) and os.path.isdir(TEST_TAR_DIR): 59 | rmtree(TEST_TAR_DIR) 60 | 61 | 62 | def remove_onnxruntime_test_dir(): 63 | if os.path.exists(TEST_ORT_DIR) and os.path.isdir(TEST_ORT_DIR): 64 | rmtree(TEST_ORT_DIR) 65 | --------------------------------------------------------------------------------