├── .gitignore
├── script
    ├── setup
    │   ├── .cm
    │   │   ├── desc.json
    │   │   ├── meta.json
    │   │   └── info.json
    │   ├── setup.ssd-mobilenet.sh
    │   ├── setup.ssd-resnet.sh
    │   ├── setup.common.sh
    │   └── setup.resnet.sh
    ├── explore-params
    │   ├── .cm
    │   │   ├── desc.json
    │   │   ├── meta.json
    │   │   └── info.json
    │   ├── explore.sh
    │   └── run.sh
    └── .cm
    │   ├── alias-a-setup
    │   ├── alias-u-c03619b7c49c5a52
    │   ├── alias-a-explore-params
    │   └── alias-u-64da79981c7d1099
├── .cm
    ├── alias-a-program
    ├── alias-a-script
    ├── alias-u-84e27ad9dd12e734
    └── alias-u-b0ac08fe1d3c2615
├── COPYRIGHT.txt
├── program
    ├── zpp-worker-tensorrt-py
    │   ├── .cm
    │   │   ├── desc.json
    │   │   ├── info.json
    │   │   └── meta.json
    │   └── zpp_worker_trt.py
    ├── object-detection-zpp-hub-py
    │   ├── .cm
    │   │   ├── desc.json
    │   │   ├── info.json
    │   │   └── meta.json
    │   └── zpp_hub_detect.py
    ├── .cm
    │   ├── alias-a-zpp-worker-tensorrt-py
    │   ├── alias-u-0b248b2913eb548b
    │   ├── alias-a-object-detection-zpp-hub-py
    │   ├── alias-u-1dc4528a1a53c218
    │   ├── alias-a-image-classification-zpp-hub-py
    │   ├── alias-u-6495587eb9150c0b
    │   ├── alias-a-object-detection-zpp-hub-loadgen-py
    │   ├── alias-u-f497e983b6b2eaaf
    │   ├── alias-a-image-classification-zpp-hub-loadgen-py
    │   └── alias-u-c4a80957e3ae1c8f
    ├── image-classification-zpp-hub-py
    │   ├── .cm
    │   │   ├── desc.json
    │   │   ├── info.json
    │   │   └── meta.json
    │   └── zpp_hub_classify.py
    ├── image-classification-zpp-hub-loadgen-py
    │   ├── .cm
    │   │   ├── desc.json
    │   │   ├── info.json
    │   │   └── meta.json
    │   ├── user.conf
    │   └── zpp_hub_classify_loadgen.py
    └── object-detection-zpp-hub-loadgen-py
    │   ├── .cm
    │       ├── desc.json
    │       ├── info.json
    │       └── meta.json
    │   ├── user.conf
    │   └── zpp_hub_detect_loadgen.py
├── README.md
├── .ckr.json
└── LICENSE.txt


/.gitignore:
--------------------------------------------------------------------------------
1 | program/*/tmp/*
2 | 


--------------------------------------------------------------------------------
/script/setup/.cm/desc.json:
--------------------------------------------------------------------------------
1 | {}
2 | 


--------------------------------------------------------------------------------
/script/setup/.cm/meta.json:
--------------------------------------------------------------------------------
1 | {}
2 | 


--------------------------------------------------------------------------------
/.cm/alias-a-program:
--------------------------------------------------------------------------------
1 | b0ac08fe1d3c2615
2 | 


--------------------------------------------------------------------------------
/.cm/alias-a-script:
--------------------------------------------------------------------------------
1 | 84e27ad9dd12e734
2 | 


--------------------------------------------------------------------------------
/.cm/alias-u-84e27ad9dd12e734:
--------------------------------------------------------------------------------
1 | script
2 | 


--------------------------------------------------------------------------------
/.cm/alias-u-b0ac08fe1d3c2615:
--------------------------------------------------------------------------------
1 | program
2 | 


--------------------------------------------------------------------------------
/script/explore-params/.cm/desc.json:
--------------------------------------------------------------------------------
1 | {}
2 | 


--------------------------------------------------------------------------------
/script/explore-params/.cm/meta.json:
--------------------------------------------------------------------------------
1 | {}
2 | 


--------------------------------------------------------------------------------
/script/.cm/alias-a-setup:
--------------------------------------------------------------------------------
1 | c03619b7c49c5a52
2 | 


--------------------------------------------------------------------------------
/script/.cm/alias-u-c03619b7c49c5a52:
--------------------------------------------------------------------------------
1 | setup
2 | 


--------------------------------------------------------------------------------
/COPYRIGHT.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2019-2020 dividiti
2 | 


--------------------------------------------------------------------------------
/program/zpp-worker-tensorrt-py/.cm/desc.json:
--------------------------------------------------------------------------------
1 | {}
2 | 


--------------------------------------------------------------------------------
/program/object-detection-zpp-hub-py/.cm/desc.json:
--------------------------------------------------------------------------------
1 | {}
2 | 


--------------------------------------------------------------------------------
/script/.cm/alias-a-explore-params:
--------------------------------------------------------------------------------
1 | 64da79981c7d1099
2 | 


--------------------------------------------------------------------------------
/script/.cm/alias-u-64da79981c7d1099:
--------------------------------------------------------------------------------
1 | explore-params
2 | 


--------------------------------------------------------------------------------
/program/.cm/alias-a-zpp-worker-tensorrt-py:
--------------------------------------------------------------------------------
1 | 0b248b2913eb548b
2 | 


--------------------------------------------------------------------------------
/program/.cm/alias-u-0b248b2913eb548b:
--------------------------------------------------------------------------------
1 | zpp-worker-tensorrt-py
2 | 


--------------------------------------------------------------------------------
/program/image-classification-zpp-hub-py/.cm/desc.json:
--------------------------------------------------------------------------------
1 | {}
2 | 


--------------------------------------------------------------------------------
/program/.cm/alias-a-object-detection-zpp-hub-py:
--------------------------------------------------------------------------------
1 | 1dc4528a1a53c218
2 | 


--------------------------------------------------------------------------------
/program/.cm/alias-u-1dc4528a1a53c218:
--------------------------------------------------------------------------------
1 | object-detection-zpp-hub-py
2 | 


--------------------------------------------------------------------------------
/program/image-classification-zpp-hub-loadgen-py/.cm/desc.json:
--------------------------------------------------------------------------------
1 | {}
2 | 


--------------------------------------------------------------------------------
/program/object-detection-zpp-hub-loadgen-py/.cm/desc.json:
--------------------------------------------------------------------------------
1 | {}
2 | 


--------------------------------------------------------------------------------
/program/.cm/alias-a-image-classification-zpp-hub-py:
--------------------------------------------------------------------------------
1 | 6495587eb9150c0b
2 | 


--------------------------------------------------------------------------------
/program/.cm/alias-u-6495587eb9150c0b:
--------------------------------------------------------------------------------
1 | image-classification-zpp-hub-py
2 | 


--------------------------------------------------------------------------------
/program/.cm/alias-a-object-detection-zpp-hub-loadgen-py:
--------------------------------------------------------------------------------
1 | f497e983b6b2eaaf
2 | 


--------------------------------------------------------------------------------
/program/.cm/alias-u-f497e983b6b2eaaf:
--------------------------------------------------------------------------------
1 | object-detection-zpp-hub-loadgen-py
2 | 


--------------------------------------------------------------------------------
/program/.cm/alias-a-image-classification-zpp-hub-loadgen-py:
--------------------------------------------------------------------------------
1 | c4a80957e3ae1c8f
2 | 


--------------------------------------------------------------------------------
/program/.cm/alias-u-c4a80957e3ae1c8f:
--------------------------------------------------------------------------------
1 | image-classification-zpp-hub-loadgen-py
2 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CK workflows for experiments with ZeroMQ, LoadGen and TensorRT
2 | 
3 | ```bash
4 | $ ck pull repo --url=https://github.com/dividiti/ck-zeromq
5 | ```
6 | 


--------------------------------------------------------------------------------
/script/setup/.cm/info.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "backup_data_uid": "c03619b7c49c5a52",
 3 |   "backup_module_uid": "84e27ad9dd12e734",
 4 |   "backup_module_uoa": "script",
 5 |   "control": {
 6 |     "engine": "CK",
 7 |     "iso_datetime": "2020-03-09T21:14:06.029449",
 8 |     "version": [
 9 |       "1",
10 |       "12",
11 |       "2"
12 |     ]
13 |   },
14 |   "data_name": "setup"
15 | }
16 | 


--------------------------------------------------------------------------------
/script/explore-params/.cm/info.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "backup_data_uid": "64da79981c7d1099",
 3 |   "backup_module_uid": "84e27ad9dd12e734",
 4 |   "backup_module_uoa": "script",
 5 |   "control": {
 6 |     "engine": "CK",
 7 |     "iso_datetime": "2020-01-17T11:01:57.319371",
 8 |     "version": [
 9 |       "1",
10 |       "11",
11 |       "4"
12 |     ]
13 |   },
14 |   "data_name": "explore-params"
15 | }
16 | 


--------------------------------------------------------------------------------
/.ckr.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "data_uoa": "ck-zeromq",
 3 |   "data_uid": "aa14f24b90ec9276",
 4 |   "data_alias": "ck-zeromq",
 5 |   "data_name": "ck-zeromq",
 6 |   "dict": {
 7 |     "shared": "git",
 8 |     "url": "git@github.com:dividiti/ck-zeromq.git",
 9 |     "repo_deps": [
10 |       {
11 |         "repo_uoa": "ck-tensorrt"
12 |       },
13 |       {
14 |         "repo_uoa": "ck-mlperf"
15 |       }
16 |     ]
17 |   }
18 | }
19 | 


--------------------------------------------------------------------------------
/program/object-detection-zpp-hub-py/.cm/info.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "backup_data_uid": "1dc4528a1a53c218", 
 3 |   "backup_module_uid": "b0ac08fe1d3c2615", 
 4 |   "backup_module_uoa": "program", 
 5 |   "control": {
 6 |     "engine": "CK", 
 7 |     "iso_datetime": "2020-03-16T11:53:09.534707", 
 8 |     "version": [
 9 |       "1", 
10 |       "12", 
11 |       "2"
12 |     ]
13 |   }, 
14 |   "data_name": "object-detection-zpp-hub-py"
15 | }
16 | 


--------------------------------------------------------------------------------
/program/zpp-worker-tensorrt-py/.cm/info.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "backup_data_uid": "0b248b2913eb548b", 
 3 |   "backup_module_uid": "b0ac08fe1d3c2615", 
 4 |   "backup_module_uoa": "program", 
 5 |   "control": {
 6 |     "engine": "CK", 
 7 |     "iso_datetime": "2019-12-12T12:42:20.202044", 
 8 |     "version": [
 9 |       "1", 
10 |       "11", 
11 |       "4", 
12 |       "1"
13 |     ]
14 |   }, 
15 |   "data_name": "zpp-worker-tensorrt-py"
16 | }
17 | 


--------------------------------------------------------------------------------
/program/object-detection-zpp-hub-loadgen-py/.cm/info.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "backup_data_uid": "f497e983b6b2eaaf", 
 3 |   "backup_module_uid": "b0ac08fe1d3c2615", 
 4 |   "backup_module_uoa": "program", 
 5 |   "control": {
 6 |     "engine": "CK", 
 7 |     "iso_datetime": "2020-03-17T12:07:30.704594", 
 8 |     "version": [
 9 |       "1", 
10 |       "12", 
11 |       "2"
12 |     ]
13 |   }, 
14 |   "data_name": "object-detection-zpp-hub-loadgen-py"
15 | }
16 | 


--------------------------------------------------------------------------------
/program/image-classification-zpp-hub-py/.cm/info.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "backup_data_uid": "6495587eb9150c0b", 
 3 |   "backup_module_uid": "b0ac08fe1d3c2615", 
 4 |   "backup_module_uoa": "program", 
 5 |   "control": {
 6 |     "engine": "CK", 
 7 |     "iso_datetime": "2019-12-12T12:55:29.646618", 
 8 |     "version": [
 9 |       "1", 
10 |       "11", 
11 |       "4", 
12 |       "1"
13 |     ]
14 |   }, 
15 |   "data_name": "image-classification-zpp-hub-py"
16 | }
17 | 


--------------------------------------------------------------------------------
/program/image-classification-zpp-hub-loadgen-py/.cm/info.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "backup_data_uid": "c4a80957e3ae1c8f", 
 3 |   "backup_module_uid": "b0ac08fe1d3c2615", 
 4 |   "backup_module_uoa": "program", 
 5 |   "control": {
 6 |     "engine": "CK", 
 7 |     "iso_datetime": "2019-12-27T16:40:01.027933", 
 8 |     "version": [
 9 |       "1", 
10 |       "11", 
11 |       "4", 
12 |       "1"
13 |     ]
14 |   }, 
15 |   "data_name": "image-classification-zpp-hub-loadgen-py"
16 | }
17 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2019-2020 dividiti <info@dividiti.com>
 2 | All rights reserved
 3 | 
 4 | Redistribution and use in source and binary forms, with or without modification,
 5 | are permitted provided that the following conditions are met:
 6 | 
 7 |     1. Redistributions of source code must retain the above copyright notice,
 8 |        this list of conditions and the following disclaimer.
 9 | 
10 |     2. Redistributions in binary form must reproduce the above copyright
11 |        notice, this list of conditions and the following disclaimer in the
12 |        documentation and/or other materials provided with the distribution.
13 | 
14 |     3. Neither the name of the cTuning foundation
15 |        nor the names of its contributors may be used to endorse
16 |        or promote products derived from this software without
17 |        specific prior written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | 


--------------------------------------------------------------------------------
/script/setup/setup.ssd-mobilenet.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | 
 4 | function exit_if_error() {
 5 |     if [ "${?}" != "0" ]; then exit 1; fi
 6 | }
 7 | 
 8 | 
 9 | SSD_MODEL_NAME="SSD-MobileNet"
10 | SSD_MODEL_TAGS="ssd-mobilenet"
11 | SSD_MODEL_SIDE=300
12 | 
13 | 
14 | # Refresh CK-TensorRT and its dependencies.
15 | echo "Refreshing CK-TensorRT ..."
16 | ck pull repo:ck-tensorrt
17 | exit_if_error
18 | 
19 | 
20 | echo
21 | 
22 | 
23 | echo "Setting up ${SSD_MODEL_NAME} and COCO ..."
24 | 
25 | # Skip SSD model setup: should be NO for hub; should be NO for worker.
26 | skip_ssd_setup=${CK_SKIP_SSD_SETUP:-"NO"}
27 | echo "- skip ${SSD_MODEL_NAME} setup (download): ${skip_ssd_setup}"
28 | 
29 | # Skip COCO setup: should be NO for hub; should be YES or NO for worker.
30 | skip_coco_setup=${CK_SKIP_COCO_SETUP:-"NO"}
31 | echo "- skip COCO setup (download and preprocessing): ${skip_coco_setup}"
32 | 
33 | 
34 | echo
35 | 
36 | 
37 | if [ "${skip_ssd_setup}" == "NO" ]; then
38 |   # Install SSD model generated from NVIDIA's MLPerf Inference v0.5 submission.
39 |   # TODO: Xavier only at the moment.
40 |   ck install package --tags=model,tensorrt,downloaded,${SSD_MODEL_TAGS}
41 |   exit_if_error
42 | fi
43 | 
44 | 
45 | if [ "${skip_coco_setup}" == "NO" ]; then
46 |   # Detect OpenCV in its location in JetPack 4.3.
47 |   # TODO: Only works on Jetson machines at the moment.
48 |   ck detect soft --tags=python-package,cv2 --cus.version=JetPack \
49 |     --full_path=/usr/lib/python3.6/dist-packages/cv2/__init__.py
50 |   exit_if_error
51 | 
52 |   # Remove training annotations (~765 MB), leaving only 5,000 images (~788 MB) and
53 |   # validation annotations (~52 MB).
54 |   ck virtual env \
55 |     --tags=object-detection,dataset,coco.2017,val,original,full \
56 |     --shell_cmd='rm -f $CK_ENV_DATASET_COCO_LABELS_DIR/*train2017.json'
57 | 
58 |   # Preprocess for SSD-MobileNet (300x300 input images, 264 KB each, 1.3 GB in total).
59 |   ck install package --tags=dataset,preprocessed,using-opencv,coco.2017,full,side.${SSD_MODEL_SIDE}
60 |   exit_if_error
61 | fi
62 | 
63 | 
64 | echo
65 | 
66 | 
67 | echo "Done."
68 | 


--------------------------------------------------------------------------------
/script/setup/setup.ssd-resnet.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | 
 4 | function exit_if_error() {
 5 |     if [ "${?}" != "0" ]; then exit 1; fi
 6 | }
 7 | 
 8 | 
 9 | SSD_MODEL_NAME="SSD-ResNet"
10 | SSD_MODEL_TAGS="ssd-resnet"
11 | SSD_MODEL_SIDE=1200
12 | 
13 | 
14 | # Refresh CK-TensorRT and its dependencies.
15 | echo "Refreshing CK-TensorRT ..."
16 | ck pull repo:ck-tensorrt
17 | exit_if_error
18 | 
19 | 
20 | echo
21 | 
22 | 
23 | echo "Setting up ${SSD_MODEL_NAME} and COCO ..."
24 | 
25 | # Skip SSD model setup: should be NO for hub; should be NO for worker.
26 | skip_ssd_setup=${CK_SKIP_SSD_SETUP:-"NO"}
27 | echo "- skip ${SSD_MODEL_NAME} setup (download): ${skip_ssd_setup}"
28 | 
29 | # Skip COCO setup: should be NO for hub; should be YES or NO for worker.
30 | skip_coco_setup=${CK_SKIP_COCO_SETUP:-"NO"}
31 | echo "- skip COCO setup (download and preprocessing): ${skip_coco_setup}"
32 | 
33 | 
34 | echo
35 | 
36 | 
37 | if [ "${skip_ssd_setup}" == "NO" ]; then
38 |   # Install SSD models generated from NVIDIA's MLPerf Inference v0.5 submission.
39 |   # TODO: Xavier only at the moment.
40 |   ck install package --tags=model,tensorrt,downloaded,${SSD_MODEL_TAGS}
41 |   ck install package --tags=model,tensorrt,downloaded,${SSD_MODEL_TAGS}.singlestream
42 |   exit_if_error
43 | fi
44 | 
45 | 
46 | if [ "${skip_coco_setup}" == "NO" ]; then
47 |   # Detect OpenCV in its location in JetPack 4.3.
48 |   # TODO: Only works on Jetson machines at the moment.
49 |   ck detect soft --tags=python-package,cv2 --cus.version=JetPack \
50 |     --full_path=/usr/lib/python3.6/dist-packages/cv2/__init__.py
51 |   exit_if_error
52 | 
53 |   # Remove training annotations (~765 MB), leaving only 5,000 images (~788 MB) and
54 |   # validation annotations (~52 MB).
55 |   ck virtual env \
56 |     --tags=object-detection,dataset,coco.2017,val,original,full \
57 |     --shell_cmd='rm -f $CK_ENV_DATASET_COCO_LABELS_DIR/*train2017.json'
58 | 
59 |   # Preprocess for SSD-ResNet (1200x1200 input images, 4.2 MB each, 21 GB in total).
60 |   ck install package --tags=dataset,preprocessed,using-opencv,coco.2017,full,side.${SSD_MODEL_SIDE}
61 |   exit_if_error
62 | fi
63 | 
64 | 
65 | echo
66 | 
67 | 
68 | echo "Done."
69 | 


--------------------------------------------------------------------------------
/program/zpp-worker-tensorrt-py/.cm/meta.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "backup_data_uid": "0b248b2913eb548b", 
 3 |   "pass_env_to_resolve": "yes",
 4 |   "build_compiler_vars": {}, 
 5 |   "data_name": "zpp-worker-tensorrt-py", 
 6 |   "main_language": "python", 
 7 |   "no_compile": "yes", 
 8 |   "no_target_file": "yes", 
 9 |   "process_in_tmp": "yes", 
10 |   "program": "yes", 
11 |   "run_cmds": {
12 |     "default": {
13 |       "ignore_return_code": "no", 
14 |       "run_time": {
15 |         "run_cmd_main": "$<<CK_ENV_COMPILER_PYTHON_FILE>>$ ../zpp_worker_trt.py"
16 |       }
17 |     }
18 |   }, 
19 |   "run_deps": {
20 |     "lib-python-numpy": {
21 |       "local": "yes", 
22 |       "name": "Python NumPy library", 
23 |       "sort": 10, 
24 |       "tags": "lib,python-package,numpy"
25 |     }, 
26 |     "lib-python-pycuda": {
27 |       "local": "yes", 
28 |       "name": "Python PyCUDA library", 
29 |       "sort": 20, 
30 |       "tags": "lib,python-package,pycuda"
31 |     }, 
32 |     "lib-python-tensorrt": {
33 |       "local": "yes", 
34 |       "name": "Python TensorRT library", 
35 |       "sort": 30, 
36 |       "tags": "lib,python-package,tensorrt"
37 |     }, 
38 |     "lib-python-zmq": {
39 |       "local": "yes", 
40 |       "name": "Python ZeroMQ library", 
41 |       "sort": 40, 
42 |       "tags": "lib,python-package,zmq"
43 |     }, 
44 |     "plugin-nms": {
45 |       "enable_if_env": [ { "CK_WORKER_OUTPUT_FORMAT": ["direct_return"] } ],
46 |       "local": "yes",
47 |       "name": "TensorRT NMS plugin",
48 |       "sort": 60,
49 |       "tags": "tensorrt,plugin,nms"
50 |     },
51 |     "weights": {
52 |       "local": "yes", 
53 |       "name": "TensorRT model", 
54 |       "sort": 50, 
55 |       "tags": "tensorrt,model"
56 |     }
57 |   }, 
58 |   "run_vars": {
59 |     "CK_WORKER_ID": "",
60 |     "CK_WORKER_JOB_LIMIT": 0,
61 |     "CK_WORKER_OUTPUT_FORMAT": "softmax",
62 |     "CK_WORKER_POSTWORK_TIMEOUT_S": "",
63 |     "CK_HUB_IP": "localhost",
64 |     "CK_ZMQ_FAN_PORT": 5557,
65 |     "CK_ZMQ_FUNNEL_PORT": 5558,
66 |     "CK_TRANSFER_MODE": "numpy",
67 |     "CK_TRANSFER_FLOAT": "YES",
68 |     "CK_PREPROCESS_ON_GPU": "NO"
69 |   }, 
70 |   "tags": [
71 |     "image-classification", 
72 |     "zeromq", 
73 |     "zmq",
74 |     "worker",
75 |     "lang-python"
76 |   ]
77 | }
78 | 


--------------------------------------------------------------------------------
/program/image-classification-zpp-hub-py/.cm/meta.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "backup_data_uid": "6495587eb9150c0b", 
 3 |   "build_compiler_vars": {}, 
 4 |   "data_name": "image-classification-zpp-hub-py", 
 5 |   "main_language": "python", 
 6 |   "no_compile": "yes", 
 7 |   "no_target_file": "yes", 
 8 |   "process_in_tmp": "yes", 
 9 |   "program": "yes", 
10 |   "run_cmds": {
11 |     "default": {
12 |       "ignore_return_code": "no", 
13 |       "run_time": {
14 |         "fine_grain_timer_file": "tmp-ck-timer.json", 
15 |         "post_process_via_ck": {
16 |           "data_uoa": "b98ee24399ef4c3a", 
17 |           "module_uoa": "script", 
18 |           "script_name": "postprocess"
19 |         }, 
20 |         "run_cmd_main": "$<<CK_ENV_COMPILER_PYTHON_FILE>>$ ../zpp_hub_classify.py"
21 |       }
22 |     }
23 |   }, 
24 |   "run_deps": {
25 |     "imagenet-aux": {
26 |       "force_target_as_host": "yes", 
27 |       "local": "yes", 
28 |       "name": "ImageNet dataset (aux)", 
29 |       "sort": 10, 
30 |       "tags": "dataset,imagenet,aux"
31 |     }, 
32 |     "images": {
33 |       "force_target_as_host": "yes", 
34 |       "local": "yes", 
35 |       "name": "ImageNet dataset (preprocessed subset)", 
36 |       "sort": 20, 
37 |       "tags": "dataset,imagenet,preprocessed"
38 |     }, 
39 |     "imagenet-helper": {
40 |       "local": "yes",
41 |       "name": "Python ImageNet helper functions and metadata",
42 |       "sort": 35,
43 |       "tags": "lib,python-package,imagenet-helper"
44 |     },
45 |     "lib-python-numpy": {
46 |       "local": "yes", 
47 |       "name": "Python NumPy library", 
48 |       "sort": 30, 
49 |       "tags": "lib,python-package,numpy"
50 |     }, 
51 |     "lib-python-zeromq": {
52 |       "local": "yes", 
53 |       "name": "Python ZeroMQ library", 
54 |       "sort": 40, 
55 |       "tags": "lib,python-package,zmq"
56 |     }, 
57 |     "weights": {
58 |       "local": "yes", 
59 |       "name": "TensorRT model", 
60 |       "sort": 50, 
61 |       "tags": "tensorrt,model,image-classification"
62 |     }
63 |   }, 
64 |   "run_vars": {
65 |     "CK_BATCH_COUNT": 1, 
66 |     "CK_BATCH_SIZE": 1, 
67 |     "CK_RESULTS_DIR": "predictions", 
68 |     "CK_SILENT_MODE": 0,
69 |     "CK_SKIP_IMAGES": 0,
70 |     "CK_SLEEP_AFTER_SEND_MS": 0,
71 |     "CK_ZMQ_FAN_PORT": 5557,
72 |     "CK_ZMQ_FUNNEL_PORT": 5558,
73 |     "CK_TRANSFER_MODE": "numpy",
74 |     "CK_TRANSFER_FLOAT": "YES",
75 |     "CK_PREPROCESS_ON_GPU": "NO"
76 |   }, 
77 |   "tags": [
78 |     "image-classification", 
79 |     "zeromq", 
80 |     "zmq",
81 |     "hub",
82 |     "lang-python"
83 |   ]
84 | }
85 | 


--------------------------------------------------------------------------------
/script/setup/setup.common.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | 
 4 | function exit_if_error() {
 5 |     if [ "${?}" != "0" ]; then exit 1; fi
 6 | }
 7 | 
 8 | 
 9 | echo "Setting up CK-ZeroMQ ..."
10 | 
11 | # Skip Python setup: should be NO for hub; should be NO for worker.
12 | skip_python_setup=${CK_SKIP_PYTHON_SETUP:-"NO"}
13 | echo "- skip Python setup: ${skip_python_setup}"
14 | 
15 | # Skip NVIDIA setup: can be YES or NO for hub; should be NO for worker.
16 | skip_nvidia_setup=${CK_SKIP_NVIDIA_SETUP:-"NO"}
17 | echo "- skip NVIDIA setup: ${skip_nvidia_setup}"
18 | 
19 | # Skip LoadGen setup: should be NO for hub; can be YES or NO for worker.
20 | skip_loadgen_setup=${CK_SKIP_LOADGEN_SETUP:-"NO"}
21 | echo "- skip LoadGen setup: ${skip_loadgen_setup}"
22 | 
23 | 
24 | echo
25 | 
26 | 
27 | # Refresh CK-ZeroMQ and its dependencies.
28 | echo "Refreshing CK-ZeroMQ ..."
29 | ck pull repo:ck-zeromq --url=https://github.com/dividiti/ck-zeromq
30 | exit_if_error
31 | 
32 | 
33 | echo
34 | 
35 | 
36 | if [ "${skip_python_setup}" == "NO" ]; then
37 |   # Set up Python, NumPy, PyZMQ.
38 |   echo "Setting up Python 3 and essential packages ..."
39 |   ck detect soft:compiler.python --full_path=`which python3`
40 |   exit_if_error
41 | 
42 |   ck install package --tags=python-package,cython
43 |   exit_if_error
44 | 
45 |   # NB: Building NumPy 1.18.1 requires Cython >= 0.29.14.
46 |   ck virtual env --tags=cython --shell_cmd='ck install package --tags=python-package,numpy'
47 |   exit_if_error
48 | 
49 |   ck install package --tags=python-package,zmq
50 |   exit_if_error
51 | fi
52 | 
53 | 
54 | if [ "${skip_nvidia_setup}" == "NO" ]; then
55 |   # Detect TensorRT and PyTensorRT.
56 |   echo "Setting up TensorRT/PyTensorRT ..."
57 | 
58 |   ck detect soft:lib.tensorrt --full_path=/usr/lib/aarch64-linux-gnu/libnvinfer.so
59 |   exit_if_error
60 | 
61 |   ck detect soft:lib.python.tensorrt --full_path=/usr/lib/python3.6/dist-packages/tensorrt/__init__.py
62 |   exit_if_error
63 | 
64 |   # Detect GCC/CUDA and install PyCUDA.
65 |   echo "Setting up CUDA/PyCUDA ..."
66 | 
67 |   ck detect soft:compiler.gcc --full_path=`which gcc-7`
68 |   exit_if_error
69 | 
70 |   ck detect soft:compiler.cuda --full_path=/usr/local/cuda-10.0/bin/nvcc
71 |   exit_if_error
72 | 
73 |   ck install package --tags=python-package,pycuda
74 |   exit_if_error
75 | fi
76 | 
77 | 
78 | if [ "${skip_loadgen_setup}" == "NO" ]; then
79 |   # Install MLPerf Inference packages.
80 |   echo "Setting up MLPerf Inference packages ..."
81 | 
82 |   ck install package --tags=mlperf,inference,source
83 |   exit_if_error
84 | 
85 |   ck install package --tags=python-package,absl
86 |   exit_if_error
87 | 
88 |   ck install package --tags=mlperf,loadgen,python-package
89 |   exit_if_error
90 | fi
91 | 
92 | 
93 | echo
94 | 
95 | 
96 | echo "Done."
97 | 


--------------------------------------------------------------------------------
/program/object-detection-zpp-hub-py/.cm/meta.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "backup_data_uid": "1dc4528a1a53c218", 
 3 |   "build_compiler_vars": {}, 
 4 |   "data_name": "object-detection-zpp-hub-py", 
 5 |   "main_language": "python", 
 6 |   "no_compile": "yes", 
 7 |   "no_target_file": "yes", 
 8 |   "process_in_tmp": "yes", 
 9 |   "program": "yes", 
10 |   "run_cmds": {
11 |     "default": {
12 |       "ignore_return_code": "no", 
13 |       "run_time": {
14 |         "fine_grain_timer_file": "tmp-ck-timer.json", 
15 |         "post_process_via_ck": {
16 |           "data_uoa": "24c98b0cee248d93", 
17 |           "module_uoa": "script", 
18 |           "script_name": "iniless_postprocess"
19 |         }, 
20 |         "run_cmd_main": "$<<CK_ENV_COMPILER_PYTHON_FILE>>$ ../zpp_hub_detect.py"
21 |       }
22 |     }
23 |   }, 
24 |   "run_deps": {
25 |     "tool-coco": {
26 |       "local": "yes",
27 |       "name": "Python API for COCO",
28 |       "sort": 60,
29 |       "tags": "tool,coco"
30 |     },
31 |     "dataset": {
32 |       "force_target_as_host": "yes",
33 |       "local": "yes",
34 |       "name": "Preprocessed subset of COCO dataset",
35 |       "sort": 20,
36 |       "tags": "dataset,preprocessed,object-detection"
37 |     },
38 |     "coco-helper": {
39 |       "local": "yes",
40 |       "name": "Python COCO helper functions and metadata",
41 |       "sort": 35,
42 |       "tags": "lib,python-package,coco-helper"
43 |     },
44 |     "lib-python-matplotlib": {
45 |       "local": "yes",
46 |       "name": "Python Matplotlib library",
47 |       "sort": 40,
48 |       "tags": "lib,python-package,matplotlib"
49 |     },
50 |     "lib-python-numpy": {
51 |       "local": "yes", 
52 |       "name": "Python NumPy library", 
53 |       "sort": 30, 
54 |       "tags": "lib,python-package,numpy"
55 |     }, 
56 |     "lib-python-zeromq": {
57 |       "local": "yes", 
58 |       "name": "Python ZeroMQ library", 
59 |       "sort": 10, 
60 |       "tags": "lib,python-package,zmq"
61 |     }, 
62 |     "weights": {
63 |       "local": "yes", 
64 |       "name": "TensorRT model", 
65 |       "sort": 50, 
66 |       "tags": "tensorrt,model,object-detection"
67 |     }
68 |   }, 
69 |   "run_vars": {
70 |     "CK_ANNOTATIONS_OUT_DIR": "annotations",
71 |     "CK_DETECTIONS_OUT_DIR": "detections",
72 |     "CK_DETECTION_THRESHOLD": 0.0,
73 |     "CK_PREPROCESSED_OUT_DIR": "preprocessed",
74 |     "CK_RESULTS_OUT_DIR": "results",
75 |     "CK_BATCH_COUNT": 1,
76 |     "CK_BATCH_SIZE": 1,
77 |     "CK_SILENT_MODE": 0,
78 |     "CK_SKIP_IMAGES": 0,
79 |     "CK_TIMER_FILE": "tmp-ck-timer.json",
80 | 
81 |     "CK_PREPROCESS_ON_GPU": "NO", 
82 |     "CK_SLEEP_AFTER_SEND_MS": 0, 
83 |     "CK_TRANSFER_FLOAT": "YES", 
84 |     "CK_TRANSFER_MODE": "numpy", 
85 |     "CK_ZMQ_FAN_PORT": 5557, 
86 |     "CK_ZMQ_FUNNEL_PORT": 5558
87 |   }, 
88 |   "tags": [
89 |     "object-detection",
90 |     "zeromq", 
91 |     "zmq", 
92 |     "hub", 
93 |     "lang-python"
94 |   ]
95 | }
96 | 


--------------------------------------------------------------------------------
/program/object-detection-zpp-hub-loadgen-py/user.conf:
--------------------------------------------------------------------------------
 1 | # The format of this config file is 'key = value'.
 2 | # The key has the format 'model.scenario.key'. Value is mostly int64_t.
 3 | # Model maybe '*' as wildcard. In that case the value applies to all models.
 4 | # All times are in milli seconds
 5 | 
 6 | *.SingleStream.qsl_rng_seed = 3133965575612453542
 7 | *.SingleStream.sample_index_rng_seed = 665484352860916858
 8 | *.SingleStream.schedule_rng_seed = 3622009729038561421
 9 | *.SingleStream.target_latency = 5
10 | *.SingleStream.target_latency_percentile = 90
11 | *.SingleStream.min_duration = 60000
12 | *.SingleStream.min_query_count = 1024
13 | mobilenet.SingleStream.performance_sample_count_override = 1024
14 | resnet50.SingleStream.performance_sample_count_override = 1024
15 | ssd-mobilenet.SingleStream.performance_sample_count_override = 256
16 | ssd-resnet34.SingleStream.performance_sample_count_override = 64
17 | gnmt.SingleStream.performance_sample_count_override = 3903900
18 | 
19 | *.MultiStream.qsl_rng_seed = 3133965575612453542
20 | *.MultiStream.sample_index_rng_seed = 665484352860916858
21 | *.MultiStream.schedule_rng_seed = 3622009729038561421
22 | *.MultiStream.target_qps = 20
23 | *.MultiStream.target_latency_percentile = 99
24 | *.MultiStream.samples_per_query = 4
25 | *.MultiStream.max_async_queries = 1
26 | *.MultiStream.target_latency = 50
27 | *.MultiStream.min_duration = 60000
28 | *.MultiStream.min_query_count = 270336
29 | ssd-resnet34.MultiStream.target_qps = 15
30 | ssd-resnet34.MultiStream.target_latency = 66
31 | gnmt.MultiStream.min_query_count = 90112
32 | gnmt.MultiStream.target_latency = 100
33 | gnmt.MultiStream.target_qps = 10
34 | gnmt.MultiStream.target_latency_percentile = 97
35 | mobilenet.MultiStream.performance_sample_count_override = 1024
36 | resnet50.MultiStream.performance_sample_count_override = 1024
37 | ssd-mobilenet.MultiStream.performance_sample_count_override = 256
38 | ssd-resnet34.MultiStream.performance_sample_count_override = 64
39 | gnmt.MultiStream.performance_sample_count_override = 3903900
40 | 
41 | *.Server.qsl_rng_seed = 3133965575612453542
42 | *.Server.sample_index_rng_seed = 665484352860916858
43 | *.Server.schedule_rng_seed = 3622009729038561421
44 | *.Server.target_qps = 1.0
45 | *.Server.target_latency = 10
46 | *.Server.target_latency_percentile = 99
47 | *.Server.target_duration = 0
48 | *.Server.min_duration = 60000
49 | *.Server.min_query_count = 270336
50 | resnet50.Server.target_latency = 15
51 | ssd-resnet34.Server.target_latency = 100
52 | gnmt.Server.min_query_count = 90112
53 | gnmt.Server.target_latency = 250
54 | gnmt.Server.target_latency_percentile = 97
55 | mobilenet.Server.performance_sample_count_override = 1024
56 | resnet50.Server.performance_sample_count_override = 1024
57 | ssd-mobilenet.Server.performance_sample_count_override = 256
58 | ssd-resnet34.Server.performance_sample_count_override = 64
59 | gnmt.Server.performance_sample_count_override = 3903900
60 | 
61 | *.Offline.qsl_rng_seed = 3133965575612453542
62 | *.Offline.sample_index_rng_seed = 665484352860916858
63 | *.Offline.schedule_rng_seed = 3622009729038561421
64 | *.Offline.target_qps = 70
65 | *.Offline.target_latency_percentile = 90
66 | *.Offline.min_duration = 60000
67 | *.Offline.min_query_count = 1
68 | mobilenet.Offline.performance_sample_count_override = 1024
69 | resnet50.Offline.performance_sample_count_override = 1024
70 | ssd-mobilenet.Offline.performance_sample_count_override = 256
71 | ssd-resnet34.Offline.performance_sample_count_override = 64
72 | gnmt.Offline.performance_sample_count_override = 3903900
73 | 


--------------------------------------------------------------------------------
/program/image-classification-zpp-hub-loadgen-py/user.conf:
--------------------------------------------------------------------------------
 1 | # The format of this config file is 'key = value'.
 2 | # The key has the format 'model.scenario.key'. Value is mostly int64_t.
 3 | # Model maybe '*' as wildcard. In that case the value applies to all models.
 4 | # All times are in milli seconds
 5 | 
 6 | *.SingleStream.qsl_rng_seed = 3133965575612453542
 7 | *.SingleStream.sample_index_rng_seed = 665484352860916858
 8 | *.SingleStream.schedule_rng_seed = 3622009729038561421
 9 | *.SingleStream.target_latency = 5
10 | *.SingleStream.target_latency_percentile = 90
11 | *.SingleStream.min_duration = 60000
12 | *.SingleStream.min_query_count = 1024
13 | mobilenet.SingleStream.performance_sample_count_override = 1024
14 | resnet50.SingleStream.performance_sample_count_override = 1024
15 | ssd-mobilenet.SingleStream.performance_sample_count_override = 256
16 | ssd-resnet34.SingleStream.performance_sample_count_override = 64
17 | gnmt.SingleStream.performance_sample_count_override = 3903900
18 | 
19 | *.MultiStream.qsl_rng_seed = 3133965575612453542
20 | *.MultiStream.sample_index_rng_seed = 665484352860916858
21 | *.MultiStream.schedule_rng_seed = 3622009729038561421
22 | *.MultiStream.target_qps = 20
23 | *.MultiStream.target_latency_percentile = 99
24 | *.MultiStream.samples_per_query = 4
25 | *.MultiStream.max_async_queries = 1
26 | *.MultiStream.target_latency = 50
27 | *.MultiStream.min_duration = 60000
28 | *.MultiStream.min_query_count = 270336
29 | ssd-resnet34.MultiStream.target_qps = 15
30 | ssd-resnet34.MultiStream.target_latency = 66
31 | gnmt.MultiStream.min_query_count = 90112
32 | gnmt.MultiStream.target_latency = 100
33 | gnmt.MultiStream.target_qps = 10
34 | gnmt.MultiStream.target_latency_percentile = 97
35 | mobilenet.MultiStream.performance_sample_count_override = 1024
36 | resnet50.MultiStream.performance_sample_count_override = 1024
37 | ssd-mobilenet.MultiStream.performance_sample_count_override = 256
38 | ssd-resnet34.MultiStream.performance_sample_count_override = 64
39 | gnmt.MultiStream.performance_sample_count_override = 3903900
40 | 
41 | *.Server.qsl_rng_seed = 3133965575612453542
42 | *.Server.sample_index_rng_seed = 665484352860916858
43 | *.Server.schedule_rng_seed = 3622009729038561421
44 | *.Server.target_qps = 1.0
45 | *.Server.target_latency = 10
46 | *.Server.target_latency_percentile = 99
47 | *.Server.target_duration = 0
48 | *.Server.min_duration = 60000
49 | *.Server.min_query_count = 270336
50 | resnet50.Server.target_latency = 15
51 | ssd-resnet34.Server.target_latency = 100
52 | gnmt.Server.min_query_count = 90112
53 | gnmt.Server.target_latency = 250
54 | gnmt.Server.target_latency_percentile = 97
55 | mobilenet.Server.performance_sample_count_override = 1024
56 | resnet50.Server.performance_sample_count_override = 1024
57 | ssd-mobilenet.Server.performance_sample_count_override = 256
58 | ssd-resnet34.Server.performance_sample_count_override = 64
59 | gnmt.Server.performance_sample_count_override = 3903900
60 | 
61 | *.Offline.qsl_rng_seed = 3133965575612453542
62 | *.Offline.sample_index_rng_seed = 665484352860916858
63 | *.Offline.schedule_rng_seed = 3622009729038561421
64 | *.Offline.target_qps = 70
65 | *.Offline.target_latency_percentile = 90
66 | *.Offline.min_duration = 60000
67 | *.Offline.min_query_count = 1
68 | mobilenet.Offline.performance_sample_count_override = 1024
69 | resnet50.Offline.performance_sample_count_override = 1024
70 | ssd-mobilenet.Offline.performance_sample_count_override = 256
71 | ssd-resnet34.Offline.performance_sample_count_override = 64
72 | gnmt.Offline.performance_sample_count_override = 3903900
73 | 


--------------------------------------------------------------------------------
/script/setup/setup.resnet.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | 
 4 | function exit_if_error() {
 5 |     if [ "${?}" != "0" ]; then exit 1; fi
 6 | }
 7 | 
 8 | 
 9 | # Refresh CK-MLPerf and its dependencies.
10 | echo "Refreshing CK-MLPerf ..."
11 | ck pull repo:ck-mlperf
12 | exit_if_error
13 | 
14 | 
15 | echo
16 | 
17 | 
18 | echo "Setting up ResNet and ImageNet ..."
19 | 
20 | # Skip ResNet setup: can be YES or NO for hub; should be NO for worker.
21 | skip_resnet_setup=${CK_SKIP_RESNET_SETUP:-"NO"}
22 | echo "- skip ResNet setup: ${skip_resnet_setup}"
23 | 
24 | # Fake ResNet detection: can be NO or YES for hub; should be NO for worker.
25 | fake_resnet_detection=${CK_FAKE_RESNET_DETECTION:-"NO"}
26 | ck_tools=${CK_TOOLS:-"$HOME/CK-TOOLS"}
27 | echo "- fake ResNet detection: ${fake_resnet_detection} (CK_TOOLS=${ck_tools})"
28 | 
29 | if [ "${skip_resnet_setup}" == "NO" ] && [ "${fake_resnet_detection}" != "NO" ]; then
30 |   echo "ERROR: You cannot set up ResNet and fake ResNet detection at the same time!"
31 |   exit 1
32 | fi
33 | 
34 | if [ "${skip_resnet_setup}" != "NO" ] && [ "${fake_resnet_detection}" == "NO" ]; then
35 |   echo "ERROR: You cannot skip ResNet setup and not to fake ResNet detection at the same time!"
36 |   exit 1
37 | fi
38 | 
39 | # Skip ImageNet detection: should be NO for hub; should be YES for worker.
40 | skip_imagenet_detection=${CK_SKIP_IMAGENET_DETECTION:-"NO"}
41 | echo "- skip ImageNet detection: ${skip_imagenet_detection}"
42 | 
43 | 
44 | echo
45 | 
46 | 
47 | if [ "${skip_resnet_setup}" == "NO" ]; then
48 |   # Install the official MLPerf ONNX model and convert it to TensorRT with predefined options.
49 |   ck install package --tags=model,onnx,resnet,downloaded
50 |   ck install package --tags=model,tensorrt,resnet,converted-from-onnx,maxbatch.20,fp16
51 |   # Install a quantized model converted for Xavier from NVIDIA's v0.5 submission.
52 |   ck install package --tags=model,tensorrt,resnet,downloaded,int8,linear,for.xavier
53 |   exit_if_error
54 | fi
55 | 
56 | 
57 | if [ "${fake_resnet_detection}" != "NO" ]; then
58 |   # Detect fake ResNet model.
59 |   model_dir=${ck_tools}/model-tensorrt-converted-from-onnx-fp16-maxbatch.20-resnet
60 |   model_file=${model_dir}/converted_model.trt
61 |   mkdir -p ${model_dir}
62 |   touch ${model_file}
63 |   ck detect soft:model.tensorrt --cus.version=resnet50-fp16 \
64 |   --full_path=${model_file} \
65 |   --extra_tags=converted,converted-from-onnx,fp16,image-classification,maxbatch.20,model,resnet,tensorrt,trt \
66 |   --ienv.ML_MODEL_MAX_BATCH_SIZE=20 \
67 |   --ienv.ML_MODEL_DATA_TYPE=float16 \
68 |   --ienv.ML_MODEL_DATA_LAYOUT=NCHW \
69 |   --ienv.ML_MODEL_NORMALIZE_DATA=NO \
70 |   --ienv.ML_MODEL_SUBTRACT_MEAN=YES \
71 |   --ienv.ML_MODEL_GIVEN_CHANNEL_MEANS='123.68 116.78 103.94' \
72 |   --ienv.ML_MODEL_IMAGE_HEIGHT=224 \
73 |   --ienv.ML_MODEL_IMAGE_WIDTH=224
74 |   exit_if_error
75 | fi
76 | 
77 | 
78 | if [ "${skip_imagenet_detection}" == "NO" ]; then
79 |   # Detect a preprocessed ImageNet validation dataset (50,000 images).
80 |   echo "Detecting a preprocessed ImageNet validation set ..."
81 |   imagenet_dir=${CK_ENV_DATASET_IMAGENET_PREPROCESSED_DIR:-"/datasets/dataset-imagenet-preprocessed-using-opencv-crop.875-full-inter.linear-side.224/ILSVRC2012_val_00000001.rgb8"}
82 |   imagenet_tags=${CK_ENV_DATASET_IMAGENET_PREPROCESSED_TAGS:-"preprocessed,using-opencv,universal,crop.875,full,inter.linear,side.224"}
83 |   imagenet_version=${CK_ENV_DATASET_IMAGENET_PREPROCESSED_VERSION:-"using-opencv"}
84 |   ck detect soft:dataset.imagenet.preprocessed --full_path=${imagenet_dir} --extra_tags=${imagenet_tags} --cus.version=${imagenet_version}
85 | 
86 |   # Install ImageNet labels.
87 |   ck install package --tags=dataset,imagenet,aux
88 |   exit_if_error
89 | fi
90 | 
91 | 
92 | echo
93 | 
94 | 
95 | echo "Done."
96 | 


--------------------------------------------------------------------------------
/program/image-classification-zpp-hub-loadgen-py/.cm/meta.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "backup_data_uid": "c4a80957e3ae1c8f", 
  3 |   "pass_env_to_resolve": "yes",
  4 |   "build_compiler_vars": {}, 
  5 |   "data_name": "image-classification-zpp-hub-loadgen-py", 
  6 |   "main_language": "python", 
  7 |   "no_compile": "yes", 
  8 |   "no_target_file": "yes", 
  9 |   "process_in_tmp": "yes", 
 10 |   "program": "yes", 
 11 |   "run_cmds": {
 12 |     "default": {
 13 |       "ignore_return_code": "no", 
 14 |       "run_time": {
 15 |         "fine_grain_timer_file": "tmp-ck-timer.json", 
 16 |         "post_process_via_ck": {
 17 |           "data_uoa": "b98ee24399ef4c3a", 
 18 |           "module_uoa": "script", 
 19 |           "script_name": "loadgen_postprocess"
 20 |         }, 
 21 |         "run_cmd_main": "$<<CK_ENV_COMPILER_PYTHON_FILE>>$ ../zpp_hub_classify_loadgen.py"
 22 |       }
 23 |     }
 24 |   }, 
 25 |   "run_deps": {
 26 |     "imagenet-aux": {
 27 |       "force_target_as_host": "yes", 
 28 |       "local": "yes", 
 29 |       "name": "ImageNet dataset (aux)", 
 30 |       "sort": 10, 
 31 |       "tags": "dataset,imagenet,aux"
 32 |     }, 
 33 |     "images": {
 34 |       "force_target_as_host": "yes", 
 35 |       "local": "yes", 
 36 |       "name": "ImageNet dataset (preprocessed subset)", 
 37 |       "sort": 20, 
 38 |       "tags": "dataset,imagenet,preprocessed"
 39 |     }, 
 40 |     "loadgen-config-file": {
 41 |       "enable_if_env": [
 42 |         { "CK_LOADGEN_USE_CONFIG_ENV": ["yes", "Yes", "YES", "on", "On", "ON", "true", "True", "TRUE", "1"] }
 43 |       ],
 44 |       "add_to_path": "no",
 45 |       "local": "yes",
 46 |       "name": "LoadGen Config file",
 47 |       "sort": 35,
 48 |       "tags": "loadgen,config"
 49 |     },
 50 |     "lib-python-loadgen": {
 51 |       "local": "yes", 
 52 |       "name": "Python LoadGen library", 
 53 |       "sort": 40, 
 54 |       "tags": "lib,python-package,loadgen"
 55 |     }, 
 56 |     "lib-python-numpy": {
 57 |       "local": "yes", 
 58 |       "name": "Python NumPy library", 
 59 |       "sort": 50, 
 60 |       "tags": "lib,python-package,numpy"
 61 |     }, 
 62 |     "lib-python-zeromq": {
 63 |       "local": "yes", 
 64 |       "name": "Python ZeroMQ library", 
 65 |       "sort": 60, 
 66 |       "tags": "lib,python-package,zmq"
 67 |     }, 
 68 |     "mlperf-inference-src": {
 69 |       "add_to_path": "no", 
 70 |       "force_target_as_host": "yes", 
 71 |       "local": "yes", 
 72 |       "name": "MLPerf Inference source", 
 73 |       "sort": 110, 
 74 |       "tags": "mlperf,inference,source"
 75 |     }, 
 76 |     "python": {
 77 |       "force_target_as_host": "yes", 
 78 |       "local": "yes", 
 79 |       "name": "Python interpreter", 
 80 |       "sort": 100, 
 81 |       "tags": "compiler,lang-python"
 82 |     }, 
 83 |     "weights": {
 84 |       "local": "yes", 
 85 |       "name": "TensorRT model", 
 86 |       "sort": 30, 
 87 |       "tags": "tensorrt,model,image-classification"
 88 |     }
 89 |   }, 
 90 |   "run_vars": {
 91 |     "CK_BATCH_SIZE": 1, 
 92 |     "CK_LOADGEN_BUFFER_SIZE": 8, 
 93 |     "CK_LOADGEN_COUNT_OVERRIDE": "",
 94 |     "CK_LOADGEN_DATASET_SIZE": 20, 
 95 |     "CK_LOADGEN_MAX_DURATION_S": "",
 96 |     "CK_LOADGEN_MODE": "AccuracyOnly", 
 97 |     "CK_LOADGEN_MULTISTREAMNESS": "", 
 98 |     "CK_LOADGEN_SCENARIO": "SingleStream",
 99 |     "CK_LOADGEN_SIDELOAD_JSON": "non-mlperf_sideload.json",
100 |     "CK_LOADGEN_TARGET_QPS": "",
101 |     "CK_LOADGEN_USE_CONFIG_ENV": "no",
102 |     "CK_LOADGEN_WARMUP_SAMPLES": 0,
103 |     "CK_ZMQ_FAN_PORT": 5557,
104 |     "CK_ZMQ_FUNNEL_PORT": 5558,
105 |     "CK_TRANSFER_MODE": "numpy",
106 |     "CK_TRANSFER_FLOAT": "YES",
107 |     "CK_PREPROCESS_ON_GPU": "NO"
108 |   }, 
109 |   "tags": [
110 |     "image-classification", 
111 |     "zeromq",
112 |     "zmq",
113 |     "hub",
114 |     "loadgen", 
115 |     "lang-python"
116 |   ]
117 | }
118 | 


--------------------------------------------------------------------------------
/program/object-detection-zpp-hub-loadgen-py/.cm/meta.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "backup_data_uid": "f497e983b6b2eaaf", 
  3 |   "build_compiler_vars": {}, 
  4 |   "data_name": "object-detection-zpp-hub-loadgen-py", 
  5 |   "main_language": "python", 
  6 |   "no_compile": "yes", 
  7 |   "no_target_file": "yes", 
  8 |   "pass_env_to_resolve": "yes", 
  9 |   "process_in_tmp": "yes", 
 10 |   "program": "yes", 
 11 |   "run_cmds": {
 12 |     "default": {
 13 |       "ignore_return_code": "no", 
 14 |       "run_time": {
 15 |         "fine_grain_timer_file": "tmp-ck-timer.json", 
 16 |         "post_process_via_ck": {
 17 |           "data_uoa": "24c98b0cee248d93", 
 18 |           "module_uoa": "script", 
 19 |           "script_name": "loadgen_postprocess"
 20 |         }, 
 21 |         "run_cmd_main": "$<<CK_ENV_COMPILER_PYTHON_FILE>>$ ../zpp_hub_detect_loadgen.py"
 22 |       }
 23 |     }
 24 |   }, 
 25 |   "run_deps": {
 26 |     "python": {
 27 |       "force_target_as_host": "yes", 
 28 |       "local": "yes", 
 29 |       "name": "Python interpreter", 
 30 |       "sort": 10, 
 31 |       "tags": "compiler,lang-python"
 32 |     }, 
 33 |     "lib-python-zeromq": {
 34 |       "local": "yes", 
 35 |       "name": "Python ZeroMQ library", 
 36 |       "sort": 20, 
 37 |       "tags": "lib,python-package,zmq"
 38 |     }, 
 39 | 
 40 |     "mlperf-inference-src": {
 41 |       "add_to_path": "no", 
 42 |       "force_target_as_host": "yes", 
 43 |       "local": "yes", 
 44 |       "name": "MLPerf Inference source", 
 45 |       "sort": 110, 
 46 |       "tags": "mlperf,inference,source"
 47 |     }, 
 48 |     "lib-python-loadgen": {
 49 |       "local": "yes", 
 50 |       "name": "Python LoadGen library", 
 51 |       "sort": 120, 
 52 |       "tags": "lib,python-package,loadgen"
 53 |     }, 
 54 |     "loadgen-config-file": {
 55 |       "add_to_path": "no", 
 56 |       "enable_if_env": [ { "CK_LOADGEN_USE_CONFIG_ENV": [ "yes", "Yes", "YES", "on", "On", "ON", "true", "True", "TRUE", "1" ] } ], 
 57 |       "local": "yes", 
 58 |       "name": "LoadGen Config file", 
 59 |       "sort": 130, 
 60 |       "tags": "loadgen,config"
 61 |     }, 
 62 |     "dataset": {
 63 |       "force_target_as_host": "yes",
 64 |       "local": "yes",
 65 |       "name": "Preprocessed subset of COCO dataset",
 66 |       "sort": 210,
 67 |       "tags": "dataset,preprocessed,object-detection,coco"
 68 |     },
 69 |     "weights": {
 70 |       "local": "yes", 
 71 |       "name": "TensorRT object detection model", 
 72 |       "sort": 220, 
 73 |       "tags": "tensorrt,model,object-detection"
 74 |     },
 75 |     "lib-python-numpy": {
 76 |       "local": "yes", 
 77 |       "name": "Python NumPy library", 
 78 |       "sort": 230, 
 79 |       "tags": "lib,python-package,numpy"
 80 |     }, 
 81 |     "lib-python-matplotlib": {
 82 |       "local": "yes",
 83 |       "name": "Python Matplotlib library",
 84 |       "sort": 240,
 85 |       "tags": "lib,python-package,matplotlib"
 86 |     },
 87 |     "tool-coco": {
 88 |       "local": "yes",
 89 |       "name": "Python API for COCO",
 90 |       "sort": 250,
 91 |       "tags": "tool,coco"
 92 |     }
 93 |   }, 
 94 |   "run_vars": {
 95 |     "CK_BATCH_SIZE": 1, 
 96 |     "CK_LOADGEN_BUFFER_SIZE": 8, 
 97 |     "CK_LOADGEN_COUNT_OVERRIDE": "", 
 98 |     "CK_LOADGEN_DATASET_SIZE": 20, 
 99 |     "CK_LOADGEN_MAX_DURATION_S": "", 
100 |     "CK_LOADGEN_MODE": "AccuracyOnly", 
101 |     "CK_LOADGEN_MULTISTREAMNESS": "", 
102 |     "CK_LOADGEN_SCENARIO": "SingleStream", 
103 |     "CK_LOADGEN_SIDELOAD_JSON": "non-mlperf_sideload.json", 
104 |     "CK_LOADGEN_TARGET_QPS": "", 
105 |     "CK_LOADGEN_USE_CONFIG_ENV": "no", 
106 |     "CK_LOADGEN_WARMUP_SAMPLES": 0, 
107 |     "CK_PREPROCESS_ON_GPU": "NO", 
108 |     "CK_TRANSFER_FLOAT": "YES", 
109 |     "CK_TRANSFER_MODE": "numpy", 
110 |     "CK_ZMQ_FAN_PORT": 5557, 
111 |     "CK_ZMQ_FUNNEL_PORT": 5558
112 |   }, 
113 |   "tags": [
114 |     "object-detection",
115 |     "zeromq", 
116 |     "zmq", 
117 |     "hub", 
118 |     "loadgen", 
119 |     "lang-python"
120 |   ]
121 | }
122 | 


--------------------------------------------------------------------------------
/script/explore-params/explore.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | echo "ZeroMQ Push-Pull exploration!"
  4 | 
  5 | # Dry run - print commands but do not execute them.
  6 | dry_run=${CK_DRY_RUN:-""}
  7 | echo "- dry run: ${dry_run}"
  8 | 
  9 | # Hub IP.
 10 | hub_ip=${CK_HUB_IP:-"localhost"}
 11 | echo "- hub IP: ${hub_ip}"
 12 | 
 13 | # Workers can be defined in two ways:
 14 | # (1) As a list of N IPs. Worker IDs get derived as a sequence from 1 to N.
 15 | # (2) As a list of N IDs. Worker IPs get derived as a sequence of 192.168.1.<ID+1>.
 16 | ips=( ${CK_WORKER_IPS:-} ) # use parentheses to interpret the string as an array
 17 | ids=( ${CK_WORKER_IDS:-} ) # use parentheses to interpret the string as an array
 18 | if [[ -z "${ips}" ]] && [[ -z ${ids} ]]
 19 | then
 20 |   # If neither is defined, send to itself.
 21 |   ips=( "${hub_ip}" )
 22 | fi
 23 | if [[ "${ips}" ]] # (1)
 24 | then
 25 |   num_ips=${#ips[@]}
 26 |   ids=( $(seq 1 ${num_ips}) )
 27 |   num_ids=${#ids[@]}
 28 | else # (2)
 29 |   ids=( ${CK_WORKER_IDS:-1} )
 30 |   num_ids=${#ids[@]}
 31 |   ips=( )
 32 |   for id in ${ids[@]}; do
 33 |     id_plus_1=$((id+1))
 34 |     ips+=( "192.168.1.10${id_plus_1}" )
 35 |   done
 36 |   num_ips=${#ips[@]}
 37 | fi
 38 | echo "- ${num_ips} worker IP(s): ${ips[@]}"
 39 | echo "- ${num_ids} worker ID(s): ${ids[@]}"
 40 | if [[ ${num_ips} != ${num_ids} ]]; then
 41 |   echo "ERROR: ${num_ips} not equal to ${num_ids}!"
 42 |   exit 1
 43 | fi
 44 | 
 45 | # Worker ssh ports (22 by default).
 46 | ports=( ${CK_WORKER_PORTS:-} ) # use parentheses to interpret the string as an array
 47 | if [[ -z "${ports}" ]]; then
 48 |   for id in ${ips[@]}; do
 49 |     ports+=( "22" )
 50 |   done
 51 | fi
 52 | num_ports=${#ports[@]}
 53 | echo "- ${num_ports} worker port(s): ${ports[@]}"
 54 | if [[ ${num_ports} != ${num_ips} ]]; then
 55 |   echo "ERROR: ${num_ports} not equal to ${num_ips}!"
 56 |   exit 1
 57 | fi
 58 | 
 59 | # Time each worker should wait after last received work-item before exiting.
 60 | postwork_timeout_s=${CK_WORKER_POSTWORK_TIMEOUT_S:-10}
 61 | echo "- postwork timeout: ${postwork_timeout_s} s"
 62 | 
 63 | # Directory where run.sh is (may not be the current one in the future).
 64 | script_dir=`ck find ck-zeromq:script:explore-params`
 65 | 
 66 | # LoadGen mode: PerformanceOnly, AccuracyOnly.
 67 | mode=${CK_LOADGEN_MODE:-PerformanceOnly}
 68 | if [ "${mode}" = "PerformanceOnly" ]; then
 69 |   mode_tag="performance"
 70 |   dataset_size=${CK_LOADGEN_DATASET_SIZE:-1024}
 71 |   buffer_size=${CK_LOADGEN_BUFFER_SIZE:-1024}
 72 | elif [ "${mode}" = "AccuracyOnly" ]; then
 73 |   mode_tag="accuracy"
 74 |   imagenet_size=50000
 75 |   dataset_size=${CK_LOADGEN_DATASET_SIZE:-${imagenet_size}}
 76 |   buffer_size=${CK_LOADGEN_BUFFER_SIZE:-500}
 77 | else
 78 |   echo "ERROR: Unsupported LoadGen mode '${mode}'!"
 79 |   exit 1
 80 | fi
 81 | echo "- mode: ${mode} (${mode_tag})"
 82 | echo "- dataset size: ${dataset_size}"
 83 | echo "- buffer size: ${buffer_size}"
 84 | 
 85 | # Define the exploration space.
 86 | if [ "${mode_tag}" = "accuracy" ]; then
 87 |   batch_sizes=(1)
 88 |   transfer_modes=("raw")
 89 |   transfer_floats=("YES" "NO")
 90 | else
 91 |   batch_sizes=($(seq 1 4))
 92 |   transfer_modes=("raw" "pickle" "numpy" "json")
 93 |   transfer_floats=("YES" "NO")
 94 | fi
 95 | echo "- batch sizes: [ ${batch_sizes[@]} ]"
 96 | echo "- transfer modes: [ ${transfer_modes[@]} ]"
 97 | echo "- transfer floats: [ ${transfer_floats[@]} ]"
 98 | 
 99 | # Blank line.
100 | echo
101 | 
102 | # Run once for each point.
103 | experiment_id=1
104 | for batch_size in "${batch_sizes[@]}"; do
105 |   for transfer_mode in "${transfer_modes[@]}"; do
106 |     for transfer_float in "${transfer_floats[@]}"; do
107 |       if [ "${transfer_float}" = "YES" ] || [ "${transfer_mode}" = "json" ] ; then
108 |         preprocess_on_gpu_list=("NO")
109 |       else
110 |         preprocess_on_gpu_list=("NO" "YES")
111 |       fi
112 |       for preprocess_on_gpu in "${preprocess_on_gpu_list[@]}"; do
113 |           echo "[`date`] Experiment #${experiment_id}: ..."
114 |           experiment_id=$(( ${experiment_id}+1 ))
115 |           read -d '' CMD <<END_OF_CMD
116 | cd ${script_dir};
117 | CK_DRY_RUN=${dry_run} \
118 | CK_LOADGEN_MODE=${mode} \
119 | CK_LOADGEN_DATASET_SIZE=${dataset_size} \
120 | CK_LOADGEN_BUFFER_SIZE=${buffer_size} \
121 | CK_HUB_IP="${hub_ip}" \
122 | CK_WORKER_IPS="${ips}" \
123 | CK_WORKER_PORTS="${ports}" \
124 | CK_WORKER_POSTWORK_TIMEOUT_S=${postwork_timeout_s} \
125 | CK_BATCH_SIZE=${batch_size} \
126 | CK_TRANSFER_MODE=${transfer_mode} \
127 | CK_TRANSFER_FLOAT=${transfer_float} \
128 | CK_PREPROCESS_ON_GPU=${preprocess_on_gpu} \
129 | ./run.sh
130 | END_OF_CMD
131 |           echo ${CMD}
132 |           if [ -z "${dry_run}" ]; then
133 |             eval ${CMD}
134 |           fi
135 |           echo
136 |       done # preprocess on gpu
137 |     done # transfer float
138 |   done # transfer mode
139 | done # batch size
140 | 
141 | if [ -z "${dry_run}" ]; then
142 |   echo "[`date`] Done."
143 | else
144 |   echo "[`date`] Done (dry run)."
145 | fi
146 | echo
147 | 


--------------------------------------------------------------------------------
/program/object-detection-zpp-hub-py/zpp_hub_detect.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | import json
  4 | import os
  5 | import shutil
  6 | import struct
  7 | import threading
  8 | import time
  9 | 
 10 | from coco_helper import (load_preprocessed_batch, image_filenames, original_w_h, class_labels,
 11 |     MODEL_DATA_LAYOUT, MODEL_COLOURS_BGR, MODEL_INPUT_DATA_TYPE, MODEL_DATA_TYPE, MODEL_USE_DLA, MODEL_MAX_BATCH_SIZE,
 12 |     IMAGE_DIR, IMAGE_LIST_FILE, MODEL_NORMALIZE_DATA, SUBTRACT_MEAN, GIVEN_CHANNEL_MEANS, BATCH_SIZE, BATCH_COUNT)
 13 | 
 14 | import numpy as np
 15 | import zmq
 16 | 
 17 | try:
 18 |     raw_input
 19 | except NameError:
 20 |     # Python 3
 21 |     raw_input = input
 22 | 
 23 | 
 24 | ## Post-detection filtering by confidence score:
 25 | #
 26 | SCORE_THRESHOLD = float(os.getenv('CK_DETECTION_THRESHOLD', 0.0))
 27 | 
 28 | 
 29 | ## Model properties:
 30 | #
 31 | MODEL_PATH              = os.environ['CK_ENV_TENSORRT_MODEL_FILENAME']
 32 | MODEL_MAX_PREDICTIONS   = int(os.getenv('ML_MODEL_MAX_PREDICTIONS', 100))
 33 | MODEL_SKIPPED_CLASSES   = os.getenv("ML_MODEL_SKIPS_ORIGINAL_DATASET_CLASSES", None)
 34 | 
 35 | if (MODEL_SKIPPED_CLASSES):
 36 |     SKIPPED_CLASSES = [int(x) for x in MODEL_SKIPPED_CLASSES.split(",")]
 37 | else:
 38 |     SKIPPED_CLASSES = None
 39 | 
 40 | ## Transfer mode (numpy floats by default):
 41 | #
 42 | TRANSFER_MODE           = os.getenv('CK_TRANSFER_MODE', 'numpy')
 43 | TRANSFER_FLOAT          = (os.getenv('CK_TRANSFER_FLOAT', 'YES') in ('YES', 'yes', 'ON', 'on', '1')) and (MODEL_INPUT_DATA_TYPE == 'float32')
 44 | TRANSFER_TYPE_NP, TRANSFER_TYPE_SYMBOL = (np.float32, 'f') if TRANSFER_FLOAT else (np.int8, 'b')
 45 | 
 46 | SLEEP_AFTER_SEND_MS     = int(os.getenv('CK_SLEEP_AFTER_SEND_MS', 0))
 47 | 
 48 | ## ZMQ ports:
 49 | #
 50 | ZMQ_FAN_PORT            = os.getenv('CK_ZMQ_FAN_PORT', 5557)
 51 | ZMQ_FUNNEL_PORT         = os.getenv('CK_ZMQ_FUNNEL_PORT', 5558)
 52 | 
 53 | 
 54 | ## Writing the results out:
 55 | #
 56 | CUR_DIR = os.getcwd()
 57 | DETECTIONS_OUT_DIR      = os.path.join(CUR_DIR, os.environ['CK_DETECTIONS_OUT_DIR'])
 58 | ANNOTATIONS_OUT_DIR     = os.path.join(CUR_DIR, os.environ['CK_ANNOTATIONS_OUT_DIR'])
 59 | RESULTS_OUT_DIR         = os.path.join(CUR_DIR, os.environ['CK_RESULTS_OUT_DIR'])
 60 | FULL_REPORT             = os.getenv('CK_SILENT_MODE', '0') in ('NO', 'no', 'OFF', 'off', '0')
 61 | 
 62 | 
 63 | ## ZeroMQ communication setup:
 64 | #
 65 | zmq_context = zmq.Context()
 66 | 
 67 | to_workers = zmq_context.socket(zmq.PUSH)
 68 | to_workers.bind("tcp://*:{}".format(ZMQ_FAN_PORT))
 69 | 
 70 | from_workers = zmq_context.socket(zmq.PULL)
 71 | from_workers.bind("tcp://*:{}".format(ZMQ_FUNNEL_PORT))
 72 | 
 73 | 
 74 | ## (Shared) placeholders:
 75 | #
 76 | in_progress     = {}    # to be written to by one thread and read by another
 77 | output_dict     = {     # to be topped up by both threads
 78 |     'batch_size': BATCH_SIZE,
 79 |     'batch_count': BATCH_COUNT,
 80 |     'avg_inference_time_ms_by_worker_id': {},
 81 | }
 82 | output_dictionary = {   # object detection postprocessor prefers another level of nestedness
 83 |     'run_time_state': output_dict
 84 | }
 85 | 
 86 | 
 87 | def fan_code():
 88 | 
 89 |     print("Press Enter when the workers are ready: ")
 90 |     _ = raw_input()
 91 |     print("[fan] Submitting jobs...")
 92 | 
 93 |     fan_start = time.time()
 94 | 
 95 |     image_index = 0
 96 |     for batch_index in range(BATCH_COUNT):
 97 | 
 98 |         batch_first_index = image_index
 99 |         batch_data, image_index = load_preprocessed_batch(image_filenames, image_index)
100 | 
101 |         batch_vector_numpy  = batch_data.ravel()
102 | 
103 |         batch_ids   = list(range(batch_first_index, image_index))
104 |         job_id      = batch_index+1
105 | 
106 |         in_progress[job_id] = {
107 |             'submission_time':  time.time(),
108 |             'batch_ids':        batch_ids,
109 |         }
110 | 
111 |         if TRANSFER_MODE == 'dummy':
112 |             job_data_raw = struct.pack('<II', job_id, BATCH_SIZE)
113 |             to_workers.send(job_data_raw)
114 |         elif TRANSFER_MODE == 'numpy':
115 |             job_data_struct = {
116 |                 'job_id': job_id,
117 |                 'batch_data': batch_vector_numpy,
118 |             }
119 |             to_workers.send_pyobj(job_data_struct)
120 |         elif TRANSFER_MODE == 'pickle':
121 |             job_data_struct = {
122 |                 'job_id': job_id,
123 |                 'batch_data': np.asarray(batch_vector_numpy),
124 |             }
125 |             to_workers.send_pyobj(job_data_struct)
126 |         else:
127 |             batch_vector_array  = batch_vector_numpy.tolist()
128 |             if TRANSFER_MODE == 'raw':
129 |                 job_data_raw = struct.pack('<I{}{}'.format(len(batch_vector_array), TRANSFER_TYPE_SYMBOL), job_id, *batch_vector_array)
130 |                 to_workers.send(job_data_raw)
131 |             elif TRANSFER_MODE == 'json':
132 |                 job_data_struct = {
133 |                     'job_id': job_id,
134 |                     'batch_data': batch_vector_array,
135 |                 }
136 |                 to_workers.send_json(job_data_struct)
137 | 
138 |         print("[fan] -> job_id={} {}".format(job_id, batch_ids))
139 | 
140 |         time.sleep(SLEEP_AFTER_SEND_MS/1000)  # do not overflow the ZeroMQ
141 | 
142 |     fan_time_s = time.time()-fan_start-SLEEP_AFTER_SEND_MS/1000
143 |     print("[fan] Done submitting batches. Submission took {} s".format(fan_time_s))
144 | 
145 |     output_dict['fan_time_s']               = fan_time_s
146 |     output_dict['avg_send_batch_time_ms']   = fan_time_s*1000/BATCH_COUNT
147 | 
148 | 
149 | def funnel_code():
150 | 
151 |     # Cleanup results directory
152 |     if os.path.isdir(DETECTIONS_OUT_DIR):
153 |         shutil.rmtree(DETECTIONS_OUT_DIR)
154 |     os.mkdir(DETECTIONS_OUT_DIR)
155 | 
156 |     bg_class_offset = 1
157 | 
158 |     ## Workaround for SSD-Resnet34 model incorrectly trained on filtered labels
159 |     class_map = None
160 |     if (SKIPPED_CLASSES):
161 |         class_map = []
162 |         for i in range(len(class_labels) + bg_class_offset):
163 |             if i not in SKIPPED_CLASSES:
164 |                 class_map.append(i)
165 | 
166 |     funnel_start = time.time()
167 |     inference_times_ms_by_worker_id = {}
168 | 
169 |     for _ in range(BATCH_COUNT):
170 |         done_job = from_workers.recv_json()
171 | 
172 |         job_id              = done_job['job_id']
173 |         local_metadata      = in_progress.pop(job_id)
174 |         roundtrip_time_ms   = (time.time()-local_metadata['submission_time'])*1000
175 |         worker_id           = done_job['worker_id']
176 |         inference_time_ms   = done_job['inference_time_ms']
177 |         floatize_time_ms    = done_job['floatize_time_ms']
178 | 
179 |         print("[funnel] <- [worker {}] job_id={}, worker_type_conversion={:.2f} ms, inference={:.2f} ms, roundtrip={:.2f} ms".format(
180 |                             worker_id, job_id, floatize_time_ms, inference_time_ms, roundtrip_time_ms))
181 | 
182 |         batch_ids           = local_metadata['batch_ids']
183 |         batch_size          = len(batch_ids)
184 |         apparent_batch_size = MODEL_MAX_BATCH_SIZE if MODEL_USE_DLA else batch_size
185 |         raw_batch_results   = np.array(done_job['raw_batch_results'], dtype=np.float32)
186 |         batch_results       = np.split(raw_batch_results, apparent_batch_size)[:batch_size]
187 | 
188 |         if worker_id not in inference_times_ms_by_worker_id:
189 |             inference_times_ms_by_worker_id[worker_id] = []
190 |         inference_times_ms_by_worker_id[worker_id].append( inference_time_ms )
191 | 
192 |         for global_image_index, single_image_predictions in zip(batch_ids, batch_results):
193 |             num_boxes = single_image_predictions[MODEL_MAX_PREDICTIONS*7].view('int32')
194 |             width_orig, height_orig = original_w_h[global_image_index]
195 | 
196 |             filename_orig = image_filenames[global_image_index]
197 |             detections_filename = os.path.splitext(filename_orig)[0] + '.txt'
198 |             detections_filepath = os.path.join(DETECTIONS_OUT_DIR, detections_filename)
199 | 
200 |             with open(detections_filepath, 'w') as det_file:
201 |                 det_file.write('{:d} {:d}\n'.format(width_orig, height_orig))
202 | 
203 |                 for row in range(num_boxes):
204 |                     (image_id, ymin, xmin, ymax, xmax, confidence, class_number) = single_image_predictions[row*7:(row+1)*7]
205 | 
206 |                     if confidence >= SCORE_THRESHOLD:
207 |                         class_number    = int(class_number)
208 | 
209 |                         if class_map:
210 |                             class_number = class_map[class_number]
211 | 
212 |                         image_id        = int(image_id)
213 |                         x1              = xmin * width_orig
214 |                         y1              = ymin * height_orig
215 |                         x2              = xmax * width_orig
216 |                         y2              = ymax * height_orig
217 |                         class_label     = class_labels[class_number - bg_class_offset]
218 |                         det_file.write('{:.2f} {:.2f} {:.2f} {:.2f} {:.3f} {} {}\n'.format(
219 |                                         x1, y1, x2, y2, confidence, class_number, class_label))
220 | 
221 | 
222 |     funnel_time_s = time.time()-funnel_start
223 |     print("[funnel] Done receiving batches. Receiving took {} s".format(funnel_time_s))
224 | 
225 |     for worker_id in inference_times_ms_by_worker_id:
226 |         offset = 1 if len(inference_times_ms_by_worker_id[worker_id]) > 1 else 0    # skip the potential cold startup in case there is more data
227 |         avg_inference_time_ms_by_worker_id = np.mean(inference_times_ms_by_worker_id[worker_id][offset:])
228 |         output_dict['avg_inference_time_ms_by_worker_id'][worker_id] = avg_inference_time_ms_by_worker_id
229 |         print("[funnel] Average batch inference time on [worker {}] is {}".format(worker_id, avg_inference_time_ms_by_worker_id))
230 | 
231 |     output_dict['funnel_time_s']        = funnel_time_s
232 |     output_dict['avg_rountrip_time_ms'] = funnel_time_s*1000/BATCH_COUNT
233 | 
234 | 
235 | ## We need one thread to feed the ZeroMQ, another (the main one) to read back from it:
236 | #
237 | fan_thread = threading.Thread(target=fan_code, args=())
238 | fan_thread.start()
239 | 
240 | funnel_code()
241 | 
242 | fan_thread.join()
243 | 
244 | 
245 | ## Store benchmarking results:
246 | #
247 | with open('tmp-ck-timer.json', 'w') as out_file:
248 |     json.dump(output_dictionary, out_file, indent=4, sort_keys=True)
249 | 
250 | 


--------------------------------------------------------------------------------
/program/image-classification-zpp-hub-py/zpp_hub_classify.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | import json
  4 | import os
  5 | import shutil
  6 | import struct
  7 | import threading
  8 | import time
  9 | 
 10 | from imagenet_helper import (load_preprocessed_batch, image_list, class_labels,
 11 |     MODEL_DATA_LAYOUT, MODEL_COLOURS_BGR, MODEL_INPUT_DATA_TYPE, MODEL_DATA_TYPE, MODEL_USE_DLA, MODEL_MAX_BATCH_SIZE,
 12 |     IMAGE_DIR, IMAGE_LIST_FILE, MODEL_NORMALIZE_DATA, SUBTRACT_MEAN, GIVEN_CHANNEL_MEANS, BATCH_SIZE)
 13 | 
 14 | import numpy as np
 15 | import zmq
 16 | 
 17 | try:
 18 |     raw_input
 19 | except NameError:
 20 |     # Python 3
 21 |     raw_input = input
 22 | 
 23 | 
 24 | import sys
 25 | try:
 26 |     sys.getwindowsversion()
 27 | except AttributeError:
 28 |     win = False
 29 | else:
 30 |     win = True
 31 | 
 32 | if win:
 33 |     import win32api,win32process,win32con
 34 |     pid = win32api.GetCurrentProcessId()
 35 |     handle = win32api.OpenProcess(win32con.PROCESS_ALL_ACCESS, True, pid)
 36 |     # https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-setpriorityclass
 37 |     print("Setting REALTIME_PRIORITY_CLASS on Windows ...")
 38 |     win32process.SetPriorityClass(handle, win32process.REALTIME_PRIORITY_CLASS)
 39 | 
 40 | 
 41 | ## Model properties:
 42 | #
 43 | MODEL_PATH              = os.environ['CK_ENV_TENSORRT_MODEL_FILENAME']
 44 | 
 45 | 
 46 | ## Transfer mode (numpy floats by default):
 47 | #
 48 | TRANSFER_MODE           = os.getenv('CK_TRANSFER_MODE', 'numpy')
 49 | TRANSFER_FLOAT          = (os.getenv('CK_TRANSFER_FLOAT', 'YES') in ('YES', 'yes', 'ON', 'on', '1')) and (MODEL_INPUT_DATA_TYPE == 'float32')
 50 | TRANSFER_TYPE_NP, TRANSFER_TYPE_SYMBOL = (np.float32, 'f') if TRANSFER_FLOAT else (np.int8, 'b')
 51 | 
 52 | SLEEP_AFTER_SEND_MS     = int(os.getenv('CK_SLEEP_AFTER_SEND_MS', 0))
 53 | 
 54 | ## ZMQ ports:
 55 | #
 56 | ZMQ_FAN_PORT            = os.getenv('CK_ZMQ_FAN_PORT', 5557)
 57 | ZMQ_FUNNEL_PORT         = os.getenv('CK_ZMQ_FUNNEL_PORT', 5558)
 58 | 
 59 | 
 60 | ## Writing the results out:
 61 | #
 62 | RESULTS_DIR             = os.getenv('CK_RESULTS_DIR', './results')
 63 | FULL_REPORT             = os.getenv('CK_SILENT_MODE', '0') in ('NO', 'no', 'OFF', 'off', '0')
 64 | 
 65 | 
 66 | ## Processing in batches:
 67 | #
 68 | BATCH_COUNT             = int(os.getenv('CK_BATCH_COUNT', 1))
 69 | 
 70 | 
 71 | ## ZeroMQ communication setup:
 72 | #
 73 | zmq_context = zmq.Context()
 74 | 
 75 | to_workers = zmq_context.socket(zmq.PUSH)
 76 | to_workers.bind("tcp://*:{}".format(ZMQ_FAN_PORT))
 77 | 
 78 | from_workers = zmq_context.socket(zmq.PULL)
 79 | from_workers.bind("tcp://*:{}".format(ZMQ_FUNNEL_PORT))
 80 | 
 81 | 
 82 | ## (Shared) placeholders:
 83 | #
 84 | in_progress     = {}    # to be written to by one thread and read by another
 85 | output_dict     = {     # to be topped up by both threads
 86 |     'batch_size': BATCH_SIZE,
 87 |     'batch_count': BATCH_COUNT,
 88 |     'avg_inference_time_ms_by_worker_id': {},
 89 |     'avg_roundtrip_time_ms_by_worker_id': {},
 90 |     'min_roundtrip_time_ms_by_worker_id': {},
 91 |     'pc50_roundtrip_time_ms_by_worker_id': {},
 92 |     'pc90_roundtrip_time_ms_by_worker_id': {},
 93 |     'pc99_roundtrip_time_ms_by_worker_id': {},
 94 |     'max_roundtrip_time_ms_by_worker_id': {},
 95 | }
 96 | 
 97 | 
 98 | def fan_code():
 99 | 
100 |     print("Press Enter when the workers are ready: ")
101 |     _ = raw_input()
102 |     print("[fan] Submitting jobs...")
103 | 
104 |     fan_start = time.time()
105 | 
106 |     image_index = 0
107 |     for batch_index in range(BATCH_COUNT):
108 | 
109 |         batch_first_index = image_index
110 |         batch_data, image_index = load_preprocessed_batch(image_list, image_index)
111 | 
112 |         batch_vector_numpy  = batch_data.ravel()
113 | 
114 |         batch_ids   = list(range(batch_first_index, image_index))
115 |         job_id      = batch_index+1
116 | 
117 |         in_progress[job_id] = {
118 |             'submission_time':  time.time(),
119 |             'batch_ids':        batch_ids,
120 |         }
121 | 
122 |         if TRANSFER_MODE == 'dummy':
123 |             job_data_raw = struct.pack('<II', job_id, BATCH_SIZE)
124 |             to_workers.send(job_data_raw)
125 |         elif TRANSFER_MODE == 'numpy':
126 |             job_data_struct = {
127 |                 'job_id': job_id,
128 |                 'batch_data': batch_vector_numpy,
129 |             }
130 |             to_workers.send_pyobj(job_data_struct)
131 |         elif TRANSFER_MODE == 'pickle':
132 |             job_data_struct = {
133 |                 'job_id': job_id,
134 |                 'batch_data': np.asarray(batch_vector_numpy),
135 |             }
136 |             to_workers.send_pyobj(job_data_struct)
137 |         else:
138 |             batch_vector_array  = batch_vector_numpy.tolist()
139 |             if TRANSFER_MODE == 'raw':
140 |                 job_data_raw = struct.pack('<I{}{}'.format(len(batch_vector_array), TRANSFER_TYPE_SYMBOL), job_id, *batch_vector_array)
141 |                 to_workers.send(job_data_raw)
142 |             elif TRANSFER_MODE == 'json':
143 |                 job_data_struct = {
144 |                     'job_id': job_id,
145 |                     'batch_data': batch_vector_array,
146 |                 }
147 |                 to_workers.send_json(job_data_struct)
148 | 
149 |         print("[fan] -> job_id={} {}".format(job_id, batch_ids))
150 | 
151 |         time.sleep(SLEEP_AFTER_SEND_MS/1000)  # do not overflow the ZeroMQ
152 | 
153 |     fan_time_s = time.time()-fan_start-SLEEP_AFTER_SEND_MS/1000
154 |     print("[fan] Done submitting batches. Submission took {:.2f} s".format(fan_time_s))
155 | 
156 |     output_dict['fan_time_s']               = fan_time_s
157 |     output_dict['avg_send_batch_time_ms']   = fan_time_s*1000/BATCH_COUNT
158 | 
159 | 
160 | def funnel_code():
161 | 
162 |     # Cleanup results directory
163 |     if os.path.isdir(RESULTS_DIR):
164 |         shutil.rmtree(RESULTS_DIR)
165 |     os.mkdir(RESULTS_DIR)
166 | 
167 |     funnel_start = time.time()
168 |     inference_times_ms_by_worker_id = {}
169 |     roundtrip_times_ms_by_worker_id = {}
170 | 
171 |     for _ in range(BATCH_COUNT):
172 |         done_job = from_workers.recv_json()
173 | 
174 |         job_id              = done_job['job_id']
175 |         local_metadata      = in_progress.pop(job_id)
176 |         roundtrip_time_ms   = (time.time()-local_metadata['submission_time'])*1000
177 |         worker_id           = done_job['worker_id']
178 |         inference_time_ms   = done_job['inference_time_ms']
179 |         floatize_time_ms    = done_job['floatize_time_ms']
180 | 
181 |         print("[funnel] <- [worker {}] job_id={}, worker_type_conversion={:.2f} ms, inference={:.2f} ms, roundtrip={:.2f} ms".format(
182 |                             worker_id, job_id, floatize_time_ms, inference_time_ms, roundtrip_time_ms))
183 | 
184 |         batch_ids           = local_metadata['batch_ids']
185 |         batch_size          = len(batch_ids)
186 |         apparent_batch_size = MODEL_MAX_BATCH_SIZE if MODEL_USE_DLA else batch_size
187 |         raw_batch_results   = np.array(done_job['raw_batch_results'])
188 |         batch_results       = np.split(raw_batch_results, apparent_batch_size)[:batch_size]
189 | 
190 |         if worker_id not in inference_times_ms_by_worker_id:
191 |             inference_times_ms_by_worker_id[worker_id] = []
192 |         inference_times_ms_by_worker_id[worker_id].append( inference_time_ms )
193 | 
194 |         if worker_id not in roundtrip_times_ms_by_worker_id:
195 |             roundtrip_times_ms_by_worker_id[worker_id] = []
196 |         roundtrip_times_ms_by_worker_id[worker_id].append( roundtrip_time_ms )
197 | 
198 |         for sample_id, prediction_for_one_sample in zip(batch_ids, batch_results):
199 |             if len(prediction_for_one_sample)==1:
200 |                 predicted_label = int(prediction_for_one_sample[0])
201 |                 trimmed_softmax_vector = [0]*predicted_label + [1] + [0]*(1000-predicted_label-1)
202 |             else:
203 |                 trimmed_softmax_vector = prediction_for_one_sample[-1000:]    # skipping the background class on the left (if present)
204 | 
205 |             res_file = os.path.join(RESULTS_DIR, image_list[int(sample_id)])
206 |             with open(res_file + '.txt', 'w') as f:
207 |                 for prob in trimmed_softmax_vector:
208 |                     f.write('{}\n'.format(prob))
209 | 
210 |     funnel_time_s = time.time()-funnel_start
211 |     print("[funnel] Done receiving batches. Receiving took {:.2f} s".format(funnel_time_s))
212 | 
213 |     print("")
214 | 
215 |     print("[funnel] Batch inference time (ms):")
216 |     for worker_id in inference_times_ms_by_worker_id:
217 |         offset = 1 if len(inference_times_ms_by_worker_id[worker_id]) > 1 else 0    # skip the potential cold startup in case there is more data
218 |         avg_inference_time_ms_by_worker_id = np.mean(inference_times_ms_by_worker_id[worker_id][offset:])
219 |         output_dict['avg_inference_time_ms_by_worker_id'][worker_id] = avg_inference_time_ms_by_worker_id
220 |         print("- [worker {}] average: {:.2f}".format(worker_id, avg_inference_time_ms_by_worker_id))
221 | 
222 |     print("")
223 | 
224 |     print("[funnel] Batch roundtrip time (ms):")
225 |     for worker_id in roundtrip_times_ms_by_worker_id:
226 |         offset = 1 if len(roundtrip_times_ms_by_worker_id[worker_id]) > 1 else 0    # skip the potential cold startup in case there is more data
227 | 
228 |         avg_roundtrip_time_ms_by_worker_id = np.mean(roundtrip_times_ms_by_worker_id[worker_id][offset:])
229 |         output_dict['avg_roundtrip_time_ms_by_worker_id'][worker_id] = avg_roundtrip_time_ms_by_worker_id
230 |         print("- [worker {}] average: {:.2f}".format(worker_id, avg_roundtrip_time_ms_by_worker_id))
231 | 
232 |         min_roundtrip_time_ms_by_worker_id = np.min(roundtrip_times_ms_by_worker_id[worker_id][offset:])
233 |         output_dict['min_roundtrip_time_ms_by_worker_id'][worker_id] = min_roundtrip_time_ms_by_worker_id
234 |         print("- [worker {}] minimum: {:.2f}".format(worker_id, min_roundtrip_time_ms_by_worker_id))
235 | 
236 |         pc50_roundtrip_time_ms_by_worker_id = np.percentile(roundtrip_times_ms_by_worker_id[worker_id][offset:], 50)
237 |         output_dict['pc50_roundtrip_time_ms_by_worker_id'][worker_id] = pc50_roundtrip_time_ms_by_worker_id
238 |         print("- [worker {}] 50 percentile: {:.2f}".format(worker_id, pc50_roundtrip_time_ms_by_worker_id))
239 | 
240 |         pc90_roundtrip_time_ms_by_worker_id = np.percentile(roundtrip_times_ms_by_worker_id[worker_id][offset:], 90)
241 |         output_dict['pc90_roundtrip_time_ms_by_worker_id'][worker_id] = pc90_roundtrip_time_ms_by_worker_id
242 |         print("- [worker {}] 90 percentile: {:.2f}".format(worker_id, pc90_roundtrip_time_ms_by_worker_id))
243 | 
244 |         pc99_roundtrip_time_ms_by_worker_id = np.percentile(roundtrip_times_ms_by_worker_id[worker_id][offset:], 99)
245 |         output_dict['pc99_roundtrip_time_ms_by_worker_id'][worker_id] = pc99_roundtrip_time_ms_by_worker_id
246 |         print("- [worker {}] 99 percentile: {:.2f}".format(worker_id, pc99_roundtrip_time_ms_by_worker_id))
247 | 
248 |         max_roundtrip_time_ms_by_worker_id = np.max(roundtrip_times_ms_by_worker_id[worker_id][offset:])
249 |         output_dict['max_roundtrip_time_ms_by_worker_id'][worker_id] = max_roundtrip_time_ms_by_worker_id
250 |         print("- [worker {}] maximum: {:.2f}".format(worker_id, max_roundtrip_time_ms_by_worker_id))
251 | 
252 |         print("")
253 | 
254 |     output_dict['funnel_time_s']        = funnel_time_s
255 |     output_dict['avg_rountrip_time_ms'] = funnel_time_s*1000/BATCH_COUNT
256 | 
257 | 
258 | ## We need one thread to feed the ZeroMQ, another (the main one) to read back from it:
259 | #
260 | fan_thread = threading.Thread(target=fan_code, args=())
261 | fan_thread.start()
262 | 
263 | funnel_code()
264 | 
265 | fan_thread.join()
266 | 
267 | 
268 | ## Store benchmarking results:
269 | #
270 | with open('tmp-ck-timer.json', 'w') as out_file:
271 |     json.dump(output_dict, out_file, indent=4, sort_keys=True)
272 | 
273 | 


--------------------------------------------------------------------------------
/script/explore-params/run.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | echo "ZeroMQ Push-Pull experiment:"
  4 | 
  5 | # Task: image-classification or object-detection.
  6 | task=${CK_TASK:-"image-classification"}
  7 | echo "- task: ${task}"
  8 | 
  9 | # Platform: tx2, tx1, velociti, xavier, ...
 10 | platform=${CK_PLATFORM:-"tx2"}
 11 | echo "- platform: ${platform}"
 12 | 
 13 | # Model tags.
 14 | model_tags=${CK_MODEL_TAGS:-"converted-from-onnx"}
 15 | echo "- model tags: ${model_tags}"
 16 | 
 17 | # Launch hub-side program: NO for debugging only!
 18 | launch_hub=${CK_LAUNCH_HUB:-YES}
 19 | echo "- launch hub: ${launch_hub}"
 20 | 
 21 | # Use hub-side program with LoadGen: YES/NO.
 22 | loadgen=${CK_LOADGEN:-YES}
 23 | echo "- use LoadGen: ${loadgen}"
 24 | 
 25 | # Hub-side program.
 26 | if [ "${loadgen}" = "YES" ]; then
 27 |     program="${task}-zpp-hub-loadgen-py"
 28 | else
 29 |     program="${task}-zpp-hub-py"
 30 | fi
 31 | program_dir=`ck find ck-zeromq:program:${program}`
 32 | echo "- program: ${program}"
 33 | echo "- program directory: ${program_dir}"
 34 | 
 35 | # LoadGen config file.
 36 | config_file=${CK_ENV_LOADGEN_CONFIG_FILE:-${program_dir}/user.conf}
 37 | echo "- config file: ${config_file}"
 38 | 
 39 | # Model name for LoadGen config: resnet50, ssd-resnet34, mobilenet, ssd-mobilenet, gnmt.
 40 | model_name=${CK_LOADGEN_MODEL_NAME:-"unknown"}
 41 | echo "- model name: ${model_name}"
 42 | 
 43 | # Dry run - print commands but do not execute them.
 44 | dry_run=${CK_DRY_RUN:-""}
 45 | echo "- dry run: ${dry_run}"
 46 | 
 47 | # Skip existing experiments.
 48 | skip_existing=${CK_SKIP_EXISTING:-""}
 49 | echo "- skip existing: ${skip_existing}"
 50 | 
 51 | # Timestamp.
 52 | timestamp=$(date +%Y%m%d-%H%M%S)
 53 | echo "- timestamp: ${timestamp}"
 54 | 
 55 | # Hub IP.
 56 | hub_ip=${CK_HUB_IP:-"localhost"}
 57 | echo "- hub IP: ${hub_ip}"
 58 | 
 59 | # Workers can be defined in two ways:
 60 | # (1) As a list of N IPs. Worker IDs get derived as a sequence from 1 to N.
 61 | # (2) As a list of N IDs. Worker IPs get derived as a sequence of 192.168.1.<ID+1>.
 62 | ips=( ${CK_WORKER_IPS:-} ) # use parentheses to interpret the string as an array
 63 | ids=( ${CK_WORKER_IDS:-} ) # use parentheses to interpret the string as an array
 64 | if [[ -z "${ips}" ]] && [[ -z ${ids} ]]
 65 | then
 66 |   # If neither is defined, send to itself.
 67 |   ips=( "${hub_ip}" )
 68 | fi
 69 | if [[ "${ips}" ]] # (1)
 70 | then
 71 |   num_ips=${#ips[@]}
 72 |   ids=( $(seq 1 ${num_ips}) )
 73 |   num_ids=${#ids[@]}
 74 | else # (2)
 75 |   ids=( ${CK_WORKER_IDS:-1} )
 76 |   num_ids=${#ids[@]}
 77 |   ips=( )
 78 |   for id in ${ids[@]}; do
 79 |     id_plus_1=$((id+1))
 80 |     ips+=( "192.168.1.10${id_plus_1}" )
 81 |   done
 82 |   num_ips=${#ips[@]}
 83 | fi
 84 | echo "- ${num_ips} worker IP(s): ${ips[@]}"
 85 | echo "- ${num_ids} worker ID(s): ${ids[@]}"
 86 | if [[ ${num_ips} != ${num_ids} ]]; then
 87 |   echo "ERROR: ${num_ips} not equal to ${num_ids}!"
 88 |   exit 1
 89 | fi
 90 | 
 91 | # Worker ssh ports (22 by default).
 92 | ports=( ${CK_WORKER_PORTS:-} ) # use parentheses to interpret the string as an array
 93 | if [[ -z "${ports}" ]]; then
 94 |   for id in ${ips[@]}; do
 95 |     ports+=( "22" )
 96 |   done
 97 | fi
 98 | num_ports=${#ports[@]}
 99 | echo "- ${num_ports} worker port(s): ${ports[@]}"
100 | if [[ ${num_ports} != ${num_ips} ]]; then
101 |   echo "ERROR: ${num_ports} not equal to ${num_ips}!"
102 |   exit 1
103 | fi
104 | 
105 | # ZMQ ports: fan (out), funnel (in).
106 | fan_port=${CK_ZMQ_FAN_PORT:-15051}
107 | funnel_port=${CK_ZMQ_FUNNEL_PORT:-15052}
108 | echo "- fan port: ${fan_port}"
109 | echo "- funnel port: ${funnel_port}"
110 | 
111 | # Time each worker should wait after last received work-item before exiting.
112 | postwork_timeout_s=${CK_WORKER_POSTWORK_TIMEOUT_S:-10}
113 | echo "- postwork timeout: ${postwork_timeout_s} s"
114 | 
115 | # Worker response format:
116 | # - argmax returns a class id;
117 | # - softmax returns a 1000 or 1001-element vector of class probabilities;
118 | # - direct_return returns a (7*N+1)-element vector where N is the maximum
119 | # number of predictions (100 for SSD-MobileNet, 200 for SSD-ResNet).
120 | worker_output=${CK_WORKER_OUTPUT_FORMAT:-argmax}
121 | echo "- worker output: ${worker_output}"
122 | 
123 | # Transfer mode: raw, json, pickle, numpy.
124 | transfer_mode=${CK_TRANSFER_MODE:-numpy}
125 | echo "- transfer mode: ${transfer_mode}"
126 | 
127 | # Transfer as floats or as 8-bit integers: YES/NO.
128 | transfer_float=${CK_TRANSFER_FLOAT:-YES}
129 | echo "- transfer float: ${transfer_float}"
130 | 
131 | # Preprocess on GPU: YES/NO.
132 | preprocess_on_gpu=${CK_PREPROCESS_ON_GPU:-NO}
133 | if [ "${preprocess_on_gpu}" = "YES" ] && [ "${transfer_float}" = "YES" ]; then
134 |   echo "WARNING: Forcing not to preprocess on GPU since transferring float!"
135 |   preprocess_on_gpu="NO"
136 | fi
137 | if [ "${preprocess_on_gpu}" = "YES" ] && [ "${transfer_mode}" = "json" ]; then
138 |   echo "WARNING: Forcing not to preprocess on GPU since transferring json!"
139 |   preprocess_on_gpu="NO"
140 | fi
141 | echo "- preprocess on GPU: ${preprocess_on_gpu}"
142 | 
143 | # LoadGen scenario: MultiStream, SingleStream, Offline.
144 | scenario=${CK_LOADGEN_SCENARIO:-MultiStream}
145 | if [ "${scenario}" = "MultiStream" ]; then
146 |   scenario_tag="multistream"
147 | elif [ "${scenario}" = "SingleStream" ]; then
148 |   scenario_tag="singlestream"
149 | elif [ "${scenario}" = "Offline" ]; then
150 |   scenario_tag="offline"
151 | else
152 |   echo "ERROR: Unsupported LoadGen scenario '${scenario}'!"
153 |   exit 1
154 | fi
155 | echo "- scenario: ${scenario} (${scenario_tag})"
156 | 
157 | # LoadGen mode: PerformanceOnly, AccuracyOnly.
158 | mode=${CK_LOADGEN_MODE:-PerformanceOnly}
159 | if [ "${mode}" = "PerformanceOnly" ]; then
160 |   mode_tag="performance"
161 | elif [ "${mode}" = "AccuracyOnly" ]; then
162 |   mode_tag="accuracy"
163 | else
164 |   echo "ERROR: Unsupported LoadGen mode '${mode}'!"
165 |   exit 1
166 | fi
167 | echo "- mode: ${mode} (${mode_tag})"
168 | 
169 | if [ "${task}" = "image-classification" ]; then
170 |   imagenet_size=50000
171 |   if [ "${mode}" = "AccuracyOnly" ]; then
172 |     dataset_size=${CK_LOADGEN_DATASET_SIZE:-${imagenet_size}}
173 |     buffer_size=${CK_LOADGEN_BUFFER_SIZE:-500}
174 |   else
175 |     dataset_size=${CK_LOADGEN_DATASET_SIZE:-1024}
176 |     buffer_size=${CK_LOADGEN_BUFFER_SIZE:-1024}
177 |   fi
178 | elif [ "${task}" = "object-detection" ]; then
179 |   coco_size=5000
180 |   if [ "${mode}" = "AccuracyOnly" ]; then
181 |     dataset_size=${CK_LOADGEN_DATASET_SIZE:-${coco_size}}
182 |     buffer_size=${CK_LOADGEN_BUFFER_SIZE:-50}
183 |   else
184 |     if [ "${model_name}" = "ssd-mobilenet" ]; then
185 |       dataset_size=${CK_LOADGEN_DATASET_SIZE:-256}
186 |       buffer_size=${CK_LOADGEN_BUFFER_SIZE:-256}
187 |     elif [ "${model_name}" = "ssd-resnet34" ]; then
188 |       dataset_size=${CK_LOADGEN_DATASET_SIZE:-64}
189 |       buffer_size=${CK_LOADGEN_BUFFER_SIZE:-64}
190 |     else
191 |       dataset_size=${CK_LOADGEN_DATASET_SIZE:-1024}
192 |       buffer_size=${CK_LOADGEN_BUFFER_SIZE:-1024}
193 |     fi # model name
194 |   fi # mode
195 | else
196 |   echo "ERROR: Unsupported task '${task}'!"
197 |   exit 1
198 | fi # task
199 | 
200 | 
201 | echo "- dataset size: ${dataset_size}"
202 | echo "- buffer size: ${buffer_size}"
203 | 
204 | # In the PerformanceOnly mode, affects the number of samples per query that LoadGen issues
205 | # (aiming to meet the minimum duration of 60 seconds and, in the Offline mode, the minimum
206 | # number of samples of 24,576).
207 | target_qps=${CK_LOADGEN_TARGET_QPS:-70}
208 | if [ "${mode}" = "PerformanceOnly" ]; then
209 |   if [ "${scenario}" == "SingleStream" ] || [ "${scenario}" == "Offline" ]; then
210 |     TARGET_QPS="--env.CK_LOADGEN_TARGET_QPS=${target_qps}"
211 |   fi
212 | fi
213 | echo "- target QPS (queries per second): ${target_qps} ('${TARGET_QPS}')"
214 | 
215 | # Allow to override the number of queries in the PerformanceOnly mode.
216 | # By default, use 1440=720*2:
217 | # - 720==6! (6 factorial) is evenly divisible between any number of co-processors 1-6.
218 | # - 1200==60*20 is the minimum number of 50 ms queries to meet the minimum duration of 60 ms.
219 | # - 1440 > 1200.
220 | count_override=${CK_LOADGEN_COUNT_OVERRIDE:-1440}
221 | if [ "${mode}" = "PerformanceOnly" ]; then
222 |   COUNT_OVERRIDE="--env.CK_LOADGEN_COUNT_OVERRIDE=${count_override}"
223 | fi
224 | echo "- count override: ${count_override} ('${COUNT_OVERRIDE}')"
225 | 
226 | # Batch size.
227 | batch_size=${CK_BATCH_SIZE:-1}
228 | echo "- batch size: ${batch_size}"
229 | 
230 | # Batch count.
231 | batch_count=${CK_BATCH_COUNT:-1}
232 | echo "- batch count: ${batch_count}"
233 | 
234 | # In the MultiStream scenario, affects the number of streams that LoadGen issues
235 | # (aiming to meet the target latency of 50 ms).
236 | # By default, set to the product of the number of workers and the batch size.
237 | multistreamness=${CK_LOADGEN_MULTISTREAMNESS:-$((${num_ids} * ${batch_size}))}
238 | if [ "${scenario}" = "MultiStream" ]; then
239 |   MULTISTREAMNESS="--env.CK_LOADGEN_MULTISTREAMNESS=${multistreamness}"
240 | fi
241 | echo "- multistreamness: ${multistreamness} ('${MULTISTREAMNESS}')"
242 | 
243 | # Number of warming up samples to be discarded.
244 | # By default, set to the product of the number of workers and the batch size.
245 | warmup_samples=${CK_LOADGEN_WARMUP_SAMPLES:-$((${num_ids} * ${batch_size}))}
246 | echo "- warm-up samples: ${warmup_samples}"
247 | 
248 | # Maximum batch size that the TensorRT model supports.
249 | maxbatch=${CK_WEIGHTS_MAXBATCH:-20}
250 | echo "- weights maxbatch: ${maxbatch}"
251 | 
252 | # Numerical precision of the TensorRT model.
253 | precision=${CK_WEIGHTS_PRECISION:-fp16}
254 | echo "- weights precision: ${precision}"
255 | 
256 | # Input preprocessing.
257 | preprocessing_tags=${CK_PREPROCESSING_TAGS:-"rgb8,full,side.224,preprocessed,using-opencv"}
258 | echo "- preprocessing tags: ${preprocessing_tags}"
259 | 
260 | # Prepare record UOA and tags.
261 | mlperf="mlperf"
262 | division="closed"
263 | library="zpp" # ZeroMQ Push-Pull.
264 | benchmark=${model_name}
265 | record_uoa="${mlperf}.${division}.${task}.${platform}.${library}.${benchmark}.${scenario_tag}.${mode_tag}"
266 | record_tags="${mlperf},${division},${task},${platform},${library},${benchmark},${scenario_tag},${mode_tag}"
267 | if [ "${mode_tag}" = "accuracy" ]; then
268 |   # Get substring after "preprocessed," to end, i.e. "using-opencv" here.
269 |   preprocessing="${preprocessing_tags##*preprocessed,}"
270 |   record_uoa+=".${preprocessing}"
271 |   record_tags+=",${preprocessing}"
272 | fi
273 | if [ "${mode_tag}" = "accuracy" ]; then
274 |   if [ "${task}" = "image-classification" ] && [ "${dataset_size}" != "${imagenet_size}" ]; then
275 |     record_uoa+=".${dataset_size}"
276 |     record_tags+=",${dataset_size}"
277 |   fi
278 |   if [ "${task}" = "object-detection" ] && [ "${dataset_size}" != "${coco_size}" ]; then
279 |     record_uoa+=".${dataset_size}"
280 |     record_tags+=",${dataset_size}"
281 |   fi
282 | fi
283 | echo "- record UOA: ${record_uoa}"
284 | echo "- record tags: ${record_tags}"
285 | 
286 | # Blank line before printing commands.
287 | echo
288 | 
289 | # Skip existing experiments if requested.
290 | if (ck find experiment:${record_uoa} >/dev/null) && [[ "${skip_existing}" ]]; then
291 |   echo "Experiment '${record_uoa}' already exists, skipping ..."
292 |   exit 0
293 | fi
294 | 
295 | # Launch the worker programs.
296 | for i in $(seq 1 ${#ips[@]}); do
297 |   ip=${ips[${i}-1]}
298 |   id=${ids[${i}-1]}
299 |   port=${ports[${i}-1]}
300 |   worker_id="worker-${id}"
301 |   read -d '' CMD <<END_OF_CMD
302 |   ssh -n -f ${USER}@${ip} -p ${port} \
303 |   "bash -c 'nohup \
304 |     ck benchmark program:zpp-worker-tensorrt-py --repetitions=1 \
305 |     --dep_add_tags.weights=${model_tags},maxbatch.${maxbatch},${precision} \
306 |     --dep_add_tags.lib-python-tensorrt=python-package,tensorrt \
307 |     --env.CK_HUB_IP=${hub_ip} \
308 |     --env.CK_ZMQ_FAN_PORT=${fan_port} \
309 |     --env.CK_ZMQ_FUNNEL_PORT=${funnel_port} \
310 |     --env.CK_WORKER_ID=${worker_id} \
311 |     --env.CK_WORKER_OUTPUT_FORMAT=${worker_output} \
312 |     --env.CK_WORKER_POSTWORK_TIMEOUT_S=${postwork_timeout_s} \
313 |     --env.CK_TRANSFER_MODE=${transfer_mode} \
314 |     --env.CK_TRANSFER_FLOAT=${transfer_float} \
315 |     --env.CK_PREPROCESS_ON_GPU=${preprocess_on_gpu} \
316 |     --record --record_repo=local \
317 |     --record_uoa=${record_uoa}.${worker_id} \
318 |     --tags=${record_tags},${worker_id} \
319 |     --skip_print_timers --skip_stat_analysis --process_multi_keys \
320 |   > /home/$USER/nohup.log 2>&1 &'"
321 | END_OF_CMD
322 |   echo ${CMD}
323 |   if [ -z "${dry_run}" ]; then
324 |     eval ${CMD}
325 |   fi
326 |   echo
327 | done
328 | 
329 | # Wait a bit.
330 | sleep 1s
331 | 
332 | # Launch the hub program.
333 | read -d '' CMD <<END_OF_CMD
334 | ck benchmark program:${program} --repetitions=1 \
335 | --dep_add_tags.weights=${model_tags},maxbatch.${maxbatch},${precision} \
336 | --dep_add_tags.images=${preprocessing_tags} \
337 | --dep_add_tags.dataset=${preprocessing_tags} \
338 | --dep_add_tags.python=v3 \
339 | --env.CK_HUB_IP=${hub_ip} \
340 | --env.CK_ZMQ_FAN_PORT=${fan_port} \
341 | --env.CK_ZMQ_FUNNEL_PORT=${funnel_port} \
342 | --env.CK_ENV_LOADGEN_CONFIG_FILE=${config_file} \
343 | --env.CK_LOADGEN_MODEL_NAME=${model_name} \
344 | --env.CK_LOADGEN_SCENARIO=${scenario} \
345 | --env.CK_LOADGEN_MODE=${mode} \
346 | --env.CK_LOADGEN_DATASET_SIZE=${dataset_size} \
347 | --env.CK_LOADGEN_BUFFER_SIZE=${buffer_size} \
348 | --env.CK_LOADGEN_WARMUP_SAMPLES=${warmup_samples} \
349 | --env.CK_TRANSFER_MODE=${transfer_mode} \
350 | --env.CK_TRANSFER_FLOAT=${transfer_float} \
351 | --env.CK_PREPROCESS_ON_GPU=${preprocess_on_gpu} \
352 | --env.CK_BATCH_SIZE=${batch_size} \
353 | --env.CK_BATCH_COUNT=${batch_count} \
354 | ${MULTISTREAMNESS} \
355 | ${TARGET_QPS} \
356 | ${COUNT_OVERRIDE} \
357 | --env.CK_SILENT_MODE=YES \
358 | --record --record_repo=local \
359 | --record_uoa=${record_uoa}.hub \
360 | --tags=${record_tags},hub \
361 | --skip_print_timers --skip_stat_analysis --process_multi_keys
362 | END_OF_CMD
363 | echo ${CMD}
364 | if [ -z "${dry_run}" ] && [ "${launch_hub}" == "YES" ]; then
365 |   eval ${CMD}
366 | fi
367 | echo
368 | 
369 | 
370 | # Print the status.
371 | if [ -z "${dry_run}" ]; then
372 |   if [ "${loadgen}" = "YES" ]; then
373 |       # Show the summary table.
374 |       head -n 12 "${program_dir}/tmp/mlperf_log_summary.txt"
375 |       echo
376 |   fi
377 |   echo "Done."
378 | else
379 |   echo "Done (dry run)."
380 | fi
381 | 


--------------------------------------------------------------------------------
/program/image-classification-zpp-hub-loadgen-py/zpp_hub_classify_loadgen.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | import array
  4 | import json
  5 | import os
  6 | import struct
  7 | import sys
  8 | import threading
  9 | import time
 10 | 
 11 | import numpy as np
 12 | import zmq
 13 | import mlperf_loadgen as lg
 14 | 
 15 | 
 16 | ###########################################################################################################
 17 | ## NB: if you run into "zmq.error.ZMQError: Address already in use" after a crash,
 18 | ##     run
 19 | ##          kill `netstat -ltnp 2>/dev/null | grep python | grep 5557 | awk '{print $7}' | sed 's/\/.*//'`
 20 | ##     to stop the socket-hogging process.
 21 | ###########################################################################################################
 22 | 
 23 | 
 24 | ## ZMQ ports:
 25 | #
 26 | ZMQ_FAN_PORT            = os.getenv('CK_ZMQ_FAN_PORT', 5557)
 27 | ZMQ_FUNNEL_PORT         = os.getenv('CK_ZMQ_FUNNEL_PORT', 5558)
 28 | 
 29 | ## LoadGen test properties:
 30 | #
 31 | LOADGEN_SCENARIO            = os.getenv('CK_LOADGEN_SCENARIO', 'SingleStream')
 32 | LOADGEN_MODE                = os.getenv('CK_LOADGEN_MODE', 'AccuracyOnly')
 33 | LOADGEN_BUFFER_SIZE         = int(os.getenv('CK_LOADGEN_BUFFER_SIZE'))      # set to how many samples are you prepared to keep in memory at once
 34 | LOADGEN_DATASET_SIZE        = int(os.getenv('CK_LOADGEN_DATASET_SIZE'))     # set to how many total samples to choose from (0 = full set)
 35 | LOADGEN_CONFIG_FILE         = os.getenv('CK_ENV_LOADGEN_CONFIG_FILE', '')   # Very Important: make sure 'pass_env_to_resolve' is on
 36 | LOADGEN_MODEL_NAME          = os.getenv('CK_LOADGEN_MODEL_NAME', 'random_model_name')
 37 | LOADGEN_MULTISTREAMNESS     = os.getenv('CK_LOADGEN_MULTISTREAMNESS', '')   # if not set, use value from LoadGen's config file, or LoadGen code
 38 | LOADGEN_MAX_DURATION_S      = os.getenv('CK_LOADGEN_MAX_DURATION_S', '')    # if not set, use value from LoadGen's config file, or LoadGen code
 39 | LOADGEN_COUNT_OVERRIDE      = os.getenv('CK_LOADGEN_COUNT_OVERRIDE', '')
 40 | LOADGEN_TARGET_QPS          = os.getenv('CK_LOADGEN_TARGET_QPS', '')        # Maps to differently named internal config options, depending on scenario - see below.
 41 | LOADGEN_WARMUP_SAMPLES      = int(os.getenv('CK_LOADGEN_WARMUP_SAMPLES', '0'))
 42 | BATCH_SIZE                  = int(os.getenv('CK_BATCH_SIZE', '1'))
 43 | SIDELOAD_JSON               = os.getenv('CK_LOADGEN_SIDELOAD_JSON','')
 44 | 
 45 | ## Model properties:
 46 | #
 47 | MODEL_PATH              = os.environ['CK_ENV_TENSORRT_MODEL_FILENAME']
 48 | MODEL_DATA_LAYOUT       = os.getenv('ML_MODEL_DATA_LAYOUT', 'NCHW')
 49 | LABELS_PATH             = os.environ['CK_CAFFE_IMAGENET_SYNSET_WORDS_TXT']
 50 | MODEL_COLOURS_BGR       = os.getenv('ML_MODEL_COLOUR_CHANNELS_BGR', 'NO') in ('YES', 'yes', 'ON', 'on', '1')
 51 | MODEL_INPUT_DATA_TYPE   = os.getenv('ML_MODEL_INPUT_DATA_TYPE', 'float32')
 52 | MODEL_DATA_TYPE         = os.getenv('ML_MODEL_DATA_TYPE', '(unknown)')
 53 | MODEL_IMAGE_HEIGHT      = int(os.getenv('ML_MODEL_IMAGE_HEIGHT',
 54 |                               os.getenv('CK_ENV_ONNX_MODEL_IMAGE_HEIGHT',
 55 |                               os.getenv('CK_ENV_TENSORFLOW_MODEL_IMAGE_HEIGHT',
 56 |                               ''))))
 57 | MODEL_IMAGE_WIDTH       = int(os.getenv('ML_MODEL_IMAGE_WIDTH',
 58 |                               os.getenv('CK_ENV_ONNX_MODEL_IMAGE_WIDTH',
 59 |                               os.getenv('CK_ENV_TENSORFLOW_MODEL_IMAGE_WIDTH',
 60 |                               ''))))
 61 | MODEL_IMAGE_CHANNELS    = 3
 62 | MODEL_SOFTMAX_LAYER     = os.getenv('CK_ENV_ONNX_MODEL_OUTPUT_LAYER_NAME', os.getenv('CK_ENV_TENSORFLOW_MODEL_OUTPUT_LAYER_NAME', ''))
 63 | 
 64 | 
 65 | ## Data transfer (numpy floats by default):
 66 | #
 67 | TRANSFER_MODE           = os.getenv('CK_TRANSFER_MODE', 'numpy')
 68 | TRANSFER_FLOAT          = (os.getenv('CK_TRANSFER_FLOAT', 'YES') in ('YES', 'yes', 'ON', 'on', '1')) and (MODEL_INPUT_DATA_TYPE == 'float32')
 69 | TRANSFER_TYPE_NP, TRANSFER_TYPE_SYMBOL = (np.float32, 'f') if TRANSFER_FLOAT else (np.int8, 'b')
 70 | 
 71 | ## Image normalization:
 72 | #
 73 | PREPROCESS_ON_GPU       = not TRANSFER_FLOAT and os.getenv('CK_PREPROCESS_ON_GPU', 'NO') in ('YES', 'yes', 'ON', 'on', '1')
 74 | MODEL_NORMALIZE_DATA    = os.getenv('ML_MODEL_NORMALIZE_DATA') in ('YES', 'yes', 'ON', 'on', '1')
 75 | SUBTRACT_MEAN           = os.getenv('ML_MODEL_SUBTRACT_MEAN', 'YES') in ('YES', 'yes', 'ON', 'on', '1')
 76 | GIVEN_CHANNEL_MEANS     = os.getenv('ML_MODEL_GIVEN_CHANNEL_MEANS', '')
 77 | if GIVEN_CHANNEL_MEANS:
 78 |     GIVEN_CHANNEL_MEANS = np.fromstring(GIVEN_CHANNEL_MEANS, dtype=np.float32, sep=' ').astype(TRANSFER_TYPE_NP)
 79 |     if MODEL_COLOURS_BGR:
 80 |         GIVEN_CHANNEL_MEANS = GIVEN_CHANNEL_MEANS[::-1]     # swapping Red and Blue colour channels
 81 | 
 82 | ## Input image properties:
 83 | #
 84 | IMAGE_DIR               = os.getenv('CK_ENV_DATASET_IMAGENET_PREPROCESSED_DIR')
 85 | IMAGE_LIST_FILE         = os.path.join(IMAGE_DIR, os.getenv('CK_ENV_DATASET_IMAGENET_PREPROCESSED_SUBSET_FOF'))
 86 | IMAGE_DATA_TYPE         = np.dtype( os.getenv('CK_ENV_DATASET_IMAGENET_PREPROCESSED_DATA_TYPE', 'uint8') )
 87 | 
 88 | ## Misc
 89 | #
 90 | VERBOSITY_LEVEL         = int(os.getenv('CK_VERBOSE', '0'))
 91 | 
 92 | 
 93 | ## ZeroMQ communication setup:
 94 | #
 95 | zmq_context = zmq.Context()
 96 | 
 97 | to_workers = zmq_context.socket(zmq.PUSH)
 98 | to_workers.bind("tcp://*:{}".format(ZMQ_FAN_PORT))
 99 | 
100 | from_workers = zmq_context.socket(zmq.PULL)
101 | from_workers.bind("tcp://*:{}".format(ZMQ_FUNNEL_PORT))
102 | from_workers.RCVTIMEO = 2000
103 | 
104 | 
105 | # Load preprocessed image filepaths:
106 | with open(IMAGE_LIST_FILE, 'r') as f:
107 |     image_path_list = [ os.path.join(IMAGE_DIR, s.strip()) for s in f ]
108 | LOADGEN_DATASET_SIZE = LOADGEN_DATASET_SIZE or len(image_path_list)
109 | 
110 | 
111 | def load_labels(labels_filepath):
112 |     my_labels = []
113 |     input_file = open(labels_filepath, 'r')
114 |     for l in input_file:
115 |         my_labels.append(l.strip())
116 |     return my_labels
117 | 
118 | 
119 | def tick(letter, quantity=1):
120 |     print(letter + (str(quantity) if quantity>1 else ''), end='')
121 | 
122 | 
123 | # Currently loaded preprocessed images are stored in a dictionary:
124 | preprocessed_image_buffer = {}
125 | 
126 | 
127 | def load_query_samples(sample_indices):     # 0-based indices in our whole dataset
128 |     global MODEL_IMAGE_HEIGHT, MODEL_IMAGE_WIDTH, MODEL_IMAGE_CHANNELS
129 |     global preprocessed_image_buffer
130 | 
131 |     print("load_query_samples({})".format(sample_indices))
132 | 
133 |     tick('B', len(sample_indices))
134 | 
135 |     for sample_index in sample_indices:
136 |         img_filename = image_path_list[sample_index]
137 |         img = np.fromfile(img_filename, IMAGE_DATA_TYPE)
138 |         img = img.reshape((MODEL_IMAGE_HEIGHT, MODEL_IMAGE_WIDTH, MODEL_IMAGE_CHANNELS))
139 | 
140 |         if PREPROCESS_ON_GPU:
141 |             nhwc_img = img if MODEL_DATA_LAYOUT == 'NHWC' else img.transpose(2,0,1)
142 |             preprocessed_image_buffer[sample_index] = np.array(nhwc_img).ravel() # transfer bytes and unsigned
143 |         else:
144 |             if MODEL_COLOURS_BGR:
145 |                 img = img[...,::-1]     # swapping Red and Blue colour channels
146 | 
147 |             if IMAGE_DATA_TYPE != 'float32':
148 |                 img = img.astype(np.float32)
149 | 
150 |                 # Normalize
151 |                 if MODEL_NORMALIZE_DATA:
152 |                     img = img/127.5 - 1.0
153 | 
154 |                 # Subtract mean value
155 |                 if SUBTRACT_MEAN:
156 |                     if len(GIVEN_CHANNEL_MEANS):
157 |                         img -= GIVEN_CHANNEL_MEANS
158 |                     else:
159 |                         img -= np.mean(img, axis=(0,1), keepdims=True)
160 | 
161 |             if MODEL_INPUT_DATA_TYPE == 'int8' or TRANSFER_TYPE_NP == np.int8:
162 |                 img = np.clip(img, -128, 127)
163 | 
164 |             nhwc_img = img if MODEL_DATA_LAYOUT == 'NHWC' else img.transpose(2,0,1)
165 |             preprocessed_image_buffer[sample_index] = np.array(nhwc_img).ravel().astype(TRANSFER_TYPE_NP) # transfer bytes as signed
166 | 
167 |         tick('l')
168 |     print('')
169 | 
170 | 
171 | def unload_query_samples(sample_indices):
172 |     #print("unload_query_samples({})".format(sample_indices))
173 |     global preprocessed_image_buffer
174 | 
175 |     preprocessed_image_buffer = {}
176 |     tick('U')
177 |     print('')
178 | 
179 | 
180 | openme_data  = {}                   # side-loaded stats
181 | in_progress  = {}                   # local store of metadata about batches between issue_queries and send_responses
182 | funnel_should_be_running = True     # a way for the fan to signal to the funnel_thread to end
183 | warmup_mode              = False    # while on, QuerySampleResponses will not be sent to LoadGen
184 | 
185 | def issue_queries(query_samples):
186 | 
187 |     global BATCH_SIZE
188 | 
189 |     if VERBOSITY_LEVEL:
190 |         printable_query = [(qs.index, qs.id) for qs in query_samples]
191 |         print("issue_queries( {} )".format(printable_query))
192 |     tick('Q', len(query_samples))
193 | 
194 |     for j in range(0, len(query_samples), BATCH_SIZE):
195 |         batch               = query_samples[j:j+BATCH_SIZE]   # NB: the last one may be shorter than BATCH_SIZE in length
196 |         batch_vector_numpy  = np.ravel([ preprocessed_image_buffer[qs.index] for qs in batch ])
197 | 
198 |         job_id      = batch[0].id   # assume it is both sufficiently unique and sufficiently small to fit our needs
199 | 
200 |         in_progress[job_id] = {
201 |             'submission_time':  time.time(),
202 |             'batch':            batch,
203 |         }
204 |     
205 |         if TRANSFER_MODE == 'numpy':
206 |             job_data_struct = {
207 |                 'job_id': job_id,
208 |                 'batch_data': batch_vector_numpy,
209 |             }
210 |             to_workers.send_pyobj(job_data_struct)
211 |         elif TRANSFER_MODE == 'pickle':
212 |             job_data_struct = {
213 |                 'job_id': job_id,
214 |                 'batch_data': np.asarray(batch_vector_numpy),
215 |             }
216 |             to_workers.send_pyobj(job_data_struct)
217 |         elif TRANSFER_MODE == 'raw':
218 |             ## Slower, but insensitive to endianness:
219 |             # batch_vector_list = batch_vector_numpy.tolist()
220 |             # job_data_raw = struct.pack('<I{}{}'.format(len(batch_vector_list), TRANSFER_TYPE_SYMBOL), job_id, *batch_vector_list)
221 | 
222 |             ## Faster, but potentially sensitive to endianness:
223 |             batch_vector_array = np.asarray(batch_vector_numpy)
224 |             job_data_raw   = struct.pack('<I', job_id) + memoryview( batch_vector_array )
225 | 
226 |             to_workers.send(job_data_raw)
227 |         elif TRANSFER_MODE == 'json':
228 |             job_data_struct = {
229 |                 'job_id': job_id,
230 |                 'batch_data': batch_vector_numpy.tolist(),
231 |             }
232 |             to_workers.send_json(job_data_struct)
233 | 
234 |         print("[fan] -> job_id={} {}".format(job_id, [qs.index for qs in batch]))
235 | 
236 | 
237 | def send_responses():
238 | 
239 |     global funnel_should_be_running, warmup_mode, openme_data
240 | 
241 |     funnel_start = time.time()
242 | 
243 |     received_job_timings = openme_data['received_job_timings'] = []
244 |     inference_times_ms_by_worker_id = {}
245 | 
246 |     while funnel_should_be_running:
247 | 
248 |         try:
249 |             done_job            = from_workers.recv_json()
250 |         except Exception as e:
251 |             continue    # go back and check if the funnel_should_be_running condition has been turned off by the main thread
252 | 
253 |         job_id              = done_job['job_id']
254 |         local_metadata      = in_progress.pop(job_id)
255 |         received_timestamp  = time.time()
256 |         roundtrip_time_ms   = (received_timestamp-local_metadata['submission_time'])*1000
257 |         worker_id           = done_job['worker_id']
258 |         inference_time_ms   = done_job['inference_time_ms']
259 |         floatize_time_ms    = done_job['floatize_time_ms']
260 | 
261 |         print("[funnel] <- [worker {}] job_id={}, worker_type_conversion={:.2f} ms, inference={:.2f} ms, roundtrip={:.2f} ms".format(
262 |                             worker_id, job_id, floatize_time_ms, inference_time_ms, roundtrip_time_ms))
263 | 
264 |         received_job_timings.append({
265 |             'job_id':                   job_id,
266 |             'worker_id':                worker_id,
267 |             'received_timestamp':       received_timestamp,
268 |             'worker_floatize_time_ms':  floatize_time_ms,
269 |             'inference_time_ms':        inference_time_ms,
270 |             'roundtrip_time_ms':        roundtrip_time_ms,
271 |         })
272 | 
273 |         if warmup_mode:
274 |             continue
275 | 
276 |         if worker_id not in inference_times_ms_by_worker_id:
277 |             inference_times_ms_by_worker_id[worker_id] = []
278 |         inference_times_ms_by_worker_id[worker_id].append( inference_time_ms )
279 | 
280 |         batch               = local_metadata['batch']
281 |         batch_size          = len(batch)
282 |         raw_batch_results   = np.array(done_job['raw_batch_results'])
283 |         batch_results       = np.split(raw_batch_results, batch_size)
284 | 
285 |         response = []
286 |         response_array_refs = []    # This is needed to guarantee that the individual buffers to which we keep extra-Pythonian references, do not get garbage-collected.
287 |         for qs, prediction_for_one_sample in zip(batch, batch_results):
288 |             if len(prediction_for_one_sample)==1:
289 |                 predicted_label = prediction_for_one_sample
290 |             else:
291 |                 predicted_label = np.argmax( prediction_for_one_sample[-1000:] )
292 | 
293 |             response_array = array.array("B", np.array(predicted_label, np.float32).tobytes())
294 |             response_array_refs.append(response_array)
295 |             bi = response_array.buffer_info()
296 |             response.append(lg.QuerySampleResponse(qs.id, bi[0], bi[1]))
297 |         lg.QuerySamplesComplete(response)
298 |         tick('R', len(response))
299 |         sys.stdout.flush()
300 |     print("[funnel] quitting")
301 | 
302 | 
303 | def flush_queries():
304 |     pass
305 | 
306 | 
307 | def process_latencies(latencies_ns):
308 | 
309 |     global openme_data
310 | 
311 |     latencies_ms = openme_data['loadgen_measured_latencies_ms'] = [ns/1.0e6 for ns in latencies_ns]
312 |     print("LG called process_latencies({})".format(latencies_ms))
313 | 
314 |     latencies_size      = len(latencies_ms)
315 |     latencies_avg       = sum(latencies_ms)/latencies_size
316 |     latencies_sorted    = sorted(latencies_ms)
317 |     latencies_p50       = int(latencies_size * 0.5);
318 |     latencies_p90       = int(latencies_size * 0.9);
319 |     latencies_p99       = int(latencies_size * 0.99);
320 | 
321 |     print("--------------------------------------------------------------------")
322 |     print("|                LATENCIES (in milliseconds and fps)               |")
323 |     print("--------------------------------------------------------------------")
324 |     print("Number of samples run:       {:9d}".format(latencies_size))
325 |     print("Min latency:                 {:9.2f} ms   ({:.3f} fps)".format(latencies_sorted[0], 1e3/latencies_sorted[0]))
326 |     print("Median latency:              {:9.2f} ms   ({:.3f} fps)".format(latencies_sorted[latencies_p50], 1e3/latencies_sorted[latencies_p50]))
327 |     print("Average latency:             {:9.2f} ms   ({:.3f} fps)".format(latencies_avg, 1e3/latencies_avg))
328 |     print("90 percentile latency:       {:9.2f} ms   ({:.3f} fps)".format(latencies_sorted[latencies_p90], 1e3/latencies_sorted[latencies_p90]))
329 |     print("99 percentile latency:       {:9.2f} ms   ({:.3f} fps)".format(latencies_sorted[latencies_p99], 1e3/latencies_sorted[latencies_p99]))
330 |     print("Max latency:                 {:9.2f} ms   ({:.3f} fps)".format(latencies_sorted[-1], 1e3/latencies_sorted[-1]))
331 |     print("--------------------------------------------------------------------")
332 | 
333 | 
334 | def benchmark_using_loadgen():
335 |     "Perform the benchmark using python API for the LoadGen library"
336 | 
337 |     global funnel_should_be_running, warmup_mode, openme_data
338 | 
339 |     scenario = {
340 |         'SingleStream':     lg.TestScenario.SingleStream,
341 |         'MultiStream':      lg.TestScenario.MultiStream,
342 |         'Server':           lg.TestScenario.Server,
343 |         'Offline':          lg.TestScenario.Offline,
344 |     }[LOADGEN_SCENARIO]
345 | 
346 |     mode = {
347 |         'AccuracyOnly':     lg.TestMode.AccuracyOnly,
348 |         'PerformanceOnly':  lg.TestMode.PerformanceOnly,
349 |         'SubmissionRun':    lg.TestMode.SubmissionRun,
350 |     }[LOADGEN_MODE]
351 | 
352 |     ts = lg.TestSettings()
353 |     if LOADGEN_CONFIG_FILE:
354 |         ts.FromConfig(LOADGEN_CONFIG_FILE, LOADGEN_MODEL_NAME, LOADGEN_SCENARIO)
355 |     ts.scenario = scenario
356 |     ts.mode     = mode
357 | 
358 |     if LOADGEN_MULTISTREAMNESS:
359 |         ts.multi_stream_samples_per_query = int(LOADGEN_MULTISTREAMNESS)
360 | 
361 |     if LOADGEN_MAX_DURATION_S:
362 |         ts.max_duration_ms = int(LOADGEN_MAX_DURATION_S)*1000
363 | 
364 |     if LOADGEN_COUNT_OVERRIDE:
365 |         ts.min_query_count = int(LOADGEN_COUNT_OVERRIDE)
366 |         ts.max_query_count = int(LOADGEN_COUNT_OVERRIDE)
367 | 
368 |     if LOADGEN_TARGET_QPS:
369 |         target_qps                  = float(LOADGEN_TARGET_QPS)
370 |         ts.multi_stream_target_qps  = target_qps
371 |         ts.server_target_qps        = target_qps
372 |         ts.offline_expected_qps     = target_qps
373 | 
374 |     sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
375 |     qsl = lg.ConstructQSL(LOADGEN_DATASET_SIZE, LOADGEN_BUFFER_SIZE, load_query_samples, unload_query_samples)
376 | 
377 |     log_settings = lg.LogSettings()
378 |     log_settings.enable_trace = False
379 | 
380 |     funnel_thread = threading.Thread(target=send_responses, args=())
381 |     funnel_should_be_running = True
382 |     funnel_thread.start()
383 | 
384 |     if LOADGEN_WARMUP_SAMPLES:
385 |         warmup_id_range = list(range(LOADGEN_WARMUP_SAMPLES))
386 |         load_query_samples(warmup_id_range)
387 | 
388 |         warmup_mode = True
389 |         print("Sending out the warm-up samples, waiting for responses...")
390 |         issue_queries([lg.QuerySample(id,id) for id in warmup_id_range])
391 | 
392 |         while len(in_progress)>0:       # waiting for the in_progress queue to clear up
393 |             time.sleep(1)
394 |         print(" Done!")
395 | 
396 |         warmup_mode = False
397 | 
398 |     lg.StartTestWithLogSettings(sut, qsl, ts, log_settings)
399 | 
400 |     funnel_should_be_running = False    # politely ask the funnel_thread to end
401 |     funnel_thread.join()                # wait for it to actually end
402 | 
403 |     from_workers.close()
404 |     to_workers.close()
405 | 
406 |     lg.DestroyQSL(qsl)
407 |     lg.DestroySUT(sut)
408 | 
409 |     if SIDELOAD_JSON:
410 |         with open(SIDELOAD_JSON, 'w') as sideload_fd:
411 |             json.dump(openme_data, sideload_fd, indent=4, sort_keys=True)
412 | 
413 | 
414 | benchmark_using_loadgen()
415 | 


--------------------------------------------------------------------------------
/program/zpp-worker-tensorrt-py/zpp_worker_trt.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | import numpy as np
  4 | import os
  5 | import struct
  6 | import time
  7 | import zmq
  8 | 
  9 | import tensorrt as trt
 10 | import pycuda.driver as cuda
 11 | import pycuda.autoinit
 12 | import pycuda.tools
 13 | 
 14 | 
 15 | ## ZMQ ports:
 16 | #
 17 | ZMQ_FAN_PORT            = os.getenv('CK_ZMQ_FAN_PORT', 5557)
 18 | ZMQ_FUNNEL_PORT         = os.getenv('CK_ZMQ_FUNNEL_PORT', 5558)
 19 | 
 20 | ## Worker properties:
 21 | #
 22 | HUB_IP                  = os.getenv('CK_HUB_IP', 'localhost')
 23 | JOBS_LIMIT              = int(os.getenv('CK_WORKER_JOB_LIMIT', 0))
 24 | WORKER_ID               = os.getenv('CK_WORKER_ID') or os.getpid()
 25 | WORKER_OUTPUT_FORMAT    = os.getenv('CK_WORKER_OUTPUT_FORMAT', 'softmax')
 26 | WORKER_POSTWORK_TIMEOUT_S = os.getenv('CK_WORKER_POSTWORK_TIMEOUT_S', '')  # empty string means no timeout
 27 | 
 28 | ## Model properties:
 29 | #
 30 | MODEL_PATH              = os.environ['CK_ENV_TENSORRT_MODEL_FILENAME']
 31 | MODEL_PLUGIN_PATH       = os.getenv('CK_ENV_TENSORRT_PLUGIN_PATH', os.getenv('ML_MODEL_TENSORRT_PLUGIN',''))
 32 | 
 33 | if MODEL_PLUGIN_PATH:
 34 |     import ctypes
 35 |     if not os.path.isfile(MODEL_PLUGIN_PATH):
 36 |         raise IOError("{}\n{}\n".format(
 37 |             "Failed to load library ({}).".format(MODEL_PLUGIN_PATH),
 38 |             "Please build the plugin."
 39 |         ))
 40 |     ctypes.CDLL(MODEL_PLUGIN_PATH)
 41 | 
 42 | MODEL_DATA_LAYOUT       = os.getenv('ML_MODEL_DATA_LAYOUT', 'NCHW')
 43 | MODEL_COLOURS_BGR       = os.getenv('ML_MODEL_COLOUR_CHANNELS_BGR', 'NO') in ('YES', 'yes', 'ON', 'on', '1')
 44 | MODEL_INPUT_DATA_TYPE   = os.getenv('ML_MODEL_INPUT_DATA_TYPE', 'float32')
 45 | MODEL_DATA_TYPE         = os.getenv('ML_MODEL_DATA_TYPE', '(unknown)')
 46 | MODEL_SOFTMAX_LAYER     = os.getenv('CK_ENV_ONNX_MODEL_OUTPUT_LAYER_NAME', os.getenv('CK_ENV_TENSORFLOW_MODEL_OUTPUT_LAYER_NAME', ''))
 47 | MODEL_SUBTRACT_MEAN     = os.getenv('ML_MODEL_SUBTRACT_MEAN', 'NO') in ('YES', 'yes', 'ON', 'on', '1')
 48 | if MODEL_SUBTRACT_MEAN:
 49 |     MODEL_GIVEN_CHANNEL_MEANS = os.getenv('ML_MODEL_GIVEN_CHANNEL_MEANS', '0.0 0.0 0.0')
 50 |     channel_means = np.fromstring(MODEL_GIVEN_CHANNEL_MEANS, dtype=np.float32, sep=' ')
 51 |     if MODEL_COLOURS_BGR:
 52 |         channel_means = channel_means[::-1]     # swapping Red and Blue colour channels
 53 | 
 54 | ## Transfer mode (numpy floats by default):
 55 | #
 56 | TRANSFER_MODE           = os.getenv('CK_TRANSFER_MODE', 'numpy')
 57 | TRANSFER_FLOAT          = os.getenv('CK_TRANSFER_FLOAT', 'YES') in ('YES', 'yes', 'ON', 'on', '1')
 58 | PREPROCESS_ON_GPU       = (TRANSFER_FLOAT == False) and (TRANSFER_MODE != 'json') and os.getenv('CK_PREPROCESS_ON_GPU', 'NO') in ('YES', 'yes', 'ON', 'on', '1')
 59 | CONVERSION_NEEDED       = (TRANSFER_FLOAT == False) and (MODEL_INPUT_DATA_TYPE == 'float32')
 60 | CONVERSION_TYPE_SYMBOL  = 'f' if (MODEL_INPUT_DATA_TYPE == 'float32') else 'b'
 61 | ID_SIZE_IN_BYTES        = 4 # assuming uint32
 62 | 
 63 | ## ZeroMQ communication setup:
 64 | #
 65 | zmq_context = zmq.Context()
 66 | 
 67 | from_factory = zmq_context.socket(zmq.PULL)
 68 | from_factory.connect('tcp://{}:{}'.format(HUB_IP, ZMQ_FAN_PORT))
 69 | if WORKER_POSTWORK_TIMEOUT_S != '':
 70 |     from_factory.RCVTIMEO = int(WORKER_POSTWORK_TIMEOUT_S)*1000   # expects milliseconds
 71 | 
 72 | to_funnel = zmq_context.socket(zmq.PUSH)
 73 | to_funnel.connect('tcp://{}:{}'.format(HUB_IP, ZMQ_FUNNEL_PORT))
 74 | 
 75 | 
 76 | ## CUDA/TensorRT model setup:
 77 | #
 78 | pycuda_context = pycuda.tools.make_default_context()
 79 | 
 80 | TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
 81 | try:
 82 |     trt.init_libnvinfer_plugins(TRT_LOGGER, "")
 83 |     with open(MODEL_PATH, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
 84 |         serialized_engine = f.read()
 85 |         trt_engine = runtime.deserialize_cuda_engine(serialized_engine)
 86 |         trt_version = [ int(v) for v in trt.__version__.split('.') ]
 87 |         print('[TensorRT v{}.{}] successfully loaded'.format(trt_version[0], trt_version[1]))
 88 | except:
 89 |     pycuda_context.pop()
 90 |     raise RuntimeError('TensorRT model file {} is not found or corrupted'.format(MODEL_PATH))
 91 | 
 92 | max_batch_size      = trt_engine.max_batch_size
 93 | 
 94 | d_inputs, h_d_outputs, model_bindings = [], [], []
 95 | for interface_layer in trt_engine:
 96 |     dtype   = trt_engine.get_binding_dtype(interface_layer)
 97 |     shape   = trt_engine.get_binding_shape(interface_layer)
 98 |     fmt     = trt_engine.get_binding_format(trt_engine.get_binding_index(interface_layer)) if trt_version[0] >= 6 else None
 99 | 
100 |     if fmt and fmt == trt.TensorFormat.CHW4 and trt_engine.binding_is_input(interface_layer):
101 |         shape[-3] = ((shape[-3] - 1) // 4 + 1) * 4
102 |     size    = trt.volume(shape) * max_batch_size
103 | 
104 |     dev_mem = cuda.mem_alloc(size * dtype.itemsize)
105 |     model_bindings.append( int(dev_mem) )
106 | 
107 |     if trt_engine.binding_is_input(interface_layer):
108 |         interface_type = 'Input'
109 |         d_inputs.append(dev_mem)
110 |         model_input_shape       = shape
111 |         model_input_type_size   = dtype.itemsize
112 |         if CONVERSION_NEEDED:
113 |             d_preconverted_input = cuda.mem_alloc(size * 1)
114 |     else:
115 |         interface_type = 'Output'
116 |         host_mem    = cuda.pagelocked_empty(size, trt.nptype(dtype))
117 |         h_d_outputs.append({ 'host_mem': host_mem, 'dev_mem': dev_mem })
118 |         if MODEL_SOFTMAX_LAYER=='' or interface_layer == MODEL_SOFTMAX_LAYER:
119 |             model_output_shape  = shape
120 |             h_output            = host_mem
121 | 
122 |     print("{} layer {}: dtype={}, shape={}, elements_per_max_batch={}".format(interface_type, interface_layer, dtype, shape, size))
123 | 
124 | cuda_stream         = cuda.Stream()
125 | input_volume        = trt.volume(model_input_shape)     # total number of monochromatic subpixels (before batching)
126 | output_volume       = trt.volume(model_output_shape)    # total number of elements in one image prediction (before batching)
127 | 
128 | if MODEL_DATA_LAYOUT == 'NHWC':
129 |     (MODEL_IMAGE_HEIGHT, MODEL_IMAGE_WIDTH, MODEL_IMAGE_CHANNELS) = model_input_shape
130 | else:
131 |     (MODEL_IMAGE_CHANNELS, MODEL_IMAGE_HEIGHT, MODEL_IMAGE_WIDTH) = model_input_shape
132 | 
133 | print("Data layout: {}".format(MODEL_DATA_LAYOUT) )
134 | print('Model image height: {}'.format(MODEL_IMAGE_HEIGHT))
135 | print('Model image width: {}'.format(MODEL_IMAGE_WIDTH))
136 | print('Model image channels: {}'.format(MODEL_IMAGE_CHANNELS))
137 | print('Model input data type: {}'.format(MODEL_INPUT_DATA_TYPE))
138 | print('Model (internal) data type: {}'.format(MODEL_DATA_TYPE))
139 | print('Model BGR colours: {}'.format(MODEL_COLOURS_BGR))
140 | print('Model max_batch_size: {}'.format(max_batch_size))
141 | print('Model input_volume: {}'.format(input_volume))
142 | print('Model output_volume: {}'.format(output_volume))
143 | print('Image transfer mode: {}'.format(TRANSFER_MODE))
144 | print('Transferred images need to be converted to the input data type of the model: {}'.format(CONVERSION_NEEDED))
145 | print('Transferred images need to be preprocessed (e.g. by subtracting means): {}'.format(PREPROCESS_ON_GPU))
146 | print('Worker output format: {}'.format(WORKER_OUTPUT_FORMAT))
147 | 
148 | if CONVERSION_NEEDED:
149 |     compilation_start = time.time();
150 | 
151 |     from pycuda.compiler import SourceModule
152 |     # Define type conversion kernels and more. NB: Must be done after initializing CUDA context.
153 |     source_module = SourceModule(source="""
154 |     // See all type converstion (cast) built-in functionshere:
155 |     // https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__INTRINSIC__CAST.html
156 |     // Convert signed 8-bit integer to 32-bit floating-point using round-to-nearest-even mode.
157 |     __global__ void convert_int8_to_fp32(
158 |         float * __restrict__ out, const signed char * __restrict__ in, long num_elems
159 |     )
160 |     {
161 |         long idx = threadIdx.x + blockIdx.x * blockDim.x;
162 |         if (idx >=  num_elems)
163 |             return;
164 | 
165 |         out[idx] = __int2float_rn( (signed int) in[idx] );
166 |     }
167 | 
168 |     // Convert unsigned 8-bit integer to 32-bit floating-point using round-to-nearest-even mode.
169 |     __global__ void convert_uint8_to_fp32(
170 |         float * __restrict__ out, const unsigned char * __restrict__ in, long num_elems
171 |     )
172 |     {
173 |         long idx = threadIdx.x + blockIdx.x * blockDim.x;
174 |         if (idx >=  num_elems)
175 |             return;
176 | 
177 |         out[idx] = __int2float_rn( (unsigned int) in[idx] );
178 |     }
179 | 
180 |     // Subtract channel means assuming NCHW layout.
181 |     __global__ void subtract_means(float * data,
182 |         float R_mean, float G_mean, float B_mean,
183 |         long HW, // H*W
184 |         long num_elems // N*C*H*W
185 |     )
186 |     {
187 |         long idx = threadIdx.x + blockIdx.x * blockDim.x;
188 |         if (idx >= num_elems)
189 |             return;
190 | 
191 |         switch ( (idx / HW) % 3 )
192 |         {
193 |             case 0:
194 |                 data[idx] -= R_mean;
195 |                 break;
196 |             case 1:
197 |                 data[idx] -= G_mean;
198 |                 break;
199 |             case 2:
200 |                 data[idx] -= B_mean;
201 |                 break;
202 |         }
203 |     }
204 | 
205 |     // Convert unsigned 8-bit integer to 32-bit floating-point using round-to-nearest-even mode,
206 |     // and then subtract RGB channel means assuming NCHW layout.
207 |     __global__ void convert_uint8_to_fp32_and_subtract_means(
208 |         float * __restrict__ out, const unsigned char * __restrict__ in,
209 |         float R_mean, float G_mean, float B_mean,
210 |         long HW, // H*W
211 |         long num_elems // N*C*H*W
212 |     )
213 |     {
214 |         long idx = threadIdx.x + blockIdx.x * blockDim.x;
215 |         if (idx >= num_elems)
216 |             return;
217 | 
218 |         // Convert.
219 |         out[idx] = __int2float_rn( (unsigned int) in[idx] );
220 | 
221 |         // Subtract means.
222 |         switch ( (idx / HW) % 3 )
223 |         {
224 |             case 0:
225 |                 out[idx] -= R_mean;
226 |                 break;
227 |             case 1:
228 |                 out[idx] -= G_mean;
229 |                 break;
230 |             case 2:
231 |                 out[idx] -= B_mean;
232 |                 break;
233 |         }
234 |     }
235 |     """, cache_dir=False)
236 | 
237 |     if PREPROCESS_ON_GPU and MODEL_SUBTRACT_MEAN:
238 |         conversion_kernel_name = 'convert_uint8_to_fp32_and_subtract_means' if CONVERSION_TYPE_SYMBOL == 'f' else None
239 |         conversion_kernel = source_module.get_function(conversion_kernel_name)
240 | #        subtract_means_kernel = source_module.get_function('subtract_means')
241 | #        conversion_kernel_name = 'convert_uint8_to_fp32' if CONVERSION_TYPE_SYMBOL == 'f' else None
242 | #        conversion_kernel = source_module.get_function(conversion_kernel_name)
243 |     elif not PREPROCESS_ON_GPU:
244 |         conversion_kernel_name = 'convert_int8_to_fp32' if CONVERSION_TYPE_SYMBOL == 'f' else None
245 |         conversion_kernel = source_module.get_function(conversion_kernel_name)
246 | 
247 |     compilation_time_ms = (time.time() - compilation_start)*1000
248 |     print("Compilation time of GPU kernel(s): {:.2f} ms".format(compilation_time_ms))
249 | 
250 | print("")
251 | print("[worker {}] Ready to run inference on batches up to {} samples".format(WORKER_ID, max_batch_size))
252 | 
253 | 
254 | ## Main inference loop:
255 | #
256 | with trt_engine.create_execution_context() as trt_context:
257 |     done_count = 0
258 |     total_inference_time = 0
259 |     while JOBS_LIMIT<1 or done_count < JOBS_LIMIT:
260 | 
261 |         wait_and_receive_start = time.time()
262 | 
263 |         try:
264 |             if TRANSFER_MODE == 'dummy':
265 |                 job_data_raw    = from_factory.recv()
266 |             elif TRANSFER_MODE == 'raw':
267 |                 job_data_raw    = memoryview( from_factory.recv() )
268 |             elif TRANSFER_MODE == 'json':
269 |                 job_data_struct = from_factory.recv_json()
270 |             elif TRANSFER_MODE in ('pickle', 'numpy'):
271 |                 job_data_struct = from_factory.recv_pyobj()
272 |         except zmq.error.Again as e:    # ZeroMQ's timeout exception
273 |             if done_count==0:
274 |                 print('.', end='', flush=True)
275 |                 continue
276 |             else:
277 |                 print("Having done {} inference cycles, leaving after a timeout of {} seconds".format(
278 |                                 done_count, WORKER_POSTWORK_TIMEOUT_S))
279 |                 break
280 | 
281 |         # FIXME: floatize -> conversion?
282 |         floatize_start = time.time()
283 | 
284 |         if TRANSFER_MODE == 'dummy':
285 |             job_id, batch_size  = struct.unpack('ii', job_data_raw)
286 |             converted_batch = None
287 |         else:
288 |             if TRANSFER_MODE == 'raw':
289 |                 job_id          = struct.unpack('<I', job_data_raw[:ID_SIZE_IN_BYTES])[0]
290 |                 batch_data      = job_data_raw[ID_SIZE_IN_BYTES:]
291 |                 num_raw_bytes   = len(batch_data)
292 | 
293 |                 if CONVERSION_NEEDED:
294 |                     batch_size      = num_raw_bytes // input_volume
295 |                     converted_batch = None
296 |                 else:
297 |                     batch_size      = num_raw_bytes // input_volume // model_input_type_size
298 |                     converted_batch = batch_data
299 |             elif TRANSFER_MODE in ('json', 'pickle', 'numpy'):
300 |                 job_id      = job_data_struct['job_id']
301 |                 batch_data  = job_data_struct['batch_data']
302 |                 batch_size  = len(batch_data) // input_volume
303 |                 if type(batch_data)==list: # json
304 |                     converted_batch = struct.pack("{}{}".format(len(batch_data), CONVERSION_TYPE_SYMBOL), *batch_data)
305 |                 elif CONVERSION_NEEDED: # pickle, numpy
306 |                     converted_batch = None
307 |                 else:
308 |                     converted_batch = batch_data
309 | 
310 |             if converted_batch is not None:
311 |                 memcpy_htod_start = time.time()
312 |                 cuda.memcpy_htod_async(d_inputs[0], converted_batch, cuda_stream) # assuming one input layer for image classification
313 |                 memcpy_htod_time_ms = (time.time() - memcpy_htod_start)*1000
314 |             else: # raw, pickle, numpy if CONVERSION_NEEDED
315 |                 # TODO: Read max dimensions from CUDA info. Currently taken from TX2.
316 |                 max_block_dim_x = 1024
317 |                 max_grid_dim_x = 2147483647
318 |                 max_x = max_block_dim_x * max_grid_dim_x
319 |                 # The kernel processes one element per thread, so we cannot exceed max_x elements.
320 |                 num_elems = len(batch_data)
321 |                 if num_elems >= max_x:
322 |                     print("Error: Number of elements exceeds max dimension X: {} >= {}".format(num_elems, max_x))
323 |                     pass
324 |                 # Copy input to the GPU.
325 |                 memcpy_htod_start = time.time()
326 |                 cuda.memcpy_htod_async(d_preconverted_input, batch_data, cuda_stream)
327 |                 memcpy_htod_time_ms = (time.time() - memcpy_htod_start )*1000
328 |                 # One thread per element. TODO: Number of threads can be tuned down e.g. halved.
329 |                 block_dim_x = int( max_block_dim_x / 1 )
330 |                 grid_dim_x = int( (num_elems + block_dim_x - 1) / block_dim_x )
331 |                 if PREPROCESS_ON_GPU:
332 |                     if MODEL_SUBTRACT_MEAN:
333 |                         (R_mean, G_mean, B_mean) = channel_means
334 |                         conversion_kernel(d_inputs[0], d_preconverted_input,
335 |                             R_mean, G_mean, B_mean, np.int64(MODEL_IMAGE_HEIGHT*MODEL_IMAGE_WIDTH), np.int64(num_elems), grid=(grid_dim_x,1,1), block=(block_dim_x,1,1))
336 |                     # TODO: Implement other transforms e.g. normalization.
337 |                 else:
338 |                     conversion_kernel(d_inputs[0], d_preconverted_input, np.int64(num_elems), grid=(grid_dim_x,1,1), block=(block_dim_x,1,1))
339 | 
340 | 
341 |         if batch_size > max_batch_size:   # basic protection. FIXME: could report to hub, could split and still do inference...
342 |             print("[worker {}] unable to perform inference on {}-sample batch. Skipping it.".format(WORKER_ID, batch_size))
343 |             continue
344 | 
345 |         inference_start = time.time()
346 | 
347 |         if TRANSFER_MODE != 'dummy':
348 |             trt_context.execute_async(bindings=model_bindings, batch_size=batch_size, stream_handle=cuda_stream.handle)
349 |             for output in h_d_outputs:
350 |                 cuda.memcpy_dtoh_async(output['host_mem'], output['dev_mem'], cuda_stream)
351 |             cuda_stream.synchronize()
352 | 
353 |         inference_time_ms           = (time.time() - inference_start)*1000 + memcpy_htod_time_ms
354 |         floatize_time_ms            = (inference_start-floatize_start)*1000 - memcpy_htod_time_ms
355 |         wait_and_receive_time_ms    = (floatize_start-wait_and_receive_start)*1000
356 | 
357 |         if TRANSFER_MODE == 'dummy':        # no inference - fake a batch
358 |             merged_batch_predictions = [ 0 ] * output_volume * batch_size
359 |         else:
360 |             batch_results = h_output[:output_volume * batch_size].tolist()
361 | 
362 |             if WORKER_OUTPUT_FORMAT == 'direct_return':
363 |                 merged_batch_predictions = batch_results
364 | 
365 |             elif WORKER_OUTPUT_FORMAT == 'softmax':
366 |                 if output_volume == 1:          # model returns argmax - fake the softmax by padding with 1000 zeros (1001 overall)
367 |                     merged_batch_predictions = []
368 |                     for arg_max in batch_results:
369 |                         merged_batch_predictions.extend( [0]*(arg_max +1) + [1] + [0]*(1000-arg_max-1) )
370 |                 else:                           # model returns softmax - just pass it on
371 |                     merged_batch_predictions = batch_results
372 | 
373 |             elif WORKER_OUTPUT_FORMAT == 'argmax':
374 |                 if output_volume == 1:          # model returns argmax - just pass it on
375 |                     merged_batch_predictions = batch_results
376 |                 else:                           # model returns softmax - filter it to return argmax
377 |                     merged_batch_predictions = []
378 | 
379 |                     for j in range(batch_size): # walk through the batch and append individual argmaxen
380 |                         one_argmax = max(zip(batch_results[j*1001:(j+1)*1001], range(1001)))[1]-1
381 |                         merged_batch_predictions.append( one_argmax )
382 | 
383 |         response = {
384 |             'job_id': job_id,
385 |             'worker_id': WORKER_ID,
386 |             'wait_and_receive_time_ms': wait_and_receive_time_ms,
387 |             'floatize_time_ms': floatize_time_ms,
388 |             'inference_time_ms': inference_time_ms,
389 |             'raw_batch_results': merged_batch_predictions,
390 |         }
391 | 
392 |         to_funnel.send_json(response)
393 | 
394 |         print("[worker {}] classified job_id={} [{}] in {:.2f} ms (after spending {:.2f} ms on waiting+receiving AND {:.2f} ms on type conversion)".format(WORKER_ID, job_id, batch_size, inference_time_ms, wait_and_receive_time_ms, floatize_time_ms))
395 |         total_inference_time += inference_time_ms
396 | 
397 |         done_count += 1
398 | 
399 |     print("[worker {}] Total inference time: {}s".format(WORKER_ID, total_inference_time))
400 | 
401 | pycuda_context.pop()
402 | 


--------------------------------------------------------------------------------
/program/object-detection-zpp-hub-loadgen-py/zpp_hub_detect_loadgen.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | import array
  4 | import json
  5 | import os
  6 | import struct
  7 | import sys
  8 | import threading
  9 | import time
 10 | 
 11 | import numpy as np
 12 | import zmq
 13 | import mlperf_loadgen as lg
 14 | 
 15 | 
 16 | ###########################################################################################################
 17 | ## NB: if you run into "zmq.error.ZMQError: Address already in use" after a crash,
 18 | ##     run
 19 | ##          kill `netstat -ltnp 2>/dev/null | grep python | grep 5557 | awk '{print $7}' | sed 's/\/.*//'`
 20 | ##     to stop the socket-hogging process.
 21 | ###########################################################################################################
 22 | 
 23 | 
 24 | ## ZMQ ports:
 25 | #
 26 | ZMQ_FAN_PORT            = os.getenv('CK_ZMQ_FAN_PORT', 5557)
 27 | ZMQ_FUNNEL_PORT         = os.getenv('CK_ZMQ_FUNNEL_PORT', 5558)
 28 | 
 29 | ## LoadGen test properties:
 30 | #
 31 | LOADGEN_SCENARIO            = os.getenv('CK_LOADGEN_SCENARIO', 'SingleStream')
 32 | LOADGEN_MODE                = os.getenv('CK_LOADGEN_MODE', 'AccuracyOnly')
 33 | LOADGEN_BUFFER_SIZE         = int(os.getenv('CK_LOADGEN_BUFFER_SIZE'))      # set to how many samples are you prepared to keep in memory at once
 34 | LOADGEN_DATASET_SIZE        = int(os.getenv('CK_LOADGEN_DATASET_SIZE'))     # set to how many total samples to choose from (0 = full set)
 35 | LOADGEN_CONFIG_FILE         = os.getenv('CK_ENV_LOADGEN_CONFIG_FILE', '')   # Very Important: make sure 'pass_env_to_resolve' is on
 36 | LOADGEN_MULTISTREAMNESS     = os.getenv('CK_LOADGEN_MULTISTREAMNESS', '')   # if not set, use value from LoadGen's config file, or LoadGen code
 37 | LOADGEN_MAX_DURATION_S      = os.getenv('CK_LOADGEN_MAX_DURATION_S', '')    # if not set, use value from LoadGen's config file, or LoadGen code
 38 | LOADGEN_COUNT_OVERRIDE      = os.getenv('CK_LOADGEN_COUNT_OVERRIDE', '')
 39 | LOADGEN_TARGET_QPS          = os.getenv('CK_LOADGEN_TARGET_QPS', '')        # Maps to differently named internal config options, depending on scenario - see below.
 40 | LOADGEN_WARMUP_SAMPLES      = int(os.getenv('CK_LOADGEN_WARMUP_SAMPLES', '0'))
 41 | BATCH_SIZE                  = int(os.getenv('CK_BATCH_SIZE', '1'))
 42 | SIDELOAD_JSON               = os.getenv('CK_LOADGEN_SIDELOAD_JSON','')
 43 | 
 44 | ## Model properties:
 45 | #
 46 | MODEL_PATH              = os.environ['CK_ENV_TENSORRT_MODEL_FILENAME']
 47 | MODEL_DATA_LAYOUT       = os.getenv('ML_MODEL_DATA_LAYOUT', 'NCHW')
 48 | LABELS_PATH             = os.getenv('CK_ENV_TENSORRT_MODEL_FLATLABELS_FILE') or os.environ['ML_MODEL_CLASS_LABELS']
 49 | MODEL_COLOURS_BGR       = os.getenv('ML_MODEL_COLOUR_CHANNELS_BGR', 'NO') in ('YES', 'yes', 'ON', 'on', '1')
 50 | MODEL_INPUT_DATA_TYPE   = os.getenv('ML_MODEL_INPUT_DATA_TYPE', 'float32')
 51 | MODEL_DATA_TYPE         = os.getenv('ML_MODEL_DATA_TYPE', '(unknown)')
 52 | MODEL_MAX_PREDICTIONS   = int(os.getenv('ML_MODEL_MAX_PREDICTIONS', 100))
 53 | MODEL_IMAGE_HEIGHT      = int(os.getenv('ML_MODEL_IMAGE_HEIGHT',
 54 |                               os.getenv('CK_ENV_ONNX_MODEL_IMAGE_HEIGHT',
 55 |                               os.getenv('CK_ENV_TENSORFLOW_MODEL_IMAGE_HEIGHT',
 56 |                               ''))))
 57 | MODEL_IMAGE_WIDTH       = int(os.getenv('ML_MODEL_IMAGE_WIDTH',
 58 |                               os.getenv('CK_ENV_ONNX_MODEL_IMAGE_WIDTH',
 59 |                               os.getenv('CK_ENV_TENSORFLOW_MODEL_IMAGE_WIDTH',
 60 |                               ''))))
 61 | MODEL_IMAGE_CHANNELS    = 3
 62 | 
 63 | MODEL_SKIPPED_CLASSES   = os.getenv("ML_MODEL_SKIPS_ORIGINAL_DATASET_CLASSES", None)
 64 | if (MODEL_SKIPPED_CLASSES):
 65 |     SKIPPED_CLASSES = [int(x) for x in MODEL_SKIPPED_CLASSES.split(",")]
 66 | else:
 67 |     SKIPPED_CLASSES = None
 68 | 
 69 | 
 70 | ## Data transfer (numpy floats by default):
 71 | #
 72 | TRANSFER_MODE           = os.getenv('CK_TRANSFER_MODE', 'numpy')
 73 | TRANSFER_FLOAT          = (os.getenv('CK_TRANSFER_FLOAT', 'YES') in ('YES', 'yes', 'ON', 'on', '1')) and (MODEL_INPUT_DATA_TYPE == 'float32')
 74 | TRANSFER_TYPE_NP, TRANSFER_TYPE_SYMBOL = (np.float32, 'f') if TRANSFER_FLOAT else (np.int8, 'b')
 75 | 
 76 | ## Image normalization:
 77 | #
 78 | PREPROCESS_ON_GPU       = not TRANSFER_FLOAT and os.getenv('CK_PREPROCESS_ON_GPU', 'NO') in ('YES', 'yes', 'ON', 'on', '1')
 79 | 
 80 | MODEL_NORMALIZE_DATA    = os.getenv('ML_MODEL_NORMALIZE_DATA') in ('YES', 'yes', 'ON', 'on', '1')
 81 | MODEL_NORMALIZE_LOWER   = float(os.getenv('ML_MODEL_NORMALIZE_LOWER', -1.0))
 82 | MODEL_NORMALIZE_UPPER   = float(os.getenv('ML_MODEL_NORMALIZE_UPPER',  1.0))
 83 | 
 84 | SUBTRACT_MEAN           = os.getenv('ML_MODEL_SUBTRACT_MEAN', 'YES') in ('YES', 'yes', 'ON', 'on', '1')
 85 | GIVEN_CHANNEL_MEANS     = os.getenv('ML_MODEL_GIVEN_CHANNEL_MEANS', '')
 86 | if GIVEN_CHANNEL_MEANS:
 87 |     GIVEN_CHANNEL_MEANS = np.fromstring(GIVEN_CHANNEL_MEANS, dtype=np.float32, sep=' ')
 88 |     if MODEL_COLOURS_BGR:
 89 |         GIVEN_CHANNEL_MEANS = GIVEN_CHANNEL_MEANS[::-1]     # swapping Red and Blue colour channels
 90 | 
 91 | GIVEN_CHANNEL_STDS      = os.getenv('ML_MODEL_GIVEN_CHANNEL_STDS', '')
 92 | if GIVEN_CHANNEL_STDS:
 93 |     GIVEN_CHANNEL_STDS = np.fromstring(GIVEN_CHANNEL_STDS, dtype=np.float32, sep=' ')
 94 |     if MODEL_COLOURS_BGR:
 95 |         GIVEN_CHANNEL_STDS  = GIVEN_CHANNEL_STDS[::-1]      # swapping Red and Blue colour channels
 96 | 
 97 | 
 98 | ## Input image properties:
 99 | #
100 | IMAGE_DIR               = os.getenv('CK_ENV_DATASET_OBJ_DETECTION_PREPROCESSED_DIR')
101 | IMAGE_LIST_FILE_NAME    = os.getenv('CK_ENV_DATASET_OBJ_DETECTION_PREPROCESSED_SUBSET_FOF')
102 | IMAGE_LIST_FILE         = os.path.join(IMAGE_DIR, IMAGE_LIST_FILE_NAME)
103 | IMAGE_DATA_TYPE         = os.getenv('CK_ENV_DATASET_OBJ_DETECTION_PREPROCESSED_DATA_TYPE', 'uint8')
104 | 
105 | ## Misc
106 | #
107 | VERBOSITY_LEVEL         = int(os.getenv('CK_VERBOSE', '0'))
108 | 
109 | 
110 | ## ZeroMQ communication setup:
111 | #
112 | zmq_context = zmq.Context()
113 | 
114 | to_workers = zmq_context.socket(zmq.PUSH)
115 | to_workers.bind("tcp://*:{}".format(ZMQ_FAN_PORT))
116 | 
117 | from_workers = zmq_context.socket(zmq.PULL)
118 | from_workers.bind("tcp://*:{}".format(ZMQ_FUNNEL_PORT))
119 | from_workers.RCVTIMEO = 2000
120 | 
121 | 
122 | # Load preprocessed image filepaths:
123 | image_path_list = []
124 | original_w_h    = []
125 | with open(IMAGE_LIST_FILE, 'r') as f:
126 |     for line in f:
127 |         file_name, width, height = line.strip().split(";")
128 |         image_path_list.append( os.path.join(IMAGE_DIR, file_name) )
129 |         original_w_h.append( (int(width), int(height)) )
130 | 
131 | LOADGEN_DATASET_SIZE = LOADGEN_DATASET_SIZE or len(image_path_list)
132 | 
133 | 
134 | def load_labels(labels_filepath):
135 |     my_labels = []
136 |     input_file = open(labels_filepath, 'r')
137 |     for l in input_file:
138 |         my_labels.append(l.strip())
139 |     return my_labels
140 | 
141 | 
142 | def tick(letter, quantity=1):
143 |     print(letter + (str(quantity) if quantity>1 else ''), end='')
144 | 
145 | 
146 | # Currently loaded preprocessed images are stored in a dictionary:
147 | preprocessed_image_buffer = {}
148 | 
149 | labels          = load_labels(LABELS_PATH)
150 | bg_class_offset = 1
151 | class_map       = None
152 | if (SKIPPED_CLASSES):
153 |     class_map = []
154 |     for i in range(len(labels) + bg_class_offset):
155 |         if i not in SKIPPED_CLASSES:
156 |             class_map.append(i)
157 | 
158 | 
159 | def load_query_samples(sample_indices):     # 0-based indices in our whole dataset
160 |     global MODEL_IMAGE_HEIGHT, MODEL_IMAGE_WIDTH, MODEL_IMAGE_CHANNELS
161 |     global preprocessed_image_buffer
162 | 
163 |     print("load_query_samples({})".format(sample_indices))
164 | 
165 |     tick('B', len(sample_indices))
166 | 
167 |     for sample_index in sample_indices:
168 |         img_filepath = image_path_list[sample_index]
169 |         img = np.fromfile(img_filepath, np.dtype(IMAGE_DATA_TYPE))
170 |         img = img.reshape((MODEL_IMAGE_HEIGHT, MODEL_IMAGE_WIDTH, MODEL_IMAGE_CHANNELS))
171 | 
172 |         if PREPROCESS_ON_GPU:
173 |             nhwc_img = img if MODEL_DATA_LAYOUT == 'NHWC' else img.transpose(2,0,1)
174 |             preprocessed_image_buffer[sample_index] = np.array(nhwc_img).ravel() # transfer bytes as unsigned
175 |         else:
176 |             if MODEL_COLOURS_BGR:
177 |                 img = img[...,::-1]     # swapping Red and Blue colour channels
178 | 
179 |             if IMAGE_DATA_TYPE != 'float32':
180 |                 img = img.astype(np.float32)
181 | 
182 |                 # Normalize
183 |                 if MODEL_NORMALIZE_DATA:
184 |                     img = img*(MODEL_NORMALIZE_UPPER-MODEL_NORMALIZE_LOWER)/255.0+MODEL_NORMALIZE_LOWER
185 | 
186 |                 # Subtract mean value
187 |                 if SUBTRACT_MEAN:
188 |                     if len(GIVEN_CHANNEL_MEANS):
189 |                         img -= GIVEN_CHANNEL_MEANS
190 |                     else:
191 |                         img -= np.mean(img, axis=(0,1), keepdims=True)
192 | 
193 |                 if len(GIVEN_CHANNEL_STDS):
194 |                     img /= GIVEN_CHANNEL_STDS
195 | 
196 |             if MODEL_INPUT_DATA_TYPE == 'int8' or TRANSFER_TYPE_NP == np.int8:
197 |                 img = np.clip(img, -128, 127)
198 | 
199 |             nhwc_img = img if MODEL_DATA_LAYOUT == 'NHWC' else img.transpose(2,0,1)
200 |             preprocessed_image_buffer[sample_index] = np.array(nhwc_img).ravel().astype(TRANSFER_TYPE_NP) # transfer bytes as signed
201 | 
202 |         tick('l')
203 |     print('')
204 | 
205 | 
206 | def unload_query_samples(sample_indices):
207 |     #print("unload_query_samples({})".format(sample_indices))
208 |     global preprocessed_image_buffer
209 | 
210 |     preprocessed_image_buffer = {}
211 |     tick('U')
212 |     print('')
213 | 
214 | 
215 | openme_data  = {}                   # side-loaded stats
216 | in_progress  = {}                   # local store of metadata about batches between issue_queries and send_responses
217 | funnel_should_be_running = True     # a way for the fan to signal to the funnel_thread to end
218 | warmup_mode              = False    # while on, QuerySampleResponses will not be sent to LoadGen
219 | 
220 | def issue_queries(query_samples):
221 | 
222 |     global BATCH_SIZE
223 | 
224 |     if VERBOSITY_LEVEL:
225 |         printable_query = [(qs.index, qs.id) for qs in query_samples]
226 |         print("issue_queries( {} )".format(printable_query))
227 |     tick('Q', len(query_samples))
228 | 
229 |     for j in range(0, len(query_samples), BATCH_SIZE):
230 |         batch               = query_samples[j:j+BATCH_SIZE]   # NB: the last one may be shorter than BATCH_SIZE in length
231 |         batch_vector_numpy  = np.ravel([ preprocessed_image_buffer[qs.index] for qs in batch ])
232 | 
233 |         job_id      = batch[0].id   # assume it is both sufficiently unique and sufficiently small to fit our needs
234 | 
235 |         in_progress[job_id] = {
236 |             'submission_time':  time.time(),
237 |             'batch':            batch,
238 |         }
239 |     
240 |         if TRANSFER_MODE == 'numpy':
241 |             job_data_struct = {
242 |                 'job_id': job_id,
243 |                 'batch_data': batch_vector_numpy,
244 |             }
245 |             to_workers.send_pyobj(job_data_struct)
246 |         elif TRANSFER_MODE == 'pickle':
247 |             job_data_struct = {
248 |                 'job_id': job_id,
249 |                 'batch_data': np.asarray(batch_vector_numpy),
250 |             }
251 |             to_workers.send_pyobj(job_data_struct)
252 |         elif TRANSFER_MODE == 'raw':
253 |             ## Slower, but insensitive to endianness:
254 |             # batch_vector_list = batch_vector_numpy.tolist()
255 |             # job_data_raw = struct.pack('<I{}{}'.format(len(batch_vector_list), TRANSFER_TYPE_SYMBOL), job_id, *batch_vector_list)
256 | 
257 |             ## Faster, but potentially sensitive to endianness:
258 |             batch_vector_array = np.asarray(batch_vector_numpy)
259 |             job_data_raw   = struct.pack('<I', job_id) + memoryview( batch_vector_array )
260 | 
261 |             to_workers.send(job_data_raw)
262 |         elif TRANSFER_MODE == 'json':
263 |             job_data_struct = {
264 |                 'job_id': job_id,
265 |                 'batch_data': batch_vector_numpy.tolist(),
266 |             }
267 |             to_workers.send_json(job_data_struct)
268 | 
269 |         print("[fan] -> job_id={} {}".format(job_id, [qs.index for qs in batch]))
270 | 
271 | 
272 | def send_responses():
273 | 
274 |     global funnel_should_be_running, warmup_mode, openme_data
275 | 
276 |     funnel_start = time.time()
277 | 
278 |     received_job_timings = openme_data['received_job_timings'] = []
279 |     inference_times_ms_by_worker_id = {}
280 | 
281 |     while funnel_should_be_running:
282 | 
283 |         try:
284 |             done_job            = from_workers.recv_json()
285 |         except Exception as e:
286 |             continue    # go back and check if the funnel_should_be_running condition has been turned off by the main thread
287 | 
288 |         job_id              = done_job['job_id']
289 |         local_metadata      = in_progress.pop(job_id)
290 |         received_timestamp  = time.time()
291 |         roundtrip_time_ms   = (received_timestamp-local_metadata['submission_time'])*1000
292 |         worker_id           = done_job['worker_id']
293 |         inference_time_ms   = done_job['inference_time_ms']
294 |         floatize_time_ms    = done_job['floatize_time_ms']
295 | 
296 |         print("[funnel] <- [worker {}] job_id={}, worker_type_conversion={:.2f} ms, inference={:.2f} ms, roundtrip={:.2f} ms".format(
297 |                             worker_id, job_id, floatize_time_ms, inference_time_ms, roundtrip_time_ms))
298 | 
299 |         received_job_timings.append({
300 |             'job_id':                   job_id,
301 |             'worker_id':                worker_id,
302 |             'received_timestamp':       received_timestamp,
303 |             'worker_floatize_time_ms':  floatize_time_ms,
304 |             'inference_time_ms':        inference_time_ms,
305 |             'roundtrip_time_ms':        roundtrip_time_ms,
306 |         })
307 | 
308 |         if warmup_mode:
309 |             continue
310 | 
311 |         if worker_id not in inference_times_ms_by_worker_id:
312 |             inference_times_ms_by_worker_id[worker_id] = []
313 |         inference_times_ms_by_worker_id[worker_id].append( inference_time_ms )
314 | 
315 |         batch               = local_metadata['batch']
316 |         batch_size          = len(batch)
317 |         raw_batch_results   = np.array(done_job['raw_batch_results'], dtype=np.float32)
318 |         batch_results       = np.split(raw_batch_results, batch_size)
319 | 
320 |         response = []
321 |         response_array_refs = []    # This is needed to guarantee that the individual buffers to which we keep extra-Pythonian references, do not get garbage-collected.
322 |         for qs, all_boxes_for_this_sample in zip(batch, batch_results):
323 | 
324 |             num_active_boxes_for_this_sample = all_boxes_for_this_sample[MODEL_MAX_PREDICTIONS*7].view('int32')
325 |             global_image_index = qs.index
326 |             width_orig, height_orig = original_w_h[global_image_index]
327 |             reformed_active_boxes_for_this_sample = []
328 |             for i in range(num_active_boxes_for_this_sample):
329 |                 (image_id, ymin, xmin, ymax, xmax, confidence_score, class_number) = all_boxes_for_this_sample[i*7:(i+1)*7]
330 | 
331 |                 if class_map:
332 |                     class_number = float(class_map[int(class_number)])
333 | 
334 |                 reformed_active_boxes_for_this_sample += [
335 |                     float(global_image_index), ymin, xmin, ymax, xmax, confidence_score, class_number ]
336 | 
337 |             response_array = array.array("B", np.array(reformed_active_boxes_for_this_sample, np.float32).tobytes())
338 |             response_array_refs.append(response_array)
339 |             bi = response_array.buffer_info()
340 |             response.append(lg.QuerySampleResponse(qs.id, bi[0], bi[1]))
341 |         lg.QuerySamplesComplete(response)
342 |         tick('R', len(response))
343 |         sys.stdout.flush()
344 |     print("[funnel] quitting")
345 | 
346 | 
347 | def flush_queries():
348 |     pass
349 | 
350 | 
351 | def process_latencies(latencies_ns):
352 | 
353 |     global openme_data
354 | 
355 |     latencies_ms = openme_data['loadgen_measured_latencies_ms'] = [ns/1.0e6 for ns in latencies_ns]
356 |     print("LG called process_latencies({})".format(latencies_ms))
357 | 
358 |     latencies_size      = len(latencies_ms)
359 |     latencies_avg       = sum(latencies_ms)/latencies_size
360 |     latencies_sorted    = sorted(latencies_ms)
361 |     latencies_p50       = int(latencies_size * 0.5);
362 |     latencies_p90       = int(latencies_size * 0.9);
363 |     latencies_p99       = int(latencies_size * 0.99);
364 | 
365 |     print("--------------------------------------------------------------------")
366 |     print("|                LATENCIES (in milliseconds and fps)               |")
367 |     print("--------------------------------------------------------------------")
368 |     print("Number of samples run:       {:9d}".format(latencies_size))
369 |     print("Min latency:                 {:9.2f} ms   ({:.3f} fps)".format(latencies_sorted[0], 1e3/latencies_sorted[0]))
370 |     print("Median latency:              {:9.2f} ms   ({:.3f} fps)".format(latencies_sorted[latencies_p50], 1e3/latencies_sorted[latencies_p50]))
371 |     print("Average latency:             {:9.2f} ms   ({:.3f} fps)".format(latencies_avg, 1e3/latencies_avg))
372 |     print("90 percentile latency:       {:9.2f} ms   ({:.3f} fps)".format(latencies_sorted[latencies_p90], 1e3/latencies_sorted[latencies_p90]))
373 |     print("99 percentile latency:       {:9.2f} ms   ({:.3f} fps)".format(latencies_sorted[latencies_p99], 1e3/latencies_sorted[latencies_p99]))
374 |     print("Max latency:                 {:9.2f} ms   ({:.3f} fps)".format(latencies_sorted[-1], 1e3/latencies_sorted[-1]))
375 |     print("--------------------------------------------------------------------")
376 | 
377 | 
378 | def benchmark_using_loadgen():
379 |     "Perform the benchmark using python API for the LoadGen library"
380 | 
381 |     global funnel_should_be_running, warmup_mode, openme_data
382 | 
383 |     scenario = {
384 |         'SingleStream':     lg.TestScenario.SingleStream,
385 |         'MultiStream':      lg.TestScenario.MultiStream,
386 |         'Server':           lg.TestScenario.Server,
387 |         'Offline':          lg.TestScenario.Offline,
388 |     }[LOADGEN_SCENARIO]
389 | 
390 |     mode = {
391 |         'AccuracyOnly':     lg.TestMode.AccuracyOnly,
392 |         'PerformanceOnly':  lg.TestMode.PerformanceOnly,
393 |         'SubmissionRun':    lg.TestMode.SubmissionRun,
394 |     }[LOADGEN_MODE]
395 | 
396 |     ts = lg.TestSettings()
397 |     if LOADGEN_CONFIG_FILE:
398 |         ts.FromConfig(LOADGEN_CONFIG_FILE, 'random_model_name', LOADGEN_SCENARIO)
399 |     ts.scenario = scenario
400 |     ts.mode     = mode
401 | 
402 |     if LOADGEN_MULTISTREAMNESS:
403 |         ts.multi_stream_samples_per_query = int(LOADGEN_MULTISTREAMNESS)
404 | 
405 |     if LOADGEN_MAX_DURATION_S:
406 |         ts.max_duration_ms = int(LOADGEN_MAX_DURATION_S)*1000
407 | 
408 |     if LOADGEN_COUNT_OVERRIDE:
409 |         ts.min_query_count = int(LOADGEN_COUNT_OVERRIDE)
410 |         ts.max_query_count = int(LOADGEN_COUNT_OVERRIDE)
411 | 
412 |     if LOADGEN_TARGET_QPS:
413 |         target_qps                  = float(LOADGEN_TARGET_QPS)
414 |         ts.multi_stream_target_qps  = target_qps
415 |         ts.server_target_qps        = target_qps
416 |         ts.offline_expected_qps     = target_qps
417 | 
418 |     sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
419 |     qsl = lg.ConstructQSL(LOADGEN_DATASET_SIZE, LOADGEN_BUFFER_SIZE, load_query_samples, unload_query_samples)
420 | 
421 |     log_settings = lg.LogSettings()
422 |     log_settings.enable_trace = False
423 | 
424 |     funnel_thread = threading.Thread(target=send_responses, args=())
425 |     funnel_should_be_running = True
426 |     funnel_thread.start()
427 | 
428 |     if LOADGEN_WARMUP_SAMPLES:
429 |         warmup_id_range = list(range(LOADGEN_WARMUP_SAMPLES))
430 |         load_query_samples(warmup_id_range)
431 | 
432 |         warmup_mode = True
433 |         print("Sending out the warm-up samples, waiting for responses...")
434 |         issue_queries([lg.QuerySample(id,id) for id in warmup_id_range])
435 | 
436 |         while len(in_progress)>0:       # waiting for the in_progress queue to clear up
437 |             time.sleep(1)
438 |         print(" Done!")
439 | 
440 |         warmup_mode = False
441 | 
442 |     lg.StartTestWithLogSettings(sut, qsl, ts, log_settings)
443 | 
444 |     funnel_should_be_running = False    # politely ask the funnel_thread to end
445 |     funnel_thread.join()                # wait for it to actually end
446 | 
447 |     from_workers.close()
448 |     to_workers.close()
449 | 
450 |     lg.DestroyQSL(qsl)
451 |     lg.DestroySUT(sut)
452 | 
453 |     if SIDELOAD_JSON:
454 |         with open(SIDELOAD_JSON, 'w') as sideload_fd:
455 |             json.dump(openme_data, sideload_fd, indent=4, sort_keys=True)
456 | 
457 | 
458 | benchmark_using_loadgen()
459 | 


--------------------------------------------------------------------------------