├── .github └── workflows │ ├── stale.yml │ └── sync_issues.yml ├── .gitignore ├── .gitmodules ├── README.md ├── examples ├── armnn │ ├── README.md │ ├── common │ │ ├── cv_utils.py │ │ ├── network_executor.py │ │ ├── tests │ │ │ ├── conftest.py │ │ │ ├── context.py │ │ │ ├── test_network_executor.py │ │ │ └── test_utils.py │ │ └── utils.py │ ├── face_age-gender │ │ ├── README.md │ │ ├── box.py │ │ ├── requirements.txt │ │ ├── run_video_file.py │ │ ├── run_video_stream.py │ │ └── yolov2.py │ ├── face_keypoints │ │ ├── README.md │ │ ├── box.py │ │ ├── requirements.txt │ │ ├── run_video_file.py │ │ ├── run_video_stream.py │ │ └── yolov2.py │ └── face_recognition │ │ ├── README.md │ │ ├── box.py │ │ ├── calculate_features.py │ │ ├── requirements.txt │ │ ├── run_video_file.py │ │ ├── run_video_stream.py │ │ └── yolov2.py ├── edge_impulse │ └── multi_stage_inference_vehicle_type │ │ └── multi_stage.py ├── mediapipe │ ├── README.md │ ├── common │ │ └── cv_utils.py │ ├── face_detection │ │ ├── run_video_file.py │ │ └── run_video_stream.py │ ├── face_mesh │ │ ├── run_video_file.py │ │ └── run_video_stream.py │ ├── hand_landmarks │ │ ├── run_video_file.py │ │ └── run_video_stream.py │ └── pose_estimation │ │ ├── run_video_file.py │ │ └── run_video_stream.py ├── sample_files │ ├── cars.mp4 │ ├── test_dance.mp4 │ └── test_s.mp4 └── tensorflow_lite │ ├── face_recognition │ ├── README.md │ ├── base_camera.py │ ├── calculate_features.py │ ├── camera_opencv.py │ ├── camera_pi.py │ ├── cv_utils.py │ ├── multi_stage_file.py │ ├── multi_stage_stream.py │ ├── requirements.txt │ └── templates │ │ └── index.html │ ├── multi_stage_inference_age_gender │ ├── base_camera.py │ ├── camera_opencv.py │ ├── camera_pi.py │ ├── cv_utils.py │ ├── multi_stage_file.py │ ├── multi_stage_stream.py │ └── templates │ │ └── index.html │ ├── multi_stage_inference_emotion │ ├── base_camera.py │ ├── camera_opencv.py │ ├── camera_pi.py │ ├── cv_utils.py │ ├── multi_stage_file.py │ ├── multi_stage_stream.py │ └── templates │ │ └── index.html │ └── multi_stage_inference_vehicle_type │ ├── base_camera.py │ ├── camera_opencv.py │ ├── camera_pi.py │ ├── cv_utils.py │ ├── labels.txt │ ├── multi_stage_file.py │ ├── multi_stage_stream.py │ └── templates │ └── index.html └── jupyter_notebooks ├── aXeleRate_conveyor_belt_rip_recognition.ipynb ├── aXeleRate_face_anti_spoofing.ipynb ├── aXeleRate_lung_segmentation.ipynb └── aXeleRate_multi_stage.ipynb /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: 'Close stale issues and PRs' 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: '0 4 * * *' 7 | 8 | jobs: 9 | stale: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Checkout repository 14 | uses: actions/checkout@v4 15 | 16 | - name: Checkout script repository 17 | uses: actions/checkout@v4 18 | with: 19 | repository: Seeed-Studio/sync-github-all-issues 20 | path: ci 21 | 22 | - name: Run script 23 | run: ./ci/tools/stale.sh 24 | env: 25 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 26 | -------------------------------------------------------------------------------- /.github/workflows/sync_issues.yml: -------------------------------------------------------------------------------- 1 | name: Automate Issue Management 2 | 3 | on: 4 | issues: 5 | types: 6 | - opened 7 | - edited 8 | - assigned 9 | - unassigned 10 | - labeled 11 | - unlabeled 12 | - reopened 13 | 14 | jobs: 15 | add_issue_to_project: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - name: Add issue to 
GitHub Project 19 | uses: actions/add-to-project@v1.0.2 20 | with: 21 | project-url: https://github.com/orgs/Seeed-Studio/projects/17 22 | github-token: ${{ secrets.ISSUE_ASSEMBLE }} 23 | labeled: bug 24 | label-operator: NOT -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | *.tflite 132 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "training_code/edge_ml_age_gender_recognition"] 2 | path = training_code/edge_ml_age_gender_recognition 3 | url = https://github.com/AIWintermuteAI/edge_ml_age_gender_recognition.git 4 | [submodule "training_code/edge_ml_emotion_recognition"] 5 | path = training_code/edge_ml_emotion_recognition 6 | url = https://github.com/AIWintermuteAI/edge_ml_emotion_recognition.git 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Seeed_Python_MachineLearning 2 | Examples and training code for Machine Learning samples that can be run on various Edge devices 3 | -------------------------------------------------------------------------------- /examples/armnn/README.md: -------------------------------------------------------------------------------- 1 | # PyArmNN Sample Applications 2 | 3 | ## Introduction 4 | This sample application guides the user and shows how to perform inference using PyArmNN API. We assume the user has already built PyArmNN by following the instructions of the README in the main PyArmNN directory. 5 | 6 | We provide example scripts for performing inference from video file and video stream with `run_video_file.py` and `run_video_stream.py`. For detailed instructions and download links to reference models, check README file inside each application folder. 7 | 8 | ## Prerequisites 9 | 10 | ##### PyArmNN 11 | 12 | Before proceeding to the next steps, make sure that you have successfully installed the newest version of PyArmNN on your system by following the instructions in the README of the PyArmNN root directory. 13 | 14 | You can verify that PyArmNN library is installed and check PyArmNN version using: 15 | ```bash 16 | $ pip show pyarmnn 17 | ``` 18 | 19 | You can also verify it by running the following and getting output similar to below: 20 | ```bash 21 | $ python -c "import pyarmnn as ann;print(ann.GetVersion())" 22 | '24.0.0' 23 | ``` 24 | 25 | ##### Dependencies 26 | 27 | Install the following libraries on your system: 28 | ```bash 29 | $ sudo apt-get install python3-opencv libqtgui4 libqt4-test 30 | ``` 31 | 32 | Create a virtual environment: 33 | ```bash 34 | $ python3.7 -m venv devenv --system-site-packages 35 | $ source devenv/bin/activate 36 | ``` 37 | 38 | Install the dependencies: 39 | ```bash 40 | $ pip install -r requirements.txt 41 | ``` 42 | 43 | --- 44 | 45 | ## Implementing Your Own Network 46 | The examples provide support for `yolo_v2` detection layer models. However, the user is able to add their own network to the object detection scripts by following the steps: 47 | 48 | 1. 
Create a new file for your network, for example `network.py`, to contain functions to process the output of the model 49 | 2. In that file, the user will need to write a function that decodes the output vectors obtained from running inference on their network and return the bounding box positions of detected objects plus their class index and confidence. Additionally, include a function that returns a resize factor that will scale the obtained bounding boxes to their correct positions in the original frame 50 | 3. Import the functions into the main file and, such as with the provided networks, add a conditional statement to the `get_model_processing()` function with the new model name and functions 51 | 4. The labels associated with the model can then be passed in with `--label_path` argument 52 | 53 | --- 54 | 55 | # Application Overview 56 | 57 | This section provides a walkthrough of the application, explaining in detail the steps: 58 | 59 | 1. Initialisation 60 | 2. Creating a Network 61 | 3. Preparing the Workload Tensors 62 | 4. Executing Inference 63 | 5. Postprocessing 64 | 65 | 66 | ### Initialisation 67 | 68 | ##### Reading from Video Source 69 | After parsing user arguments, the chosen video file or stream is loaded into an OpenCV `cv2.VideoCapture()` object. We use this object to capture frames from the source using the `read()` function. 70 | 71 | The `VideoCapture` object also tells us information about the source, such as the framerate and resolution of the input video. Using this information, we create a `cv2.VideoWriter()` object which will be used at the end of every loop to write the processed frame to an output video file of the same format as the input. 72 | 73 | ##### Preparing Labels and Model Specific Functions 74 | In order to interpret the result of running inference on the loaded network, it is required to load the labels associated with the model. In the provided example code, the `dict_labels()` function creates a dictionary that is keyed on the classification index at the output node of the model, with values of the dictionary corresponding to a label and a randomly generated RGB color. This ensures that each class has a unique color which will prove helpful when plotting the bounding boxes of various detected objects in a frame. 75 | 76 | Depending on the model being used, the user-specified model name accesses and returns functions to decode and process the inference output, along with a resize factor used when plotting bounding boxes to ensure they are scaled to their correct position in the original frame. 77 | 78 | 79 | ### Creating a Network 80 | 81 | ##### Creating Parser and Importing Graph 82 | The first step with PyArmNN is to import a graph from file by using the appropriate parser. 83 | 84 | The Arm NN SDK provides parsers for reading graphs from a variety of model formats. In our application we specifically focus on `.tflite, .pb, .onnx` models. 85 | 86 | Based on the extension of the provided model file, the corresponding parser is created and the network file loaded with `CreateNetworkFromBinaryFile()` function. The parser will handle the creation of the underlying Arm NN graph. 87 | 88 | ##### Optimizing Graph for Compute Device 89 | Arm NN supports optimized execution on multiple CPU and GPU devices. Prior to executing a graph, we must select the appropriate device context. We do this by creating a runtime context with default options with `IRuntime()`. 
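For reference, the snippet below condenses the graph-creation steps described in this section into one place. It is a minimal sketch modelled on the `create_network()` helper in `examples/armnn/common/network_executor.py`, assuming a `.tflite` model and default creation options; the model filename and backend list are only illustrative.

```python
import pyarmnn as ann

# Parse the .tflite model and build the underlying Arm NN graph
parser = ann.ITfLiteParser()
network = parser.CreateNetworkFromBinaryFile('YOLO_best_mAP.tflite')

# Create a runtime context with default creation options
runtime = ann.IRuntime(ann.CreationOptions())

# Optimize for the preferred backends and load the network onto the compute device
preferred_backends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]
opt_network, messages = ann.Optimize(network, preferred_backends,
                                     runtime.GetDeviceSpec(), ann.OptimizerOptions())
net_id, _ = runtime.LoadNetwork(opt_network)

# Extract input/output binding information from the parser
graph_id = parser.GetSubgraphCount() - 1
input_name = parser.GetSubgraphInputTensorNames(graph_id)[0]
input_binding_info = parser.GetNetworkInputBindingInfo(graph_id, input_name)
output_binding_info = [parser.GetNetworkOutputBindingInfo(graph_id, name)
                       for name in parser.GetSubgraphOutputTensorNames(graph_id)]
```

With `net_id`, `runtime` and the binding information in hand, the remaining steps only deal with preparing tensors and decoding results, as described below.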
90 | 91 | We can optimize the imported graph by specifying a list of backends in order of preference and implementing backend-specific optimizations. The backends are identified by a string unique to the backend, for example `CpuAcc, GpuAcc, CpuRef`. 92 | 93 | Internally and transparently, Arm NN splits the graph into subgraphs based on the backends, calls an optimize-subgraphs function on each of them and, if possible, substitutes the corresponding subgraph in the original graph with its optimized version. 94 | 95 | Using the `Optimize()` function, we optimize the graph for inference and load the optimized network onto the compute device with `LoadNetwork()`. This function creates the backend-specific workloads for the layers and a backend-specific workload factory which is called to create the workloads. 96 | 97 | ##### Creating Input and Output Binding Information 98 | Parsers can also be used to extract the input information for the network. By calling `GetSubgraphInputTensorNames`, we extract all the input names and, with `GetNetworkInputBindingInfo`, bind the input points of the graph. 99 | 100 | The input binding information contains all the essential information about the input. It is a tuple consisting of integer identifiers for bindable layers (inputs, outputs) and the tensor info (data type, quantization information, number of dimensions, total number of elements). 101 | 102 | Similarly, we can get the output binding information for an output layer by using the parser to retrieve output tensor names and calling `GetNetworkOutputBindingInfo()`. 103 | 104 | 105 | ### Preparing the Workload Tensors 106 | 107 | ##### Preprocessing the Captured Frame 108 | Each frame captured from the source is read as an `ndarray` in BGR format and therefore has to be preprocessed before being passed into the network. 109 | 110 | This preprocessing step consists of swapping channels (BGR to RGB in this example), resizing the frame to the required resolution, expanding the dimensions of the array and converting the data type to match the model input layer. This information about the input tensor can be readily obtained from reading the `input_binding_info`. For example, SSD MobileNet V1 takes as input a tensor with shape `[1, 300, 300, 3]` and data type `uint8`. 111 | 112 | ##### Making Input and Output Tensors 113 | To produce the workload tensors, calling the functions `make_input_tensors()` and `make_output_tensors()` will return the input and output tensors respectively. 114 | 115 | 116 | ### Executing Inference 117 | After making the workload tensors, a compute device performs inference for the loaded network using the `EnqueueWorkload()` function of the runtime context. By calling the `workload_tensors_to_ndarray()` function, we obtain the results from inference as a list of `ndarrays`. 118 | 119 | 120 | ### Postprocessing 121 | 122 | ##### Decoding and Processing Inference Output 123 | The output from inference must be decoded to obtain information about detected objects in the frame. In the examples there are implementations for two networks, but you may also implement your own network decoding solution here. Please refer to the Implementing Your Own Network section of this document to learn how to do this. 124 | 125 | For SSD MobileNet V1 models, we decode the results to obtain the bounding box positions, classification index, confidence and number of detections in the input frame. 
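As a purely illustrative aid, a decoder for that kind of SSD output might look like the sketch below. It assumes the common post-processed TFLite SSD output layout (normalised boxes in `[y_min, x_min, y_max, x_max]` order, class indices, scores and a detection count); the `ssd_processing` name and threshold are placeholders, and the examples in this repository actually ship a YOLOv2 decoder in `yolov2.py`.

```python
def ssd_processing(output: list, confidence_threshold: float = 0.6) -> list:
    """Decode post-processed SSD output into [class index, [box positions], confidence] entries."""
    boxes, classes, scores = output[0][0], output[1][0], output[2][0]
    num_detections = int(output[3][0])
    detections = []
    for i in range(num_detections):
        if scores[i] < confidence_threshold:
            continue
        # Reorder the normalised [y_min, x_min, y_max, x_max] box to [x_min, y_min, x_max, y_max];
        # a resize factor (e.g. the frame size) scales these values to pixel coordinates later on.
        y_min, x_min, y_max, x_max = boxes[i]
        detections.append([int(classes[i]), [x_min, y_min, x_max, y_max], scores[i]])
    return detections
```

The returned list already matches the detection format described below, so a drawing routine such as `draw_bounding_boxes()` can consume it directly.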
126 | 127 | For YOLO V3 Tiny models, we decode the output and perform non-maximum suppression to filter out any weak detections below a confidence threshold and any redudant bounding boxes above an intersection-over-union threshold. 128 | 129 | It is encouraged to experiment with threshold values for confidence and intersection-over-union (IoU) to achieve the best visual results. 130 | 131 | The detection results are always returned as a list in the form `[class index, [box positions], confidence score]`, with the box positions list containing bounding box coordinates in the form `[x_min, y_min, x_max, y_max]`. 132 | 133 | ##### Drawing Bounding Boxes 134 | With the obtained results and using `draw_bounding_boxes()`, we are able to draw bounding boxes around detected objects and add the associated label and confidence score. The labels dictionary created earlier uses the class index of the detected object as a key to return the associated label and color for that class. The resize factor defined at the beginning scales the bounding box coordinates to their correct positions in the original frame. The processed frames are written to file or displayed in a separate window. 135 | -------------------------------------------------------------------------------- /examples/armnn/common/cv_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | """ 6 | This file contains helper functions for reading video/image data and 7 | pre/postprocessing of video/image data using OpenCV. 8 | """ 9 | 10 | import os 11 | 12 | import cv2 13 | import numpy as np 14 | 15 | import pyarmnn as ann 16 | 17 | 18 | def preprocess_array(x, **kwargs): 19 | x /= 127.5 20 | x -= 1. 21 | return x 22 | 23 | def preprocess(frame: np.ndarray, input_binding_info: tuple): 24 | """ 25 | Takes a frame, resizes, swaps channels and converts data type to match 26 | model input layer. The converted frame is wrapped in a const tensor 27 | and bound to the input tensor. 28 | 29 | Args: 30 | frame: Captured frame from video. 31 | input_binding_info: Contains shape and data type of model input layer. 32 | 33 | Returns: 34 | Input tensor. 35 | """ 36 | # Swap channels and resize frame to model resolution 37 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 38 | resized_frame = resize_with_aspect_ratio(frame, input_binding_info) 39 | 40 | # Expand dimensions and convert data type to match model input 41 | data_type = np.float32 if input_binding_info[1].GetDataType() == ann.DataType_Float32 else np.uint8 42 | resized_frame = np.expand_dims(np.asarray(resized_frame, dtype=data_type), axis=0) 43 | resized_frame = preprocess_array(resized_frame) 44 | assert resized_frame.shape == tuple(input_binding_info[1].GetShape()) 45 | 46 | input_tensors = ann.make_input_tensors([input_binding_info], [resized_frame]) 47 | return input_tensors 48 | 49 | 50 | def resize_with_aspect_ratio(frame: np.ndarray, input_binding_info: tuple): 51 | """ 52 | Resizes frame while maintaining aspect ratio, padding any empty space. 53 | 54 | Args: 55 | frame: Captured frame. 56 | input_binding_info: Contains shape of model input layer. 57 | 58 | Returns: 59 | Frame resized to the size of model input layer. 
60 | """ 61 | aspect_ratio = frame.shape[1] / frame.shape[0] 62 | model_height, model_width = list(input_binding_info[1].GetShape())[1:3] 63 | 64 | if aspect_ratio >= 1.0: 65 | new_height, new_width = int(model_width / aspect_ratio), model_width 66 | b_padding, r_padding = model_height - new_height, 0 67 | else: 68 | new_height, new_width = model_height, int(model_height * aspect_ratio) 69 | b_padding, r_padding = 0, model_width - new_width 70 | 71 | # Resize and pad any empty space 72 | frame = cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_LINEAR) 73 | frame = cv2.copyMakeBorder(frame, top=0, bottom=b_padding, left=0, right=r_padding, 74 | borderType=cv2.BORDER_CONSTANT, value=[0, 0, 0]) 75 | return frame 76 | 77 | 78 | def create_video_writer(video: cv2.VideoCapture, video_path: str, name: str): 79 | """ 80 | Creates a video writer object to write processed frames to file. 81 | 82 | Args: 83 | video: Video capture object, contains information about data source. 84 | video_path: User-specified video file path. 85 | output_path: Optional path to save the processed video. 86 | 87 | Returns: 88 | Video writer object. 89 | """ 90 | _, ext = os.path.splitext(video_path) 91 | 92 | i, filename = 0, os.path.join(str(), f'{name}{ext}') 93 | 94 | while os.path.exists(filename): 95 | i += 1 96 | filename = os.path.join(str(), f'{name}({i}){ext}') 97 | print(filename) 98 | video_writer = cv2.VideoWriter(filename=filename, 99 | fourcc=get_source_encoding_int(video), 100 | fps=int(video.get(cv2.CAP_PROP_FPS)), 101 | frameSize=(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)), 102 | int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)))) 103 | return video_writer 104 | 105 | 106 | def init_video_file_capture(video_path: str, name: str): 107 | """ 108 | Creates a video capture object from a video file. 109 | 110 | Args: 111 | video_path: User-specified video file path. 112 | output_path: Optional path to save the processed video. 113 | 114 | Returns: 115 | Video capture object to capture frames, video writer object to write processed 116 | frames to file, plus total frame count of video source to iterate through. 117 | """ 118 | if not os.path.exists(video_path): 119 | raise FileNotFoundError(f'Video file not found for: {video_path}') 120 | video = cv2.VideoCapture(video_path) 121 | if not video.isOpened: 122 | raise RuntimeError(f'Failed to open video capture from file: {video_path}') 123 | 124 | video_writer = create_video_writer(video, video_path, name) 125 | iter_frame_count = range(int(video.get(cv2.CAP_PROP_FRAME_COUNT))) 126 | return video, video_writer, iter_frame_count 127 | 128 | 129 | def init_video_stream_capture(video_source: int): 130 | """ 131 | Creates a video capture object from a device. 132 | 133 | Args: 134 | video_source: Device index used to read video stream. 135 | 136 | Returns: 137 | Video capture object used to capture frames from a video stream. 138 | """ 139 | video = cv2.VideoCapture(video_source) 140 | if not video.isOpened: 141 | raise RuntimeError(f'Failed to open video capture for device with index: {video_source}') 142 | print('Processing video stream. Press \'Esc\' key to exit the demo.') 143 | return video 144 | 145 | 146 | def draw_bounding_boxes(frame: np.ndarray, detections: list, resize_factor, labels: dict): 147 | """ 148 | Draws bounding boxes around detected objects and adds a label and confidence score. 149 | 150 | Args: 151 | frame: The original captured frame from video source. 
152 | detections: A list of detected objects in the form [class, [box positions], confidence]. 153 | resize_factor: Resizing factor to scale box coordinates to output frame size. 154 | labels: Dictionary of labels and colors keyed on the classification index. 155 | """ 156 | for detection in detections: 157 | class_idx, box, confidence = [d for d in detection] 158 | label, color = labels[class_idx][0].capitalize(), labels[class_idx][1] 159 | 160 | # Obtain frame size and resized bounding box positions 161 | frame_height, frame_width = frame.shape[:2] 162 | x_min, y_min, x_max, y_max = [int(position * resize_factor) for position in box] 163 | 164 | # Ensure box stays within the frame 165 | x_min, y_min = max(0, x_min), max(0, y_min) 166 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 167 | 168 | # Draw bounding box around detected object 169 | cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2) 170 | 171 | # Create label for detected object class 172 | label = f'{label} {confidence * 100:.1f}%' 173 | label_color = (0, 0, 0) if sum(color)>200 else (255, 255, 255) 174 | 175 | # Make sure label always stays on-screen 176 | x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2] 177 | 178 | lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text) 179 | lbl_box_xy_max = (x_min + int(0.55 * x_text), y_min + y_text if y_min<25 else y_min) 180 | lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5) 181 | 182 | # Add label and confidence value 183 | cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1) 184 | cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.50, 185 | label_color, 1, cv2.LINE_AA) 186 | 187 | 188 | def get_source_encoding_int(video_capture): 189 | return int(video_capture.get(cv2.CAP_PROP_FOURCC)) 190 | -------------------------------------------------------------------------------- /examples/armnn/common/network_executor.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 2 | # SPDX-License-Identifier: MIT 3 | 4 | import os 5 | from typing import List, Tuple 6 | 7 | import pyarmnn as ann 8 | import numpy as np 9 | 10 | 11 | def create_network(model_file: str, backends: list, input_names: Tuple[str] = (), output_names: Tuple[str] = ()): 12 | """ 13 | Creates a network based on the model file and a list of backends. 14 | 15 | Args: 16 | model_file: User-specified model file. 17 | backends: List of backends to optimize network. 18 | input_names: 19 | output_names: 20 | 21 | Returns: 22 | net_id: Unique ID of the network to run. 23 | runtime: Runtime context for executing inference. 24 | input_binding_info: Contains essential information about the model input. 25 | output_binding_info: Used to map output tensor and its memory. 26 | """ 27 | if not os.path.exists(model_file): 28 | raise FileNotFoundError(f'Model file not found for: {model_file}') 29 | 30 | _, ext = os.path.splitext(model_file) 31 | if ext == '.tflite': 32 | parser = ann.ITfLiteParser() 33 | else: 34 | raise ValueError("Supplied model file type is not supported. 
Supported types are [ tflite ]") 35 | 36 | network = parser.CreateNetworkFromBinaryFile(model_file) 37 | 38 | # Specify backends to optimize network 39 | preferred_backends = [] 40 | for b in backends: 41 | preferred_backends.append(ann.BackendId(b)) 42 | 43 | # Select appropriate device context and optimize the network for that device 44 | options = ann.CreationOptions() 45 | runtime = ann.IRuntime(options) 46 | opt_network, messages = ann.Optimize(network, preferred_backends, runtime.GetDeviceSpec(), 47 | ann.OptimizerOptions()) 48 | print(f'Preferred backends: {backends}\n{runtime.GetDeviceSpec()}\n' 49 | f'Optimization warnings: {messages}') 50 | 51 | # Load the optimized network onto the Runtime device 52 | net_id, _ = runtime.LoadNetwork(opt_network) 53 | 54 | # Get input and output binding information 55 | graph_id = parser.GetSubgraphCount() - 1 56 | input_names = parser.GetSubgraphInputTensorNames(graph_id) 57 | input_binding_info = parser.GetNetworkInputBindingInfo(graph_id, input_names[0]) 58 | output_names = parser.GetSubgraphOutputTensorNames(graph_id) 59 | output_binding_info = [] 60 | 61 | for output_name in output_names: 62 | out_bind_info = parser.GetNetworkOutputBindingInfo(graph_id, output_name) 63 | output_binding_info.append(out_bind_info) 64 | 65 | return net_id, runtime, input_binding_info, output_binding_info 66 | 67 | 68 | def execute_network(input_tensors: list, output_tensors: list, runtime, net_id: int) -> List[np.ndarray]: 69 | """ 70 | Executes inference for the loaded network. 71 | 72 | Args: 73 | input_tensors: The input frame tensor. 74 | output_tensors: The output tensor from output node. 75 | runtime: Runtime context for executing inference. 76 | net_id: Unique ID of the network to run. 77 | 78 | Returns: 79 | list: Inference results as a list of ndarrays. 80 | """ 81 | runtime.EnqueueWorkload(net_id, input_tensors, output_tensors) 82 | output = ann.workload_tensors_to_ndarray(output_tensors) 83 | return output 84 | 85 | 86 | class ArmnnNetworkExecutor: 87 | 88 | def __init__(self, model_file: str, backends: list): 89 | """ 90 | Creates an inference executor for a given network and a list of backends. 91 | 92 | Args: 93 | model_file: User-specified model file. 94 | backends: List of backends to optimize network. 95 | """ 96 | self.network_id, self.runtime, self.input_binding_info, self.output_binding_info = create_network(model_file, 97 | backends) 98 | self.output_tensors = ann.make_output_tensors(self.output_binding_info) 99 | 100 | def run(self, input_tensors: list) -> List[np.ndarray]: 101 | """ 102 | Executes inference for the loaded network. 103 | 104 | Args: 105 | input_tensors: The input frame tensor. 106 | 107 | Returns: 108 | list: Inference results as a list of ndarrays. 109 | """ 110 | return execute_network(input_tensors, self.output_tensors, self.runtime, self.network_id) 111 | -------------------------------------------------------------------------------- /examples/armnn/common/tests/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
2 | # SPDX-License-Identifier: MIT 3 | 4 | import os 5 | import ntpath 6 | 7 | import urllib.request 8 | import zipfile 9 | 10 | import pytest 11 | 12 | script_dir = os.path.dirname(__file__) 13 | @pytest.fixture(scope="session") 14 | def test_data_folder(request): 15 | """ 16 | This fixture returns path to folder with shared test resources among all tests 17 | """ 18 | 19 | data_dir = os.path.join(script_dir, "testdata") 20 | if not os.path.exists(data_dir): 21 | os.mkdir(data_dir) 22 | 23 | files_to_download = ["https://raw.githubusercontent.com/opencv/opencv/4.0.0/samples/data/messi5.jpg", 24 | "https://raw.githubusercontent.com/opencv/opencv/4.0.0/samples/data/basketball1.png", 25 | "https://raw.githubusercontent.com/opencv/opencv/4.0.0/samples/data/Megamind.avi", 26 | "https://storage.googleapis.com/download.tensorflow.org/models/tflite/coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip" 27 | ] 28 | 29 | for file in files_to_download: 30 | path, filename = ntpath.split(file) 31 | file_path = os.path.join(data_dir, filename) 32 | if not os.path.exists(file_path): 33 | print("\nDownloading test file: " + file_path + "\n") 34 | urllib.request.urlretrieve(file, file_path) 35 | 36 | # Any unzipping needed, and moving around of files 37 | with zipfile.ZipFile(os.path.join(data_dir, "coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip"), 'r') as zip_ref: 38 | zip_ref.extractall(data_dir) 39 | 40 | return data_dir 41 | -------------------------------------------------------------------------------- /examples/armnn/common/tests/context.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) 4 | 5 | import cv_utils 6 | import network_executor 7 | import utils 8 | -------------------------------------------------------------------------------- /examples/armnn/common/tests/test_network_executor.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 2 | # SPDX-License-Identifier: MIT 3 | 4 | import os 5 | 6 | import cv2 7 | 8 | from context import network_executor 9 | from context import cv_utils 10 | 11 | 12 | def test_execute_network(test_data_folder): 13 | model_path = os.path.join(test_data_folder, "detect.tflite") 14 | backends = ["CpuAcc", "CpuRef"] 15 | 16 | executor = network_executor.ArmnnNetworkExecutor(model_path, backends) 17 | img = cv2.imread(os.path.join(test_data_folder, "messi5.jpg")) 18 | input_tensors = cv_utils.preprocess(img, executor.input_binding_info) 19 | 20 | output_result = executor.run(input_tensors) 21 | 22 | # Ensure it detects a person 23 | classes = output_result[1] 24 | assert classes[0][0] == 0 25 | -------------------------------------------------------------------------------- /examples/armnn/common/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
2 | # SPDX-License-Identifier: MIT 3 | 4 | import os 5 | 6 | from context import cv_utils 7 | from context import utils 8 | 9 | 10 | def test_get_source_encoding(test_data_folder): 11 | video_file = os.path.join(test_data_folder, "Megamind.avi") 12 | video, video_writer, frame_count = cv_utils.init_video_file_capture(video_file, "/tmp") 13 | assert cv_utils.get_source_encoding_int(video) == 1145656920 14 | 15 | 16 | def test_read_existing_labels_file(test_data_folder): 17 | label_file = os.path.join(test_data_folder, "labelmap.txt") 18 | labels_map = utils.dict_labels(label_file) 19 | assert labels_map is not None 20 | -------------------------------------------------------------------------------- /examples/armnn/common/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 2 | # SPDX-License-Identifier: MIT 3 | 4 | """Contains helper functions that can be used across the example apps.""" 5 | 6 | import os 7 | import errno 8 | from pathlib import Path 9 | 10 | import numpy as np 11 | 12 | 13 | def dict_labels(labels_file_path: str, include_rgb=False) -> dict: 14 | """Creates a dictionary of labels from the input labels file. 15 | 16 | Args: 17 | labels_file: Path to file containing labels to map model outputs. 18 | include_rgb: Adds randomly generated RGB values to the values of the 19 | dictionary. Used for plotting bounding boxes of different colours. 20 | 21 | Returns: 22 | Dictionary with classification indices for keys and labels for values. 23 | 24 | Raises: 25 | FileNotFoundError: 26 | Provided `labels_file_path` does not exist. 27 | """ 28 | labels_file = Path(labels_file_path) 29 | if not labels_file.is_file(): 30 | raise FileNotFoundError( 31 | errno.ENOENT, os.strerror(errno.ENOENT), labels_file_path 32 | ) 33 | 34 | labels = {} 35 | with open(labels_file, "r") as f: 36 | for idx, line in enumerate(f, 0): 37 | if include_rgb: 38 | labels[idx] = line.strip("\n"), tuple(np.random.random(size=3) * 255) 39 | else: 40 | labels[idx] = line.strip("\n") 41 | return labels 42 | -------------------------------------------------------------------------------- /examples/armnn/face_age-gender/README.md: -------------------------------------------------------------------------------- 1 | # PyArmNN Human face age/gender recognition Sample Application 2 | 3 | ## Introduction 4 | This sample application guides the user and shows how to perform age/gender recognition using PyArmNN API. 5 | 6 | The application takes a model and video file or camera feed as input, runs inference on each frame, and draws bounding boxes around detected faces and age/gender labels overlaid. 7 | 8 | ## Human face age/gender recognition from Video File 9 | Human face age/gender recognition demo that takes a video file, runs inference on each frame producing 10 | bounding boxes and labels around detected faces, and saves the processed video. 11 | 12 | Example usage: 13 | 14 | ```bash 15 | python3 run_video_file.py --first_model_file_path YOLO_best_mAP.tflite --second_model_file MobileNet-v1-age-gender.tflite --video_file_path ../samples/test_s.mp4 16 | ``` 17 | 18 | ## Human face age/gender recognition from Video Stream 19 | 20 | Human face age/gender recognition demo that takes a video stream from a device, runs inference 21 | on each frame producing bounding boxes and labels around detected faces, 22 | and displays a window with the latest processed frame. 
23 | 24 | Example usage: 25 | 26 | ```bash 27 | DISPLAY=:0 python3 run_video_stream.py --first_model_file_path YOLO_best_mAP.tflite --second_model_file MobileNet-v1-age-gender.tflite 28 | ``` 29 | 30 | This application has been verified to work against the YOLOv2 detection layer MobileNet models and MobileFaceNet keypoints detector, which can be downloaded from: 31 | 32 | https://files.seeedstudio.com/ml/age_gender_recognition_models.zip 33 | -------------------------------------------------------------------------------- /examples/armnn/face_age-gender/box.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | # Todo : BoundBox & its related method extraction 6 | class BoundBox: 7 | def __init__(self, x, y, w, h, c = None, classes = None): 8 | self.x = x 9 | self.y = y 10 | self.w = w 11 | self.h = h 12 | 13 | self.c = c 14 | self.classes = classes 15 | 16 | def get_label(self): 17 | return np.argmax(self.classes) 18 | 19 | def get_score(self): 20 | return self.classes[self.get_label()] 21 | 22 | def iou(self, bound_box): 23 | b1 = self.as_centroid() 24 | b2 = bound_box.as_centroid() 25 | return centroid_box_iou(b1, b2) 26 | 27 | def as_centroid(self): 28 | return np.array([self.x, self.y, self.w, self.h]) 29 | 30 | 31 | def boxes_to_array(bound_boxes): 32 | """ 33 | # Args 34 | boxes : list of BoundBox instances 35 | 36 | # Returns 37 | centroid_boxes : (N, 4) 38 | probs : (N, nb_classes) 39 | """ 40 | centroid_boxes = [] 41 | probs = [] 42 | for box in bound_boxes: 43 | centroid_boxes.append([box.x, box.y, box.w, box.h]) 44 | probs.append(box.classes) 45 | return np.array(centroid_boxes), np.array(probs) 46 | 47 | 48 | def nms_boxes(boxes, n_classes, nms_threshold=0.3, obj_threshold=0.3): 49 | """ 50 | # Args 51 | boxes : list of BoundBox 52 | 53 | # Returns 54 | boxes : list of BoundBox 55 | non maximum supressed BoundBox instances 56 | """ 57 | # suppress non-maximal boxes 58 | for c in range(n_classes): 59 | sorted_indices = list(reversed(np.argsort([box.classes[c] for box in boxes]))) 60 | 61 | for i in range(len(sorted_indices)): 62 | index_i = sorted_indices[i] 63 | 64 | if boxes[index_i].classes[c] == 0: 65 | continue 66 | else: 67 | for j in range(i+1, len(sorted_indices)): 68 | index_j = sorted_indices[j] 69 | 70 | if boxes[index_i].iou(boxes[index_j]) >= nms_threshold: 71 | boxes[index_j].classes[c] = 0 72 | # remove the boxes which are less likely than a obj_threshold 73 | boxes = [box for box in boxes if box.get_score() > obj_threshold] 74 | return boxes 75 | 76 | 77 | def draw_scaled_boxes(image, boxes, probs, labels, desired_size=400): 78 | img_size = min(image.shape[:2]) 79 | if img_size < desired_size: 80 | scale_factor = float(desired_size) / img_size 81 | else: 82 | scale_factor = 1.0 83 | 84 | h, w = image.shape[:2] 85 | img_scaled = cv2.resize(image, (int(w*scale_factor), int(h*scale_factor))) 86 | if boxes != []: 87 | boxes_scaled = boxes*scale_factor 88 | boxes_scaled = boxes_scaled.astype(np.int) 89 | else: 90 | boxes_scaled = boxes 91 | return draw_boxes(img_scaled, boxes_scaled, probs, labels) 92 | 93 | 94 | def draw_boxes(image, boxes, probs, labels): 95 | for box, classes in zip(boxes, probs): 96 | x1, y1, x2, y2 = box 97 | cv2.rectangle(image, (x1,y1), (x2,y2), (0,255,0), 3) 98 | cv2.putText(image, 99 | '{}: {:.2f}'.format(labels[np.argmax(classes)], classes.max()), 100 | (x1, y1 - 13), 101 | cv2.FONT_HERSHEY_SIMPLEX, 102 | 1e-3 * image.shape[0], 103 | (0,255,0), 2) 104 | return 
image 105 | 106 | 107 | def centroid_box_iou(box1, box2): 108 | def _interval_overlap(interval_a, interval_b): 109 | x1, x2 = interval_a 110 | x3, x4 = interval_b 111 | 112 | if x3 < x1: 113 | if x4 < x1: 114 | return 0 115 | else: 116 | return min(x2,x4) - x1 117 | else: 118 | if x2 < x3: 119 | return 0 120 | else: 121 | return min(x2,x4) - x3 122 | 123 | _, _, w1, h1 = box1.reshape(-1,) 124 | _, _, w2, h2 = box2.reshape(-1,) 125 | x1_min, y1_min, x1_max, y1_max = to_minmax(box1.reshape(-1,4)).reshape(-1,) 126 | x2_min, y2_min, x2_max, y2_max = to_minmax(box2.reshape(-1,4)).reshape(-1,) 127 | 128 | intersect_w = _interval_overlap([x1_min, x1_max], [x2_min, x2_max]) 129 | intersect_h = _interval_overlap([y1_min, y1_max], [y2_min, y2_max]) 130 | intersect = intersect_w * intersect_h 131 | union = w1 * h1 + w2 * h2 - intersect 132 | 133 | return float(intersect) / union 134 | 135 | 136 | def to_centroid(minmax_boxes): 137 | """ 138 | minmax_boxes : (N, 4) 139 | """ 140 | minmax_boxes = minmax_boxes.astype(np.float) 141 | centroid_boxes = np.zeros_like(minmax_boxes) 142 | 143 | x1 = minmax_boxes[:,0] 144 | y1 = minmax_boxes[:,1] 145 | x2 = minmax_boxes[:,2] 146 | y2 = minmax_boxes[:,3] 147 | 148 | centroid_boxes[:,0] = (x1 + x2) / 2 149 | centroid_boxes[:,1] = (y1 + y2) / 2 150 | centroid_boxes[:,2] = x2 - x1 151 | centroid_boxes[:,3] = y2 - y1 152 | return centroid_boxes 153 | 154 | def to_minmax(centroid_boxes): 155 | centroid_boxes = centroid_boxes.astype(np.float) 156 | minmax_boxes = np.zeros_like(centroid_boxes) 157 | 158 | cx = centroid_boxes[:,0] 159 | cy = centroid_boxes[:,1] 160 | w = centroid_boxes[:,2] 161 | h = centroid_boxes[:,3] 162 | 163 | minmax_boxes[:,0] = cx - w/2 164 | minmax_boxes[:,1] = cy - h/2 165 | minmax_boxes[:,2] = cx + w/2 166 | minmax_boxes[:,3] = cy + h/2 167 | return minmax_boxes 168 | 169 | def create_anchor_boxes(anchors): 170 | """ 171 | # Args 172 | anchors : list of floats 173 | # Returns 174 | boxes : array, shape of (len(anchors)/2, 4) 175 | centroid-type 176 | """ 177 | boxes = [] 178 | n_boxes = int(len(anchors)/2) 179 | for i in range(n_boxes): 180 | boxes.append(np.array([0, 0, anchors[2*i], anchors[2*i+1]])) 181 | return np.array(boxes) 182 | 183 | def find_match_box(centroid_box, centroid_boxes): 184 | """Find the index of the boxes with the largest overlap among the N-boxes. 185 | # Args 186 | box : array, shape of (1, 4) 187 | boxes : array, shape of (N, 4) 188 | 189 | # Return 190 | match_index : int 191 | """ 192 | match_index = -1 193 | max_iou = -1 194 | 195 | for i, box in enumerate(centroid_boxes): 196 | iou = centroid_box_iou(centroid_box, box) 197 | 198 | if max_iou < iou: 199 | match_index = i 200 | max_iou = iou 201 | return match_index 202 | -------------------------------------------------------------------------------- /examples/armnn/face_age-gender/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.19.2 2 | tqdm>=4.47.0 3 | -------------------------------------------------------------------------------- /examples/armnn/face_age-gender/run_video_file.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | """ 6 | Human face age/gender recognition demo that takes a video file, runs inference on each frame producing 7 | bounding boxes and labels around detected faces, and saves the processed video. 8 | 9 | python3 run_video_file.py --first_model_file_path YOLO_best_mAP.tflite --second_model_file MobileNet-v1-age-gender.tflite --video_file_path ../samples/test_s.mp4 10 | 11 | """ 12 | 13 | import os 14 | import sys 15 | import time 16 | script_dir = os.path.dirname(__file__) 17 | sys.path.insert(1, os.path.join(script_dir, '..', 'common')) 18 | 19 | import cv2 20 | import numpy as np 21 | from tqdm import tqdm 22 | from argparse import ArgumentParser 23 | 24 | from yolov2 import yolo_processing, yolo_resize_factor 25 | from cv_utils import init_video_file_capture, resize_with_aspect_ratio, preprocess, preprocess_array 26 | from network_executor import ArmnnNetworkExecutor 27 | 28 | import pyarmnn as ann 29 | 30 | gender_list = ["female","male"] 31 | age_list = ["0-10","11-20","21-45","46-60","60-100"] 32 | 33 | def process_faces(frame, detections, executor_age_gender, resize_factor): 34 | global age_list, gender_list 35 | 36 | result_list = [] 37 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 38 | 39 | for detection in detections: 40 | box = detection[1].copy() 41 | for i in range(len(box)): 42 | box[i] = int(box[i] * resize_factor) 43 | 44 | frame_height, frame_width = frame.shape[:2] 45 | x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3] 46 | 47 | # Ensure box stays within the frame 48 | x_min, y_min = max(0, x_min), max(0, y_min) 49 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 50 | 51 | face_img = frame[y_min:y_max, x_min:x_max] 52 | face_img = cv2.resize(face_img, (128, 128)) 53 | 54 | face_img = face_img.astype(np.float32) 55 | face_img = preprocess_array(face_img) 56 | 57 | input_tensors = ann.make_input_tensors([executor_age_gender.input_binding_info], [face_img]) 58 | 59 | result = executor_age_gender.run(input_tensors) 60 | gender = gender_list[np.argmax(result[0][0])] 61 | age = age_list[np.argmax(result[1][0])] 62 | 63 | result_list.append([gender, age]) 64 | 65 | return result_list 66 | 67 | def draw_result(frame: np.ndarray, detections: list, resize_factor, face_data): 68 | """ 69 | Draws bounding boxes around detected objects and adds a label and confidence score. 70 | 71 | Args: 72 | frame: The original captured frame from video source. 73 | detections: A list of detected objects in the form [class, [box positions], confidence]. 74 | resize_factor: Resizing factor to scale box coordinates to output frame size. 
75 | face_data: List containing information about age and gender 76 | """ 77 | for i in range(len(detections)): 78 | class_idx, box, confidence = [d for d in detections[i]] 79 | color = (255, 0, 0) if face_data[i][0] == 'male' else (0, 0, 255) 80 | 81 | # Obtain frame size and resized bounding box positions 82 | frame_height, frame_width = frame.shape[:2] 83 | x_min, y_min, x_max, y_max = [int(position * resize_factor) for position in box] 84 | 85 | # Ensure box stays within the frame 86 | x_min, y_min = max(0, x_min), max(0, y_min) 87 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 88 | 89 | # Draw bounding box around detected object 90 | cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2) 91 | 92 | # Create label for detected object class 93 | label = "Gender: {}, Age: {}".format(face_data[i][0], face_data[i][1]) 94 | label_color = (255, 255, 255) 95 | 96 | # Make sure label always stays on-screen 97 | x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2] 98 | 99 | lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text) 100 | lbl_box_xy_max = (x_min + int(0.75 * x_text), y_min + y_text if y_min<25 else y_min) 101 | lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5) 102 | 103 | # Add label and confidence value 104 | cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1) 105 | cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.70, label_color, 1, cv2.LINE_AA) 106 | 107 | 108 | def main(args): 109 | video, video_writer, frame_count = init_video_file_capture(args.video_file_path, 'age_gender_demo') 110 | frame_num = len(frame_count) 111 | 112 | executor_fd = ArmnnNetworkExecutor(args.first_model_file_path, args.preferred_backends) 113 | executor_age_gender = ArmnnNetworkExecutor(args.second_model_file_path, args.preferred_backends) 114 | 115 | process_output, resize_factor = yolo_processing, yolo_resize_factor(video, executor_fd.input_binding_info) 116 | 117 | times = [] 118 | 119 | for _ in tqdm(frame_count, desc='Processing frames'): 120 | frame_present, frame = video.read() 121 | if not frame_present: 122 | continue 123 | 124 | input_tensors = preprocess(frame, executor_fd.input_binding_info) 125 | 126 | start_time = time.time() # measure only inference and intermediary processing times 127 | output_result = executor_fd.run(input_tensors) 128 | detections = process_output(output_result) 129 | face_data = process_faces(frame, detections, executor_age_gender, resize_factor) 130 | end_time = (time.time() - start_time)*1000 131 | 132 | draw_result(frame, detections, resize_factor, face_data) 133 | 134 | times.append(end_time) 135 | video_writer.write(frame) 136 | 137 | print('Finished processing frames') 138 | video.release(), video_writer.release() 139 | 140 | print("Average time(ms): ", sum(times)//frame_num) 141 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1000.0 / average of inference times for all the frames 142 | 143 | if __name__ == '__main__': 144 | parser = ArgumentParser() 145 | parser.add_argument('--video_file_path', required=True, type=str, 146 | help='Path to the video file to run object detection on') 147 | 148 | parser.add_argument('--first_model_file_path', required=True, type=str, 149 | help='Path to the first stage model to use') 150 | parser.add_argument('--second_model_file_path', required=True, type=str, 151 | help='Path to the second stage model to use') 152 | 153 | parser.add_argument('--preferred_backends', type=str, nargs='+', default=['CpuAcc', 
'CpuRef'], 154 | help='Takes the preferred backends in preference order, separated by whitespace, ' 155 | 'for example: CpuAcc GpuAcc CpuRef. Accepted options: [CpuAcc, CpuRef, GpuAcc]. ' 156 | 'Defaults to [CpuAcc, CpuRef]') 157 | args = parser.parse_args() 158 | main(args) 159 | -------------------------------------------------------------------------------- /examples/armnn/face_age-gender/run_video_stream.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | """ 6 | Human face age/gender recognition demo that takes a video stream from a device, runs inference 7 | on each frame producing bounding boxes and labels around detected faces, 8 | and displays a window with the latest processed frame. 9 | 10 | DISPLAY=:0 python3 run_video_stream.py --first_model_file_path YOLO_best_mAP.tflite --second_model_file MobileNet-v1-age-gender.tflite 11 | 12 | """ 13 | 14 | import os 15 | import sys 16 | import time 17 | script_dir = os.path.dirname(__file__) 18 | sys.path.insert(1, os.path.join(script_dir, '..', 'common')) 19 | 20 | import cv2 21 | import numpy as np 22 | from tqdm import tqdm 23 | from argparse import ArgumentParser 24 | 25 | from yolov2 import yolo_processing, yolo_resize_factor 26 | 27 | from cv_utils import init_video_stream_capture, resize_with_aspect_ratio, preprocess, preprocess_array 28 | from network_executor import ArmnnNetworkExecutor 29 | import pyarmnn as ann 30 | 31 | gender_list = ["female","male"] 32 | age_list = ["0-10","11-20","21-45","46-60","60-100"] 33 | 34 | def process_faces(frame, detections, executor_age_gender, resize_factor): 35 | global age_list, gender_list 36 | 37 | result_list = [] 38 | 39 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 40 | 41 | for detection in detections: 42 | box = detection[1].copy() 43 | for i in range(len(box)): 44 | box[i] = int(box[i] * resize_factor) 45 | 46 | frame_height, frame_width = frame.shape[:2] 47 | x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3] 48 | 49 | # Ensure box stays within the frame 50 | x_min, y_min = max(0, x_min), max(0, y_min) 51 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 52 | 53 | face_img = frame[y_min:y_max, x_min:x_max] 54 | face_img = cv2.resize(face_img, (128, 128)) 55 | 56 | #cv2.imshow('PyArmNN Object Detection Demo face', face_img) 57 | 58 | face_img = face_img.astype(np.float32) 59 | face_img = preprocess_array(face_img) 60 | 61 | input_tensors = ann.make_input_tensors([executor_age_gender.input_binding_info], [face_img]) 62 | 63 | result = executor_age_gender.run(input_tensors) 64 | gender = gender_list[np.argmax(result[0][0])] 65 | age = age_list[np.argmax(result[1][0])] 66 | 67 | result_list.append([gender, age]) 68 | 69 | return result_list 70 | 71 | def draw_result(frame: np.ndarray, detections: list, resize_factor, face_data): 72 | """ 73 | Draws bounding boxes around detected objects and adds a label and confidence score. 74 | 75 | Args: 76 | frame: The original captured frame from video source. 77 | detections: A list of detected objects in the form [class, [box positions], confidence]. 78 | resize_factor: Resizing factor to scale box coordinates to output frame size. 
79 | face_data: List containing information about age and gender 80 | """ 81 | 82 | for i in range(len(detections)): 83 | class_idx, box, confidence = [d for d in detections[i]] 84 | color = (255, 0, 0) if face_data[i][0] == 'male' else (0, 0, 255) 85 | 86 | # Obtain frame size and resized bounding box positions 87 | frame_height, frame_width = frame.shape[:2] 88 | x_min, y_min, x_max, y_max = [int(position * resize_factor) for position in box] 89 | 90 | # Ensure box stays within the frame 91 | x_min, y_min = max(0, x_min), max(0, y_min) 92 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 93 | 94 | # Draw bounding box around detected object 95 | cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2) 96 | 97 | # Create label for detected object class 98 | label = "Gender: {}, Age: {}".format(face_data[i][0], face_data[i][1]) 99 | label_color = (255, 255, 255) 100 | 101 | # Make sure label always stays on-screen 102 | x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2] 103 | 104 | lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text) 105 | lbl_box_xy_max = (x_min + int(0.75 * x_text), y_min + y_text if y_min<25 else y_min) 106 | lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5) 107 | 108 | # Add label and confidence value 109 | cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1) 110 | cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.70, label_color, 1, cv2.LINE_AA) 111 | 112 | 113 | def main(args): 114 | video = init_video_stream_capture(args.video_source) 115 | 116 | executor_fd = ArmnnNetworkExecutor(args.first_model_file_path, args.preferred_backends) 117 | executor_age_gender = ArmnnNetworkExecutor(args.second_model_file_path, args.preferred_backends) 118 | 119 | process_output, resize_factor = yolo_processing, yolo_resize_factor(video, executor_fd.input_binding_info) 120 | 121 | while True: 122 | 123 | frame_present, frame = video.read() 124 | frame = cv2.flip(frame, 1) # Horizontally flip the frame 125 | if not frame_present: 126 | raise RuntimeError('Error reading frame from video stream') 127 | input_tensors = preprocess(frame, executor_fd.input_binding_info) 128 | print("Running inference...") 129 | 130 | start_time = time.time() # start time of the inference 131 | output_result = executor_fd.run(input_tensors) 132 | detections = process_output(output_result) 133 | face_data = process_faces(frame, detections, executor_age_gender, resize_factor) 134 | 135 | print("FPS: ", 1.0 / (time.time() - start_time)) # FPS = 1 / time to process loop 136 | print("Time(ms): ", (time.time() - start_time)*1000) 137 | 138 | draw_result(frame, detections, resize_factor, face_data) 139 | cv2.imshow('PyArmNN Object Detection Demo', frame) 140 | 141 | if cv2.waitKey(1) == 27: 142 | print('\nExit key activated. Closing video...') 143 | break 144 | video.release(), cv2.destroyAllWindows() 145 | 146 | 147 | if __name__ == '__main__': 148 | parser = ArgumentParser() 149 | parser.add_argument('--video_source', type=int, default=0, 150 | help='Device index to access video stream. 
Defaults to primary device camera at index 0') 151 | 152 | parser.add_argument('--first_model_file_path', required=True, type=str, 153 | help='Path to the first stage model to use') 154 | parser.add_argument('--second_model_file_path', required=True, type=str, 155 | help='Path to the second stage model to use') 156 | 157 | parser.add_argument('--preferred_backends', type=str, nargs='+', default=['CpuAcc', 'CpuRef'], 158 | help='Takes the preferred backends in preference order, separated by whitespace, ' 159 | 'for example: CpuAcc GpuAcc CpuRef. Accepted options: [CpuAcc, CpuRef, GpuAcc]. ' 160 | 'Defaults to [CpuAcc, CpuRef]') 161 | args = parser.parse_args() 162 | main(args) 163 | -------------------------------------------------------------------------------- /examples/armnn/face_age-gender/yolov2.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 2 | # SPDX-License-Identifier: MIT 3 | 4 | """ 5 | Contains functions specific to decoding and processing inference results for YOLO V3 Tiny models. 6 | """ 7 | 8 | import cv2 9 | import numpy as np 10 | from box import BoundBox, nms_boxes, boxes_to_array, to_minmax, draw_boxes 11 | 12 | 13 | def yolo_processing(netout): 14 | anchors = [1.889, 2.5245, 2.9465, 3.94056, 3.99987, 5.3658, 5.155437, 6.92275, 6.718375, 9.01025] 15 | nms_threshold=0.2 16 | """Convert Yolo network output to bounding box 17 | 18 | # Args 19 | netout : 4d-array, shape of (grid_h, grid_w, num of boxes per grid, 5 + n_classes) 20 | YOLO neural network output array 21 | 22 | # Returns 23 | boxes : array, shape of (N, 4) 24 | coordinate scale is normalized [0, 1] 25 | probs : array, shape of (N, nb_classes) 26 | """ 27 | netout = netout[0].reshape(7,7,5,6) 28 | grid_h, grid_w, nb_box = netout.shape[:3] 29 | boxes = [] 30 | 31 | # decode the output by the network 32 | netout[..., 4] = _sigmoid(netout[..., 4]) 33 | netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:]) 34 | netout[..., 5:] *= netout[..., 5:] > 0.3 35 | 36 | for row in range(grid_h): 37 | for col in range(grid_w): 38 | for b in range(nb_box): 39 | # from 4th element onwards are confidence and class classes 40 | classes = netout[row,col,b,5:] 41 | 42 | if np.sum(classes) > 0: 43 | # first 4 elements are x, y, w, and h 44 | x, y, w, h = netout[row,col,b,:4] 45 | 46 | x = (col + _sigmoid(x)) / grid_w # center position, unit: image width 47 | y = (row + _sigmoid(y)) / grid_h # center position, unit: image height 48 | w = anchors[2 * b + 0] * np.exp(w) / grid_w # unit: image width 49 | h = anchors[2 * b + 1] * np.exp(h) / grid_h # unit: image height 50 | confidence = netout[row,col,b,4] 51 | box = BoundBox(x, y, w, h, confidence, classes) 52 | boxes.append(box) 53 | 54 | boxes = nms_boxes(boxes, len(classes), nms_threshold, 0.3) 55 | boxes, probs = boxes_to_array(boxes) 56 | #print(boxes) 57 | predictions = [] 58 | def _to_original_scale(boxes): 59 | minmax_boxes = to_minmax(boxes) 60 | minmax_boxes[:,0] *= 224 61 | minmax_boxes[:,2] *= 224 62 | minmax_boxes[:,1] *= 224 63 | minmax_boxes[:,3] *= 224 64 | return minmax_boxes.astype(np.int) 65 | 66 | if len(boxes) > 0: 67 | boxes = _to_original_scale(boxes) 68 | 69 | for i in range(len(boxes)): 70 | predictions.append([0, boxes[i], probs[i][0]]) 71 | 72 | return predictions 73 | 74 | def _sigmoid(x): 75 | return 1. / (1. 
+ np.exp(-x)) 76 | 77 | def _softmax(x, axis=-1, t=-100.): 78 | x = x - np.max(x) 79 | if np.min(x) < t: 80 | x = x/np.min(x)*t 81 | e_x = np.exp(x) 82 | return e_x / e_x.sum(axis, keepdims=True) 83 | 84 | def yolo_resize_factor(video: cv2.VideoCapture, input_binding_info: tuple): 85 | """ 86 | Gets a multiplier to scale the bounding box positions to 87 | their correct position in the frame. 88 | 89 | Args: 90 | video: Video capture object, contains information about data source. 91 | input_binding_info: Contains shape of model input layer. 92 | 93 | Returns: 94 | Resizing factor to scale box coordinates to output frame size. 95 | """ 96 | frame_height = video.get(cv2.CAP_PROP_FRAME_HEIGHT) 97 | frame_width = video.get(cv2.CAP_PROP_FRAME_WIDTH) 98 | model_height, model_width = list(input_binding_info[1].GetShape())[1:3] 99 | return max(frame_height, frame_width) / max(model_height, model_width) 100 | -------------------------------------------------------------------------------- /examples/armnn/face_keypoints/README.md: -------------------------------------------------------------------------------- 1 | # PyArmNN Face keypoint detection Sample Application 2 | 3 | ## Introduction 4 | This sample application shows how to perform face keypoint detection using the PyArmNN API. 5 | 6 | The application takes two models and a video file or camera feed as input, runs inference on each frame, and draws bounding boxes around detected faces and five keypoints (left eye, right eye, nose, left corner of the mouth, right corner of the mouth) with the corresponding labels and confidence scores overlaid. 7 | 8 | ## Face keypoint detection from Video File 9 | Face keypoint detection demo that takes a video file, runs inference on each frame producing 10 | bounding boxes and five keypoints on detected faces, and saves the processed video. 11 | 12 | Example usage: 13 | 14 | ```bash 15 | python3 run_video_file.py --first_model_file_path YOLO_best_mAP.tflite --second_model_file_path MobileFaceNet_kpts.tflite --video_file_path ../samples/test_s.mp4 16 | ``` 17 | 18 | ## Face keypoint detection from Video Stream 19 | 20 | Face keypoint detection demo that takes a video stream from a device, runs inference 21 | on each frame producing bounding boxes and five keypoints on detected faces, and displays a window with the latest processed frame.
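In both demos, the second-stage keypoint model returns ten values in the range [0, 1]: an (x, y) pair for each of the five keypoints, expressed relative to the cropped face box. The snippet below is a simplified sketch of how `process_faces()` maps those values back to frame coordinates; the face box and model output shown are made-up illustration values.

```python
# Minimal sketch: map normalised keypoint outputs back to frame coordinates.
# `box` is the detected face box in frame coordinates (x_min, y_min, x_max, y_max),
# `plist` is the raw model output [x0, y0, x1, y1, ...] for the five keypoints.
def keypoints_to_frame(plist, box):
    x_min, y_min, x_max, y_max = box
    w, h = x_max - x_min, y_max - y_min
    return [(x_min + int(plist[2 * i] * w), y_min + int(plist[2 * i + 1] * h))
            for i in range(5)]

# Illustrative values only: a 128x128 face box at (100, 120) and a made-up prediction.
print(keypoints_to_frame([0.3, 0.4, 0.7, 0.4, 0.5, 0.6, 0.35, 0.8, 0.65, 0.8],
                         (100, 120, 228, 248)))
```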
22 | 23 | Example usage: 24 | 25 | ```bash 26 | DISPLAY=:0 python3 run_video_stream.py --first_model_file_path YOLO_best_mAP.tflite --second_model_file MobileFaceNet_kpts.tflite 27 | ``` 28 | 29 | This application has been verified to work against the YOLOv2 detection layer MobileNet models and MobileFaceNet keypoints detector, which can be downloaded from: 30 | 31 | https://files.seeedstudio.com/ml/keypoint_detection_models.zip 32 | -------------------------------------------------------------------------------- /examples/armnn/face_keypoints/box.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | # Todo : BoundBox & its related method extraction 6 | class BoundBox: 7 | def __init__(self, x, y, w, h, c = None, classes = None): 8 | self.x = x 9 | self.y = y 10 | self.w = w 11 | self.h = h 12 | 13 | self.c = c 14 | self.classes = classes 15 | 16 | def get_label(self): 17 | return np.argmax(self.classes) 18 | 19 | def get_score(self): 20 | return self.classes[self.get_label()] 21 | 22 | def iou(self, bound_box): 23 | b1 = self.as_centroid() 24 | b2 = bound_box.as_centroid() 25 | return centroid_box_iou(b1, b2) 26 | 27 | def as_centroid(self): 28 | return np.array([self.x, self.y, self.w, self.h]) 29 | 30 | 31 | def boxes_to_array(bound_boxes): 32 | """ 33 | # Args 34 | boxes : list of BoundBox instances 35 | 36 | # Returns 37 | centroid_boxes : (N, 4) 38 | probs : (N, nb_classes) 39 | """ 40 | centroid_boxes = [] 41 | probs = [] 42 | for box in bound_boxes: 43 | centroid_boxes.append([box.x, box.y, box.w, box.h]) 44 | probs.append(box.classes) 45 | return np.array(centroid_boxes), np.array(probs) 46 | 47 | 48 | def nms_boxes(boxes, n_classes, nms_threshold=0.3, obj_threshold=0.3): 49 | """ 50 | # Args 51 | boxes : list of BoundBox 52 | 53 | # Returns 54 | boxes : list of BoundBox 55 | non maximum supressed BoundBox instances 56 | """ 57 | # suppress non-maximal boxes 58 | for c in range(n_classes): 59 | sorted_indices = list(reversed(np.argsort([box.classes[c] for box in boxes]))) 60 | 61 | for i in range(len(sorted_indices)): 62 | index_i = sorted_indices[i] 63 | 64 | if boxes[index_i].classes[c] == 0: 65 | continue 66 | else: 67 | for j in range(i+1, len(sorted_indices)): 68 | index_j = sorted_indices[j] 69 | 70 | if boxes[index_i].iou(boxes[index_j]) >= nms_threshold: 71 | boxes[index_j].classes[c] = 0 72 | # remove the boxes which are less likely than a obj_threshold 73 | boxes = [box for box in boxes if box.get_score() > obj_threshold] 74 | return boxes 75 | 76 | 77 | def draw_scaled_boxes(image, boxes, probs, labels, desired_size=400): 78 | img_size = min(image.shape[:2]) 79 | if img_size < desired_size: 80 | scale_factor = float(desired_size) / img_size 81 | else: 82 | scale_factor = 1.0 83 | 84 | h, w = image.shape[:2] 85 | img_scaled = cv2.resize(image, (int(w*scale_factor), int(h*scale_factor))) 86 | if boxes != []: 87 | boxes_scaled = boxes*scale_factor 88 | boxes_scaled = boxes_scaled.astype(np.int) 89 | else: 90 | boxes_scaled = boxes 91 | return draw_boxes(img_scaled, boxes_scaled, probs, labels) 92 | 93 | 94 | def draw_boxes(image, boxes, probs, labels): 95 | for box, classes in zip(boxes, probs): 96 | x1, y1, x2, y2 = box 97 | cv2.rectangle(image, (x1,y1), (x2,y2), (0,255,0), 3) 98 | cv2.putText(image, 99 | '{}: {:.2f}'.format(labels[np.argmax(classes)], classes.max()), 100 | (x1, y1 - 13), 101 | cv2.FONT_HERSHEY_SIMPLEX, 102 | 1e-3 * image.shape[0], 103 | (0,255,0), 2) 104 | return image 105 | 
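# The helpers below work with two box formats: "centroid" boxes [cx, cy, w, h] and
# "minmax" boxes [x_min, y_min, x_max, y_max]; to_centroid() and to_minmax() convert
# between them, and centroid_box_iou() returns the intersection-over-union of two
# centroid boxes. For example, np.array([0.5, 0.5, 0.4, 0.4]) and
# np.array([0.5, 0.5, 0.2, 0.2]) overlap on a 0.2 x 0.2 patch, so the IoU is
# 0.04 / (0.16 + 0.04 - 0.04) = 0.25.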
106 | 107 | def centroid_box_iou(box1, box2): 108 | def _interval_overlap(interval_a, interval_b): 109 | x1, x2 = interval_a 110 | x3, x4 = interval_b 111 | 112 | if x3 < x1: 113 | if x4 < x1: 114 | return 0 115 | else: 116 | return min(x2,x4) - x1 117 | else: 118 | if x2 < x3: 119 | return 0 120 | else: 121 | return min(x2,x4) - x3 122 | 123 | _, _, w1, h1 = box1.reshape(-1,) 124 | _, _, w2, h2 = box2.reshape(-1,) 125 | x1_min, y1_min, x1_max, y1_max = to_minmax(box1.reshape(-1,4)).reshape(-1,) 126 | x2_min, y2_min, x2_max, y2_max = to_minmax(box2.reshape(-1,4)).reshape(-1,) 127 | 128 | intersect_w = _interval_overlap([x1_min, x1_max], [x2_min, x2_max]) 129 | intersect_h = _interval_overlap([y1_min, y1_max], [y2_min, y2_max]) 130 | intersect = intersect_w * intersect_h 131 | union = w1 * h1 + w2 * h2 - intersect 132 | 133 | return float(intersect) / union 134 | 135 | 136 | def to_centroid(minmax_boxes): 137 | """ 138 | minmax_boxes : (N, 4) 139 | """ 140 | minmax_boxes = minmax_boxes.astype(np.float) 141 | centroid_boxes = np.zeros_like(minmax_boxes) 142 | 143 | x1 = minmax_boxes[:,0] 144 | y1 = minmax_boxes[:,1] 145 | x2 = minmax_boxes[:,2] 146 | y2 = minmax_boxes[:,3] 147 | 148 | centroid_boxes[:,0] = (x1 + x2) / 2 149 | centroid_boxes[:,1] = (y1 + y2) / 2 150 | centroid_boxes[:,2] = x2 - x1 151 | centroid_boxes[:,3] = y2 - y1 152 | return centroid_boxes 153 | 154 | def to_minmax(centroid_boxes): 155 | centroid_boxes = centroid_boxes.astype(np.float) 156 | minmax_boxes = np.zeros_like(centroid_boxes) 157 | 158 | cx = centroid_boxes[:,0] 159 | cy = centroid_boxes[:,1] 160 | w = centroid_boxes[:,2] 161 | h = centroid_boxes[:,3] 162 | 163 | minmax_boxes[:,0] = cx - w/2 164 | minmax_boxes[:,1] = cy - h/2 165 | minmax_boxes[:,2] = cx + w/2 166 | minmax_boxes[:,3] = cy + h/2 167 | return minmax_boxes 168 | 169 | def create_anchor_boxes(anchors): 170 | """ 171 | # Args 172 | anchors : list of floats 173 | # Returns 174 | boxes : array, shape of (len(anchors)/2, 4) 175 | centroid-type 176 | """ 177 | boxes = [] 178 | n_boxes = int(len(anchors)/2) 179 | for i in range(n_boxes): 180 | boxes.append(np.array([0, 0, anchors[2*i], anchors[2*i+1]])) 181 | return np.array(boxes) 182 | 183 | def find_match_box(centroid_box, centroid_boxes): 184 | """Find the index of the boxes with the largest overlap among the N-boxes. 185 | # Args 186 | box : array, shape of (1, 4) 187 | boxes : array, shape of (N, 4) 188 | 189 | # Return 190 | match_index : int 191 | """ 192 | match_index = -1 193 | max_iou = -1 194 | 195 | for i, box in enumerate(centroid_boxes): 196 | iou = centroid_box_iou(centroid_box, box) 197 | 198 | if max_iou < iou: 199 | match_index = i 200 | max_iou = iou 201 | return match_index 202 | -------------------------------------------------------------------------------- /examples/armnn/face_keypoints/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.19.2 2 | tqdm>=4.47.0 3 | -------------------------------------------------------------------------------- /examples/armnn/face_keypoints/run_video_file.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | """ 6 | Face keypoint detection demo that takes a video file, runs inference on each frame producing 7 | bounding boxes and five keypoints on detected faces, and saves the processed video. 
8 | 9 | python3 run_video_file.py --first_model_file_path YOLO_best_mAP.tflite --second_model_file MobileFaceNet_kpts.tflite --video_file_path ../samples/test_s.mp4 10 | 11 | """ 12 | 13 | import os 14 | import sys 15 | import time 16 | script_dir = os.path.dirname(__file__) 17 | sys.path.insert(1, os.path.join(script_dir, '..', 'common')) 18 | 19 | import cv2 20 | import numpy as np 21 | from tqdm import tqdm 22 | from argparse import ArgumentParser 23 | 24 | from yolov2 import yolo_processing, yolo_resize_factor 25 | from utils import dict_labels 26 | from cv_utils import init_video_file_capture, resize_with_aspect_ratio, preprocess, preprocess_array 27 | from network_executor import ArmnnNetworkExecutor 28 | import pyarmnn as ann 29 | 30 | def process_faces(frame, detections, executor_kp, resize_factor): 31 | kpts_list = [] 32 | 33 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 34 | 35 | for detection in detections: 36 | box = detection[1].copy() 37 | for i in range(len(box)): 38 | box[i] = int(box[i] * resize_factor) 39 | 40 | x, y, w, h = box[0], box[1], box[2] - box[0], box[3] - box[1] 41 | frame_height, frame_width = frame.shape[:2] 42 | x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3] 43 | # Ensure box stays within the frame 44 | x_min, y_min = max(0, x_min), max(0, y_min) 45 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 46 | 47 | face_img = frame[y_min:y_max, x_min:x_max] 48 | face_img = cv2.resize(face_img, (128, 128)) 49 | 50 | face_img = face_img.astype(np.float32) 51 | face_img = preprocess_array(face_img) 52 | 53 | input_tensors = ann.make_input_tensors([executor_kp.input_binding_info], [face_img]) 54 | 55 | plist = executor_kp.run(input_tensors)[0][0] 56 | 57 | le = (x + int(plist[0] * w+5), y + int(plist[1] * h+5)) 58 | re = (x + int(plist[2] * w), y + int(plist[3] * h)) 59 | n = (x + int(plist[4] * w), y + int(plist[5] * h)) 60 | lm = (x + int(plist[6] * w), y + int(plist[7] * h)) 61 | rm = (x + int(plist[8] * w), y + int(plist[9] * h)) 62 | kpts = [le, re, n, lm, rm] 63 | 64 | kpts_list.append(kpts) 65 | 66 | return kpts_list 67 | 68 | def draw_result(frame: np.ndarray, detections: list, resize_factor, kpts): 69 | """ 70 | Draws bounding boxes around detected objects and adds a label and confidence score. 71 | 72 | Args: 73 | frame: The original captured frame from video source. 74 | detections: A list of detected objects in the form [class, [box positions], confidence]. 75 | resize_factor: Resizing factor to scale box coordinates to output frame size. 76 | kpts: List containing information about face keypoints in format [[le, re, n, lm, rm], [le, re, n, lm, rm], ...] 
77 | """ 78 | for i in range(len(detections)): 79 | class_idx, box, confidence = [d for d in detections[i]] 80 | label, color = 'Person', (0, 255, 0) 81 | 82 | # Obtain frame size and resized bounding box positions 83 | frame_height, frame_width = frame.shape[:2] 84 | x_min, y_min, x_max, y_max = [int(position * resize_factor) for position in box] 85 | 86 | # Ensure box stays within the frame 87 | x_min, y_min = max(0, x_min), max(0, y_min) 88 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 89 | 90 | # Draw bounding box around detected object 91 | cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2) 92 | 93 | # Create label for detected object class 94 | label = f'{label} {confidence * 100:.1f}%' 95 | label_color = (0, 0, 0) if sum(color)>200 else (255, 255, 255) 96 | 97 | # Make sure label always stays on-screen 98 | x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2] 99 | 100 | lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text) 101 | lbl_box_xy_max = (x_min + int(0.55 * x_text), y_min + y_text if y_min<25 else y_min) 102 | lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5) 103 | 104 | # Add label and confidence value 105 | cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1) 106 | cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.50, 107 | label_color, 1, cv2.LINE_AA) 108 | 109 | for kpt in kpts[i]: 110 | cv2.circle(frame, (int(kpt[0]), int(kpt[1])), 5, (0, 0, 255), 5) 111 | 112 | 113 | def main(args): 114 | video, video_writer, frame_count = init_video_file_capture(args.video_file_path, 'face_keypoint_demo') 115 | frame_num = len(frame_count) 116 | 117 | executor_fd = ArmnnNetworkExecutor(args.first_model_file_path, args.preferred_backends) 118 | executor_kp = ArmnnNetworkExecutor(args.second_model_file_path, args.preferred_backends) 119 | 120 | process_output, resize_factor = yolo_processing, yolo_resize_factor(video, executor_fd.input_binding_info) 121 | 122 | times = [] 123 | 124 | for _ in tqdm(frame_count, desc='Processing frames'): 125 | frame_present, frame = video.read() 126 | if not frame_present: 127 | continue 128 | 129 | input_tensors = preprocess(frame, executor_fd.input_binding_info) 130 | 131 | start_time = time.time() # start time of the loop 132 | output_result = executor_fd.run(input_tensors) 133 | detections = process_output(output_result) 134 | kpts = process_faces(frame, detections, executor_kp, resize_factor) 135 | end_time = (time.time() - start_time)*1000 136 | 137 | draw_result(frame, detections, resize_factor, kpts) 138 | times.append(end_time) 139 | video_writer.write(frame) 140 | 141 | print('Finished processing frames') 142 | video.release(), video_writer.release() 143 | 144 | print("Average time(ms): ", sum(times)//frame_num) 145 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1 / time to process loop 146 | 147 | if __name__ == '__main__': 148 | parser = ArgumentParser() 149 | parser.add_argument('--video_file_path', required=True, type=str, 150 | help='Path to the video file to run object detection on') 151 | 152 | parser.add_argument('--first_model_file_path', required=True, type=str, 153 | help='Path to the first stage model to use') 154 | parser.add_argument('--second_model_file_path', required=True, type=str, 155 | help='Path to the second stage model to use') 156 | 157 | parser.add_argument('--preferred_backends', type=str, nargs='+', default=['CpuAcc', 'CpuRef'], 158 | help='Takes the preferred backends in preference order, 
separated by whitespace, ' 159 | 'for example: CpuAcc GpuAcc CpuRef. Accepted options: [CpuAcc, CpuRef, GpuAcc]. ' 160 | 'Defaults to [CpuAcc, CpuRef]') 161 | args = parser.parse_args() 162 | main(args) 163 | -------------------------------------------------------------------------------- /examples/armnn/face_keypoints/run_video_stream.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | """ 6 | Face keypoint detection demo that takes a video file, takes a video stream from a device, runs inference 7 | on each frame producing bounding boxes and five keypoints on detected faces, and displays a window with the latest processed frame. 8 | 9 | DISPLAY=:0 python3 run_video_stream.py --first_model_file_path YOLO_best_mAP.tflite --second_model_file MobileFaceNet_kpts.tflite 10 | 11 | """ 12 | 13 | import os 14 | import sys 15 | import time 16 | script_dir = os.path.dirname(__file__) 17 | sys.path.insert(1, os.path.join(script_dir, '..', 'common')) 18 | 19 | import cv2 20 | import numpy as np 21 | from tqdm import tqdm 22 | from argparse import ArgumentParser 23 | 24 | from yolov2 import yolo_processing, yolo_resize_factor 25 | 26 | from cv_utils import init_video_stream_capture, resize_with_aspect_ratio, preprocess, preprocess_array 27 | from network_executor import ArmnnNetworkExecutor 28 | import pyarmnn as ann 29 | 30 | def process_faces(frame, detections, executor_kp, resize_factor): 31 | kpts_list = [] 32 | 33 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 34 | 35 | for detection in detections: 36 | box = detection[1].copy() 37 | for i in range(len(box)): 38 | box[i] = int(box[i] * resize_factor) 39 | 40 | x, y, w, h = box[0], box[1], box[2] - box[0], box[3] - box[1] 41 | frame_height, frame_width = frame.shape[:2] 42 | x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3] 43 | # Ensure box stays within the frame 44 | x_min, y_min = max(0, x_min), max(0, y_min) 45 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 46 | 47 | face_img = frame[y_min:y_max, x_min:x_max] 48 | face_img = cv2.resize(face_img, (128, 128)) 49 | 50 | face_img = face_img.astype(np.float32) 51 | face_img = preprocess_array(face_img) 52 | 53 | input_tensors = ann.make_input_tensors([executor_kp.input_binding_info], [face_img]) 54 | 55 | plist = executor_kp.run(input_tensors)[0][0] 56 | 57 | le = (x + int(plist[0] * w+5), y + int(plist[1] * h+5)) 58 | re = (x + int(plist[2] * w), y + int(plist[3] * h)) 59 | n = (x + int(plist[4] * w), y + int(plist[5] * h)) 60 | lm = (x + int(plist[6] * w), y + int(plist[7] * h)) 61 | rm = (x + int(plist[8] * w), y + int(plist[9] * h)) 62 | kpts = [le, re, n, lm, rm] 63 | 64 | kpts_list.append(kpts) 65 | 66 | return kpts_list 67 | 68 | def draw_result(frame: np.ndarray, detections: list, resize_factor, kpts): 69 | """ 70 | Draws bounding boxes around detected objects and adds a label and confidence score. 71 | 72 | Args: 73 | frame: The original captured frame from video source. 74 | detections: A list of detected objects in the form [class, [box positions], confidence]. 75 | resize_factor: Resizing factor to scale box coordinates to output frame size. 76 | kpts: List containing information about face keypoints in format [[le, re, n, lm, rm], [le, re, n, lm, rm], ...] 
77 | """ 78 | 79 | for i in range(len(detections)): 80 | class_idx, box, confidence = [d for d in detections[i]] 81 | label, color = 'Person', (0, 255, 0) 82 | 83 | # Obtain frame size and resized bounding box positions 84 | frame_height, frame_width = frame.shape[:2] 85 | x_min, y_min, x_max, y_max = [int(position * resize_factor) for position in box] 86 | 87 | # Ensure box stays within the frame 88 | x_min, y_min = max(0, x_min), max(0, y_min) 89 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 90 | 91 | # Draw bounding box around detected object 92 | cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2) 93 | 94 | # Create label for detected object class 95 | label = f'{label} {confidence * 100:.1f}%' 96 | label_color = (0, 0, 0) if sum(color)>200 else (255, 255, 255) 97 | 98 | # Make sure label always stays on-screen 99 | x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2] 100 | 101 | lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text) 102 | lbl_box_xy_max = (x_min + int(0.55 * x_text), y_min + y_text if y_min<25 else y_min) 103 | lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5) 104 | 105 | # Add label and confidence value 106 | cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1) 107 | cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.50, 108 | label_color, 1, cv2.LINE_AA) 109 | 110 | for kpt in kpts[i]: 111 | cv2.circle(frame, (int(kpt[0]), int(kpt[1])), 5, (0, 0, 255), 5) 112 | 113 | def main(args): 114 | video = init_video_stream_capture(args.video_source) 115 | 116 | executor_fd = ArmnnNetworkExecutor(args.first_model_file_path, args.preferred_backends) 117 | executor_kp = ArmnnNetworkExecutor(args.second_model_file_path, args.preferred_backends) 118 | 119 | process_output, resize_factor = yolo_processing, yolo_resize_factor(video, executor_fd.input_binding_info) 120 | 121 | while True: 122 | 123 | frame_present, frame = video.read() 124 | frame = cv2.flip(frame, 1) # Horizontally flip the frame 125 | if not frame_present: 126 | raise RuntimeError('Error reading frame from video stream') 127 | input_tensors = preprocess(frame, executor_fd.input_binding_info) 128 | print("Running inference...") 129 | 130 | start_time = time.time() # start time of the loop 131 | output_result = executor_fd.run(input_tensors) 132 | detections = process_output(output_result) 133 | kpts = process_faces(frame, detections, executor_kp, resize_factor) 134 | 135 | print("FPS: ", 1.0 / (time.time() - start_time)) # FPS = 1 / time to process loop 136 | print("Time(ms): ", (time.time() - start_time)*1000) 137 | 138 | draw_result(frame, detections, resize_factor, kpts) 139 | cv2.imshow('PyArmNN Object Detection Demo', frame) 140 | 141 | if cv2.waitKey(1) == 27: 142 | print('\nExit key activated. Closing video...') 143 | break 144 | video.release(), cv2.destroyAllWindows() 145 | 146 | 147 | if __name__ == '__main__': 148 | parser = ArgumentParser() 149 | parser.add_argument('--video_source', type=int, default=0, 150 | help='Device index to access video stream. 
Defaults to primary device camera at index 0') 151 | 152 | parser.add_argument('--first_model_file_path', required=True, type=str, 153 | help='Path to the first stage model to use') 154 | parser.add_argument('--second_model_file_path', required=True, type=str, 155 | help='Path to the second stage model to use') 156 | 157 | parser.add_argument('--preferred_backends', type=str, nargs='+', default=['CpuAcc', 'CpuRef'], 158 | help='Takes the preferred backends in preference order, separated by whitespace, ' 159 | 'for example: CpuAcc GpuAcc CpuRef. Accepted options: [CpuAcc, CpuRef, GpuAcc]. ' 160 | 'Defaults to [CpuAcc, CpuRef]') 161 | args = parser.parse_args() 162 | main(args) 163 | -------------------------------------------------------------------------------- /examples/armnn/face_keypoints/yolov2.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 2 | # SPDX-License-Identifier: MIT 3 | 4 | """ 5 | Contains functions specific to decoding and processing inference results for YOLO V3 Tiny models. 6 | """ 7 | 8 | import cv2 9 | import numpy as np 10 | from box import BoundBox, nms_boxes, boxes_to_array, to_minmax, draw_boxes 11 | 12 | 13 | def yolo_processing(netout): 14 | anchors = [1.889, 2.5245, 2.9465, 3.94056, 3.99987, 5.3658, 5.155437, 6.92275, 6.718375, 9.01025] 15 | nms_threshold=0.2 16 | """Convert Yolo network output to bounding box 17 | 18 | # Args 19 | netout : 4d-array, shape of (grid_h, grid_w, num of boxes per grid, 5 + n_classes) 20 | YOLO neural network output array 21 | 22 | # Returns 23 | boxes : array, shape of (N, 4) 24 | coordinate scale is normalized [0, 1] 25 | probs : array, shape of (N, nb_classes) 26 | """ 27 | netout = netout[0].reshape(7,7,5,6) 28 | grid_h, grid_w, nb_box = netout.shape[:3] 29 | boxes = [] 30 | 31 | # decode the output by the network 32 | netout[..., 4] = _sigmoid(netout[..., 4]) 33 | netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:]) 34 | netout[..., 5:] *= netout[..., 5:] > 0.3 35 | 36 | for row in range(grid_h): 37 | for col in range(grid_w): 38 | for b in range(nb_box): 39 | # from 4th element onwards are confidence and class classes 40 | classes = netout[row,col,b,5:] 41 | 42 | if np.sum(classes) > 0: 43 | # first 4 elements are x, y, w, and h 44 | x, y, w, h = netout[row,col,b,:4] 45 | 46 | x = (col + _sigmoid(x)) / grid_w # center position, unit: image width 47 | y = (row + _sigmoid(y)) / grid_h # center position, unit: image height 48 | w = anchors[2 * b + 0] * np.exp(w) / grid_w # unit: image width 49 | h = anchors[2 * b + 1] * np.exp(h) / grid_h # unit: image height 50 | confidence = netout[row,col,b,4] 51 | box = BoundBox(x, y, w, h, confidence, classes) 52 | boxes.append(box) 53 | 54 | boxes = nms_boxes(boxes, len(classes), nms_threshold, 0.3) 55 | boxes, probs = boxes_to_array(boxes) 56 | #print(boxes) 57 | predictions = [] 58 | def _to_original_scale(boxes): 59 | minmax_boxes = to_minmax(boxes) 60 | minmax_boxes[:,0] *= 224 61 | minmax_boxes[:,2] *= 224 62 | minmax_boxes[:,1] *= 224 63 | minmax_boxes[:,3] *= 224 64 | return minmax_boxes.astype(np.int) 65 | 66 | if len(boxes) > 0: 67 | boxes = _to_original_scale(boxes) 68 | 69 | for i in range(len(boxes)): 70 | predictions.append([0, boxes[i], probs[i][0]]) 71 | 72 | return predictions 73 | 74 | def _sigmoid(x): 75 | return 1. / (1. 
+ np.exp(-x)) 76 | 77 | def _softmax(x, axis=-1, t=-100.): 78 | x = x - np.max(x) 79 | if np.min(x) < t: 80 | x = x/np.min(x)*t 81 | e_x = np.exp(x) 82 | return e_x / e_x.sum(axis, keepdims=True) 83 | 84 | def yolo_resize_factor(video: cv2.VideoCapture, input_binding_info: tuple): 85 | """ 86 | Gets a multiplier to scale the bounding box positions to 87 | their correct position in the frame. 88 | 89 | Args: 90 | video: Video capture object, contains information about data source. 91 | input_binding_info: Contains shape of model input layer. 92 | 93 | Returns: 94 | Resizing factor to scale box coordinates to output frame size. 95 | """ 96 | frame_height = video.get(cv2.CAP_PROP_FRAME_HEIGHT) 97 | frame_width = video.get(cv2.CAP_PROP_FRAME_WIDTH) 98 | model_height, model_width = list(input_binding_info[1].GetShape())[1:3] 99 | return max(frame_height, frame_width) / max(model_height, model_width) 100 | -------------------------------------------------------------------------------- /examples/armnn/face_recognition/README.md: -------------------------------------------------------------------------------- 1 | # PyArmNN Face recognition Sample Application 2 | 3 | ## Introduction 4 | This sample application shows how to perform face recognition using the PyArmNN API. 5 | 6 | The application takes three models and a video file or camera feed as input, runs inference on each frame, and produces bounding boxes and ID numbers corresponding to entries in the database. 7 | 8 | ## Database population 9 | 10 | Before we can run face recognition, we need to extract features from the faces we want to recognize and save the feature embedding vectors in encoded form in a .json file, which serves as a small database. You can do that with calculate_features.py. 11 | 12 | Example usage: 13 | 14 | ```bash 15 | python3 calculate_features.py --fd_model_file_path ../face_rec_models/YOLOv2_best_mAP.tflite --kp_model_file_path ../face_rec_models/MobileFaceNet_kpts.tflite --fe_model_file_path ../face_rec_models/MobileFaceNet_features.tflite --db_file_path database.db --id 0 --name Paul --picture_file_path paul.png 16 | ``` 17 | 18 | ## Face recognition from Video File 19 | Face recognition demo that takes a video file, runs inference on each frame producing 20 | bounding boxes and ID numbers corresponding to entries in the database, and saves the processed video. 21 | 22 | Example usage: 23 | 24 | ```bash 25 | python3 run_video_file.py --video_file_path test_s.mp4 --db_file_path database.db --fd_model_file_path ../face_rec_models/YOLOv2_best_mAP.tflite --kp_model_file_path ../face_rec_models/MobileFaceNet_kpts.tflite --fe_model_file_path ../face_rec_models/MobileFaceNet_features.tflite 26 | ``` 27 | 28 | ## Face recognition from Video Stream 29 | 30 | Face recognition demo that takes a video stream from a device, runs inference 31 | on each frame producing bounding boxes and ID numbers corresponding to entries in the database, 32 | and displays a window with the latest processed frame.
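At recognition time, the embedding extracted from each detected face is compared against the stored vectors, and the ID of the closest entry is reported. The matching logic itself lives in the run scripts and is not shown here, so the snippet below is only a rough sketch under the assumption of a Euclidean nearest-neighbour match with a rejection threshold; the function name and threshold value are illustrative.

```python
import numpy as np

# Rough sketch (assumed logic): pick the database entry whose stored embedding is
# closest to the query embedding, and reject the match if it is still too far away.
def match_face(embedding, database, threshold=1.0):
    best_id, best_dist = None, float('inf')
    for person_id, stored in database.items():
        dist = np.linalg.norm(np.asarray(embedding) - np.asarray(stored))
        if dist < best_dist:
            best_id, best_dist = person_id, dist
    return best_id if best_dist < threshold else None
```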
33 | 34 | Example usage: 35 | 36 | ```bash 37 | DISPLAY=:0 python3 run_video_stream.py --db_file_path database.db --fd_model_file_path ../face_rec_models/YOLOv2_best_mAP.tflite --kp_model_file_path ../face_rec_models/MobileFaceNet_kpts.tflite --fe_model_file_path ../face_rec_models/MobileFaceNet_features.tflite 38 | ``` 39 | 40 | This application has been verified to work against the YOLOv2 detection layer MobileNet models, MobileFaceNet keypoints detector and MobileFaceNet face feature embedding extractor which can be downloaded from: 41 | 42 | WIP 43 | -------------------------------------------------------------------------------- /examples/armnn/face_recognition/box.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | # Todo : BoundBox & its related method extraction 6 | class BoundBox: 7 | def __init__(self, x, y, w, h, c = None, classes = None): 8 | self.x = x 9 | self.y = y 10 | self.w = w 11 | self.h = h 12 | 13 | self.c = c 14 | self.classes = classes 15 | 16 | def get_label(self): 17 | return np.argmax(self.classes) 18 | 19 | def get_score(self): 20 | return self.classes[self.get_label()] 21 | 22 | def iou(self, bound_box): 23 | b1 = self.as_centroid() 24 | b2 = bound_box.as_centroid() 25 | return centroid_box_iou(b1, b2) 26 | 27 | def as_centroid(self): 28 | return np.array([self.x, self.y, self.w, self.h]) 29 | 30 | 31 | def boxes_to_array(bound_boxes): 32 | """ 33 | # Args 34 | boxes : list of BoundBox instances 35 | 36 | # Returns 37 | centroid_boxes : (N, 4) 38 | probs : (N, nb_classes) 39 | """ 40 | centroid_boxes = [] 41 | probs = [] 42 | for box in bound_boxes: 43 | centroid_boxes.append([box.x, box.y, box.w, box.h]) 44 | probs.append(box.classes) 45 | return np.array(centroid_boxes), np.array(probs) 46 | 47 | 48 | def nms_boxes(boxes, n_classes, nms_threshold=0.3, obj_threshold=0.3): 49 | """ 50 | # Args 51 | boxes : list of BoundBox 52 | 53 | # Returns 54 | boxes : list of BoundBox 55 | non maximum supressed BoundBox instances 56 | """ 57 | # suppress non-maximal boxes 58 | for c in range(n_classes): 59 | sorted_indices = list(reversed(np.argsort([box.classes[c] for box in boxes]))) 60 | 61 | for i in range(len(sorted_indices)): 62 | index_i = sorted_indices[i] 63 | 64 | if boxes[index_i].classes[c] == 0: 65 | continue 66 | else: 67 | for j in range(i+1, len(sorted_indices)): 68 | index_j = sorted_indices[j] 69 | 70 | if boxes[index_i].iou(boxes[index_j]) >= nms_threshold: 71 | boxes[index_j].classes[c] = 0 72 | # remove the boxes which are less likely than a obj_threshold 73 | boxes = [box for box in boxes if box.get_score() > obj_threshold] 74 | return boxes 75 | 76 | 77 | def draw_scaled_boxes(image, boxes, probs, labels, desired_size=400): 78 | img_size = min(image.shape[:2]) 79 | if img_size < desired_size: 80 | scale_factor = float(desired_size) / img_size 81 | else: 82 | scale_factor = 1.0 83 | 84 | h, w = image.shape[:2] 85 | img_scaled = cv2.resize(image, (int(w*scale_factor), int(h*scale_factor))) 86 | if boxes != []: 87 | boxes_scaled = boxes*scale_factor 88 | boxes_scaled = boxes_scaled.astype(np.int) 89 | else: 90 | boxes_scaled = boxes 91 | return draw_boxes(img_scaled, boxes_scaled, probs, labels) 92 | 93 | 94 | def draw_boxes(image, boxes, probs, labels): 95 | for box, classes in zip(boxes, probs): 96 | x1, y1, x2, y2 = box 97 | cv2.rectangle(image, (x1,y1), (x2,y2), (0,255,0), 3) 98 | cv2.putText(image, 99 | '{}: {:.2f}'.format(labels[np.argmax(classes)], classes.max()), 
100 | (x1, y1 - 13), 101 | cv2.FONT_HERSHEY_SIMPLEX, 102 | 1e-3 * image.shape[0], 103 | (0,255,0), 2) 104 | return image 105 | 106 | 107 | def centroid_box_iou(box1, box2): 108 | def _interval_overlap(interval_a, interval_b): 109 | x1, x2 = interval_a 110 | x3, x4 = interval_b 111 | 112 | if x3 < x1: 113 | if x4 < x1: 114 | return 0 115 | else: 116 | return min(x2,x4) - x1 117 | else: 118 | if x2 < x3: 119 | return 0 120 | else: 121 | return min(x2,x4) - x3 122 | 123 | _, _, w1, h1 = box1.reshape(-1,) 124 | _, _, w2, h2 = box2.reshape(-1,) 125 | x1_min, y1_min, x1_max, y1_max = to_minmax(box1.reshape(-1,4)).reshape(-1,) 126 | x2_min, y2_min, x2_max, y2_max = to_minmax(box2.reshape(-1,4)).reshape(-1,) 127 | 128 | intersect_w = _interval_overlap([x1_min, x1_max], [x2_min, x2_max]) 129 | intersect_h = _interval_overlap([y1_min, y1_max], [y2_min, y2_max]) 130 | intersect = intersect_w * intersect_h 131 | union = w1 * h1 + w2 * h2 - intersect 132 | 133 | return float(intersect) / union 134 | 135 | 136 | def to_centroid(minmax_boxes): 137 | """ 138 | minmax_boxes : (N, 4) 139 | """ 140 | minmax_boxes = minmax_boxes.astype(np.float) 141 | centroid_boxes = np.zeros_like(minmax_boxes) 142 | 143 | x1 = minmax_boxes[:,0] 144 | y1 = minmax_boxes[:,1] 145 | x2 = minmax_boxes[:,2] 146 | y2 = minmax_boxes[:,3] 147 | 148 | centroid_boxes[:,0] = (x1 + x2) / 2 149 | centroid_boxes[:,1] = (y1 + y2) / 2 150 | centroid_boxes[:,2] = x2 - x1 151 | centroid_boxes[:,3] = y2 - y1 152 | return centroid_boxes 153 | 154 | def to_minmax(centroid_boxes): 155 | centroid_boxes = centroid_boxes.astype(np.float) 156 | minmax_boxes = np.zeros_like(centroid_boxes) 157 | 158 | cx = centroid_boxes[:,0] 159 | cy = centroid_boxes[:,1] 160 | w = centroid_boxes[:,2] 161 | h = centroid_boxes[:,3] 162 | 163 | minmax_boxes[:,0] = cx - w/2 164 | minmax_boxes[:,1] = cy - h/2 165 | minmax_boxes[:,2] = cx + w/2 166 | minmax_boxes[:,3] = cy + h/2 167 | return minmax_boxes 168 | 169 | def create_anchor_boxes(anchors): 170 | """ 171 | # Args 172 | anchors : list of floats 173 | # Returns 174 | boxes : array, shape of (len(anchors)/2, 4) 175 | centroid-type 176 | """ 177 | boxes = [] 178 | n_boxes = int(len(anchors)/2) 179 | for i in range(n_boxes): 180 | boxes.append(np.array([0, 0, anchors[2*i], anchors[2*i+1]])) 181 | return np.array(boxes) 182 | 183 | def find_match_box(centroid_box, centroid_boxes): 184 | """Find the index of the boxes with the largest overlap among the N-boxes. 185 | # Args 186 | box : array, shape of (1, 4) 187 | boxes : array, shape of (N, 4) 188 | 189 | # Return 190 | match_index : int 191 | """ 192 | match_index = -1 193 | max_iou = -1 194 | 195 | for i, box in enumerate(centroid_boxes): 196 | iou = centroid_box_iou(centroid_box, box) 197 | 198 | if max_iou < iou: 199 | match_index = i 200 | max_iou = iou 201 | return match_index 202 | -------------------------------------------------------------------------------- /examples/armnn/face_recognition/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.19.2 2 | tqdm>=4.47.0 3 | scikit_image=>0.18.3 -------------------------------------------------------------------------------- /examples/armnn/face_recognition/yolov2.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
2 | # SPDX-License-Identifier: MIT 3 | 4 | """ 5 | Contains functions specific to decoding and processing inference results for YOLO V3 Tiny models. 6 | """ 7 | 8 | import cv2 9 | import numpy as np 10 | from box import BoundBox, nms_boxes, boxes_to_array, to_minmax, draw_boxes 11 | 12 | 13 | def yolo_processing(netout): 14 | anchors = [1.889, 2.5245, 2.9465, 3.94056, 3.99987, 5.3658, 5.155437, 6.92275, 6.718375, 9.01025] 15 | nms_threshold=0.2 16 | """Convert Yolo network output to bounding box 17 | 18 | # Args 19 | netout : 4d-array, shape of (grid_h, grid_w, num of boxes per grid, 5 + n_classes) 20 | YOLO neural network output array 21 | 22 | # Returns 23 | boxes : array, shape of (N, 4) 24 | coordinate scale is normalized [0, 1] 25 | probs : array, shape of (N, nb_classes) 26 | """ 27 | netout = netout[0].reshape(7,7,5,6) 28 | grid_h, grid_w, nb_box = netout.shape[:3] 29 | boxes = [] 30 | 31 | # decode the output by the network 32 | netout[..., 4] = _sigmoid(netout[..., 4]) 33 | netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:]) 34 | netout[..., 5:] *= netout[..., 5:] > 0.3 35 | 36 | for row in range(grid_h): 37 | for col in range(grid_w): 38 | for b in range(nb_box): 39 | # from 4th element onwards are confidence and class classes 40 | classes = netout[row,col,b,5:] 41 | 42 | if np.sum(classes) > 0: 43 | # first 4 elements are x, y, w, and h 44 | x, y, w, h = netout[row,col,b,:4] 45 | 46 | x = (col + _sigmoid(x)) / grid_w # center position, unit: image width 47 | y = (row + _sigmoid(y)) / grid_h # center position, unit: image height 48 | w = anchors[2 * b + 0] * np.exp(w) / grid_w # unit: image width 49 | h = anchors[2 * b + 1] * np.exp(h) / grid_h # unit: image height 50 | confidence = netout[row,col,b,4] 51 | box = BoundBox(x, y, w, h, confidence, classes) 52 | boxes.append(box) 53 | 54 | boxes = nms_boxes(boxes, len(classes), nms_threshold, 0.3) 55 | boxes, probs = boxes_to_array(boxes) 56 | #print(boxes) 57 | predictions = [] 58 | def _to_original_scale(boxes): 59 | minmax_boxes = to_minmax(boxes) 60 | minmax_boxes[:,0] *= 224 61 | minmax_boxes[:,2] *= 224 62 | minmax_boxes[:,1] *= 224 63 | minmax_boxes[:,3] *= 224 64 | return minmax_boxes.astype(np.int) 65 | 66 | if len(boxes) > 0: 67 | boxes = _to_original_scale(boxes) 68 | 69 | for i in range(len(boxes)): 70 | predictions.append([0, boxes[i], probs[i][0]]) 71 | 72 | return predictions 73 | 74 | def _sigmoid(x): 75 | return 1. / (1. + np.exp(-x)) 76 | 77 | def _softmax(x, axis=-1, t=-100.): 78 | x = x - np.max(x) 79 | if np.min(x) < t: 80 | x = x/np.min(x)*t 81 | e_x = np.exp(x) 82 | return e_x / e_x.sum(axis, keepdims=True) 83 | 84 | def yolo_resize_factor(video: cv2.VideoCapture, input_binding_info: tuple): 85 | """ 86 | Gets a multiplier to scale the bounding box positions to 87 | their correct position in the frame. 88 | 89 | Args: 90 | video: Video capture object, contains information about data source. 91 | input_binding_info: Contains shape of model input layer. 92 | 93 | Returns: 94 | Resizing factor to scale box coordinates to output frame size. 
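For example, a 1280x720 source frame used with a 224x224 model input gives a factor of 1280 / 224 ≈ 5.71.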
95 | """ 96 | frame_height = video.get(cv2.CAP_PROP_FRAME_HEIGHT) 97 | frame_width = video.get(cv2.CAP_PROP_FRAME_WIDTH) 98 | model_height, model_width = list(input_binding_info[1].GetShape())[1:3] 99 | return max(frame_height, frame_width) / max(model_height, model_width) 100 | -------------------------------------------------------------------------------- /examples/edge_impulse/multi_stage_inference_vehicle_type/multi_stage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import cv2 4 | import os 5 | import sys, getopt 6 | import signal 7 | import time 8 | from edge_impulse_linux.image import ImageImpulseRunner 9 | 10 | show_camera = True 11 | 12 | def draw_result(frame, class_name, bb, confidence): 13 | """ 14 | Draws bounding boxes around detected objects and adds a label and confidence score. 15 | Args: 16 | frame: The original captured frame from video source. 17 | detections: A list of detected objects in the form [class, [box positions], confidence]. 18 | resize_factor: Resizing factor to scale box coordinates to output frame size. 19 | face_data: List containing information about age and gender 20 | """ 21 | color = (255, 0, 0) 22 | 23 | x_min, y_min, x_max, y_max = bb['x'], bb['y'], bb['x']+ bb['width'], bb['y']+ bb['height'] 24 | 25 | # Draw bounding box around detected object 26 | cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2) 27 | 28 | # Create label for detected object class 29 | label = "{}, {}".format(class_name, confidence) 30 | label_color = (255, 255, 255) 31 | 32 | # Make sure label always stays on-screen 33 | x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2] 34 | 35 | lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text) 36 | lbl_box_xy_max = (x_min + int(0.75 * x_text), y_min + y_text if y_min<25 else y_min) 37 | lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5) 38 | 39 | # Add label and confidence value 40 | cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1) 41 | cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.70, label_color, 1, cv2.LINE_AA) 42 | 43 | 44 | def now(): 45 | return round(time.time() * 1000) 46 | 47 | def get_webcams(): 48 | port_ids = [] 49 | for port in range(5): 50 | print("Looking for a camera in port %s:" %port) 51 | camera = cv2.VideoCapture(port) 52 | if camera.isOpened(): 53 | ret = camera.read()[0] 54 | if ret: 55 | backendName =camera.getBackendName() 56 | w = camera.get(3) 57 | h = camera.get(4) 58 | print("Camera %s (%s x %s) found in port %s " %(backendName,h,w, port)) 59 | port_ids.append(port) 60 | camera.release() 61 | return port_ids 62 | 63 | def sigint_handler(sig, frame): 64 | print('Interrupted') 65 | if (runner): 66 | runner.stop() 67 | sys.exit(0) 68 | 69 | signal.signal(signal.SIGINT, sigint_handler) 70 | 71 | def help(): 72 | print('python classify.py ') 73 | 74 | def main(argv): 75 | try: 76 | opts, args = getopt.getopt(argv, "h", ["--help"]) 77 | except getopt.GetoptError: 78 | help() 79 | sys.exit(2) 80 | 81 | for opt, arg in opts: 82 | if opt in ('-h', '--help'): 83 | help() 84 | sys.exit() 85 | 86 | if len(args) == 0: 87 | help() 88 | sys.exit(2) 89 | 90 | def get_path(model_name): 91 | 92 | dir_path = os.path.dirname(os.path.realpath(__file__)) 93 | modelfile = os.path.join(dir_path, model_name) 94 | print('MODEL: ' + modelfile) 95 | return modelfile 96 | 97 | detection_model = get_path(args[0]) 98 | classification_model = get_path(args[1]) 99 | 100 | 
with ImageImpulseRunner(detection_model) as detection_runner, ImageImpulseRunner(classification_model) as classification_runner: 101 | 102 | detection_model_info = detection_runner.init() 103 | classification_model_info = classification_runner.init() 104 | 105 | print('Loaded detection model runner for "' + detection_model_info['project']['owner'] + ' / ' + detection_model_info['project']['name'] + '"') 106 | detection_labels = detection_model_info['model_parameters']['labels'] 107 | 108 | print('Loaded detection model runner for "' + classification_model_info['project']['owner'] + ' / ' + classification_model_info['project']['name'] + '"') 109 | classification_labels = classification_model_info['model_parameters']['labels'] 110 | 111 | class_model_input_height = classification_model_info['model_parameters']['image_input_height'] 112 | class_model_input_width = classification_model_info['model_parameters']['image_input_width'] 113 | 114 | if len(args)>= 3: 115 | videoCaptureDeviceId = int(args[2]) 116 | else: 117 | port_ids = get_webcams() 118 | if len(port_ids) == 0: 119 | raise Exception('Cannot find any webcams') 120 | if len(args)<= 1 and len(port_ids)> 1: 121 | raise Exception("Multiple cameras found. Add the camera port ID as a second argument to use to this script") 122 | videoCaptureDeviceId = int(port_ids[0]) 123 | 124 | camera = cv2.VideoCapture(videoCaptureDeviceId) 125 | 126 | ret = camera.read()[0] 127 | if ret: 128 | backendName = camera.getBackendName() 129 | w = camera.get(3) 130 | h = camera.get(4) 131 | print("Camera %s (%s x %s) in port %s selected." %(backendName,h,w, videoCaptureDeviceId)) 132 | camera.release() 133 | else: 134 | raise Exception("Couldn't initialize selected camera.") 135 | 136 | for det_res, img in detection_runner.classifier(videoCaptureDeviceId): 137 | print('Found %d bounding boxes (%d ms.)' % (len(det_res["result"]["bounding_boxes"]), det_res['timing']['dsp'] + det_res['timing']['classification'])) 138 | for bb in det_res["result"]["bounding_boxes"]: 139 | print('%s (%.2f): x=%d y=%d w=%d h=%d\n' % (bb['label'], bb['value'], bb['x'], bb['y'], bb['width'], bb['height'])) 140 | 141 | cropped_img = img[bb['y']:bb['y']+bb['height'], bb['x']:bb['x']+bb['width']] 142 | resized_img = cv2.resize(cropped_img, (class_model_input_width, class_model_input_height)) 143 | 144 | features, cropped = classification_runner.get_features_from_image(resized_img) 145 | 146 | # the image will be resized and cropped, save a copy of the picture here 147 | # so you can see what's being passed into the classifier 148 | #cv2.imwrite('debug.jpg', cropped) 149 | 150 | class_res = classification_runner.classify(features) 151 | 152 | if "classification" in class_res["result"].keys(): 153 | print('Classification result (%d ms.) 
\n' % (class_res['timing']['dsp'] + class_res['timing']['classification']), end='') 154 | top_score = 0 155 | top_label = '' 156 | 157 | for label in classification_labels: 158 | score = class_res['result']['classification'][label] 159 | print('%s: %.2f\n' % (label, score), end='') 160 | if score >= top_score: 161 | top_score = score 162 | top_label = label 163 | 164 | print('----------------------\n', flush=True) 165 | print('Top result: %s with confidence %.2f\n' % (top_label, top_score), end='') 166 | print('----------------------\n', flush=True) 167 | 168 | draw_result(img, top_label, bb, top_score) 169 | 170 | if (show_camera): 171 | cv2.imshow('edgeimpulse', img) 172 | if cv2.waitKey(1) == ord('q'): 173 | break 174 | 175 | detection_runner.stop() 176 | classification_runner.stop() 177 | 178 | if __name__ == "__main__": 179 | main(sys.argv[1:]) -------------------------------------------------------------------------------- /examples/mediapipe/README.md: -------------------------------------------------------------------------------- 1 | # MediaPipe Sample Applications 2 | 3 | ## Introduction 4 | Google MediaPipe offers ready-to-use yet customizable Python solutions as a prebuilt Python package. 5 | 6 | We provide example scripts for performing inference from video file and video stream with `run_video_file.py` and `run_video_stream.py`. For detailed instructions execute ```run_video_file.py --help``` or ```run_video_stream.py --help``` 7 | 8 | ## Prerequisites 9 | 10 | ##### MediaPipe 11 | 12 | Before proceeding to the next steps, make sure that you have successfully installed the MediaPipe on your system by following the instructions in the README. 13 | 14 | You can verify that MediaPipe library is installed using: 15 | ```bash 16 | $ pip3 show mediapipe 17 | ``` 18 | 19 | ##### Dependencies 20 | 21 | Install the following libraries on your system: 22 | ```bash 23 | sudo apt install ffmpeg python3-opencv 24 | ``` 25 | 26 | Create a virtual environment: 27 | ```bash 28 | python3 -m venv devenv --system-site-packages 29 | source devenv/bin/activate 30 | ``` 31 | 32 | ### Python bindings for 32bit version 33 | 34 | ``` 35 | pip3 install mediapipe-rpi4 tqdm 36 | ``` 37 | 38 | ### Python bindings for 64bit version 39 | 40 | Pre-built wheels for Python 3.7 64bit OS were not available at the moment of writing of this article, so we compiled and shared them ourselves. 41 | 42 | ``` 43 | wget https://files.seeedstudio.com/ml/mediapipe/mediapipe-0.8-cp37-cp37m-linux_aarch64.whl 44 | pip3 install mediapipe-0.8-cp37-cp37m-linux_aarch64.whl 45 | pip3 install tqdm 46 | ``` 47 | -------------------------------------------------------------------------------- /examples/mediapipe/common/cv_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | """ 6 | This file contains helper functions for reading video/image data and 7 | pre/postprocessing of video/image data using OpenCV. 
8 | """ 9 | 10 | import os 11 | 12 | import cv2 13 | import numpy as np 14 | 15 | def count_frames_manual(video): 16 | # initialize the total number of frames read 17 | total = 0 18 | # loop over the frames of the video 19 | while True: 20 | # grab the current frame 21 | (grabbed, frame) = video.read() 22 | 23 | # check to see if we have reached the end of the 24 | # video 25 | if not grabbed: 26 | break 27 | # increment the total number of frames read 28 | total += 1 29 | # return the total number of frames in the video file 30 | return total 31 | 32 | def create_video_writer(video: cv2.VideoCapture, video_path: str, name: str): 33 | """ 34 | Creates a video writer object to write processed frames to file. 35 | 36 | Args: 37 | video: Video capture object, contains information about data source. 38 | video_path: User-specified video file path. 39 | output_path: Optional path to save the processed video. 40 | 41 | Returns: 42 | Video writer object. 43 | """ 44 | _, ext = os.path.splitext(video_path) 45 | 46 | i, filename = 0, os.path.join(str(), f'{name}{ext}') 47 | 48 | while os.path.exists(filename): 49 | i += 1 50 | filename = os.path.join(str(), f'{name}({i}){ext}') 51 | print(filename) 52 | video_writer = cv2.VideoWriter(filename=filename, 53 | fourcc=get_source_encoding_int(video), 54 | fps=int(video.get(cv2.CAP_PROP_FPS)), 55 | frameSize=(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)), 56 | int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)))) 57 | return video_writer 58 | 59 | 60 | def init_video_file_capture(video_path: str, name: str): 61 | """ 62 | Creates a video capture object from a video file. 63 | 64 | Args: 65 | video_path: User-specified video file path. 66 | output_path: Optional path to save the processed video. 67 | 68 | Returns: 69 | Video capture object to capture frames, video writer object to write processed 70 | frames to file, plus total frame count of video source to iterate through. 71 | """ 72 | if not os.path.exists(video_path): 73 | raise FileNotFoundError(f'Video file not found for: {video_path}') 74 | 75 | video = cv2.VideoCapture(video_path) 76 | if not video.isOpened: 77 | raise RuntimeError(f'Failed to open video capture from file: {video_path}') 78 | 79 | video_writer = create_video_writer(video, video_path, name) 80 | 81 | iter_frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) 82 | 83 | return video, video_writer, range(iter_frame_count) 84 | 85 | 86 | def init_video_stream_capture(video_source: int): 87 | """ 88 | Creates a video capture object from a device. 89 | 90 | Args: 91 | video_source: Device index used to read video stream. 92 | 93 | Returns: 94 | Video capture object used to capture frames from a video stream. 95 | """ 96 | video = cv2.VideoCapture(video_source) 97 | if not video.isOpened: 98 | raise RuntimeError(f'Failed to open video capture for device with index: {video_source}') 99 | print('Processing video stream. Press \'Esc\' key to exit the demo.') 100 | return video 101 | 102 | def get_source_encoding_int(video_capture): 103 | return int(video_capture.get(cv2.CAP_PROP_FOURCC)) 104 | -------------------------------------------------------------------------------- /examples/mediapipe/face_detection/run_video_file.py: -------------------------------------------------------------------------------- 1 | # Based on MediPipe Example Scripts. All rights reserved. 
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | import os 6 | import sys 7 | import time 8 | script_dir = os.path.dirname(__file__) 9 | sys.path.insert(1, os.path.join(script_dir, '..', 'common')) 10 | 11 | import cv2 12 | import mediapipe as mp 13 | from tqdm import tqdm 14 | from argparse import ArgumentParser 15 | from cv_utils import init_video_file_capture 16 | 17 | mp_drawing = mp.solutions.drawing_utils 18 | mp_face_detection = mp.solutions.face_detection 19 | 20 | def main(args): 21 | video, video_writer, frame_count = init_video_file_capture(args.video_file_path, 'face_detection_demo') 22 | frame_num = len(frame_count) 23 | 24 | times = [] 25 | 26 | with mp_face_detection.FaceDetection(model_selection=args.model_selection, 27 | min_detection_confidence=args.min_detection_confidence) as face_detection: 28 | 29 | for _ in tqdm(frame_count, desc='Processing frames'): 30 | frame_present, frame = video.read() 31 | if not frame_present: 32 | continue 33 | 34 | # Flip the image horizontally for a later selfie-view display, and convert 35 | # the BGR image to RGB. 36 | image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB) 37 | # To improve performance, optionally mark the image as not writeable to 38 | # pass by reference. 39 | image.flags.writeable = False 40 | 41 | start_time = time.time() 42 | results = face_detection.process(image) 43 | end_time = (time.time() - start_time)*1000 44 | 45 | # Draw the face mesh annotations on the image. 46 | image.flags.writeable = True 47 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 48 | if results.detections: 49 | for detection in results.detections: 50 | mp_drawing.draw_detection(image, detection) 51 | 52 | times.append(end_time) 53 | video_writer.write(image) 54 | 55 | print('Finished processing frames') 56 | video.release(), video_writer.release() 57 | 58 | print("Average time(ms): ", sum(times)//frame_num) 59 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1000.0 / average of inference times for all the frames 60 | 61 | if __name__ == '__main__': 62 | parser = ArgumentParser() 63 | parser.add_argument('--video_file_path', required=True, type=str, 64 | help='Path to the video file to run object detection on') 65 | 66 | parser.add_argument('--min_detection_confidence', default=0.5, type=float, 67 | help='Minimum confidence value ([0.0, 1.0]) from the face detection model for the detection to be considered successful. Default to 0.5') 68 | 69 | parser.add_argument('--model_selection', default=1, type=int, 70 | help='Use 0 to select a short-range model that works best for faces within 2 meters from the camera, and 1 for a full-range model best for faces within 5 meters.') 71 | 72 | args = parser.parse_args() 73 | main(args) 74 | -------------------------------------------------------------------------------- /examples/mediapipe/face_detection/run_video_stream.py: -------------------------------------------------------------------------------- 1 | # Based on MediPipe Example Scripts. All rights reserved. 
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | import os 6 | import sys 7 | import time 8 | script_dir = os.path.dirname(__file__) 9 | sys.path.insert(1, os.path.join(script_dir, '..', 'common')) 10 | 11 | import cv2 12 | import mediapipe as mp 13 | from argparse import ArgumentParser 14 | from cv_utils import init_video_stream_capture 15 | 16 | mp_drawing = mp.solutions.drawing_utils 17 | mp_face_detection = mp.solutions.face_detection 18 | 19 | def main(args): 20 | video = init_video_stream_capture(args.video_source) 21 | 22 | with mp_face_detection.FaceDetection(model_selection=args.model_selection, 23 | min_detection_confidence=args.min_detection_confidence) as face_detection: 24 | 25 | while True: 26 | 27 | frame_present, frame = video.read() 28 | if not frame_present: 29 | raise RuntimeError('Error reading frame from video stream') 30 | 31 | # Flip the image horizontally for a later selfie-view display, and convert 32 | # the BGR image to RGB. 33 | image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB) 34 | # To improve performance, optionally mark the image as not writeable to 35 | # pass by reference. 36 | image.flags.writeable = False 37 | 38 | start_time = time.time() 39 | results = face_detection.process(image) 40 | end_time = (time.time() - start_time)*1000 41 | 42 | print("FPS: ", 1.0 / (time.time() - start_time)) # FPS = 1 / time to process loop 43 | print("Time(ms): ", (time.time() - start_time)*1000) 44 | 45 | # Draw the face mesh annotations on the image. 46 | image.flags.writeable = True 47 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 48 | if results.detections: 49 | for detection in results.detections: 50 | mp_drawing.draw_detection(image, detection) 51 | 52 | cv2.imshow('MediaPipe Face Detection Demo', image) 53 | 54 | if cv2.waitKey(1) == 27: 55 | print('\nExit key activated. Closing video...') 56 | break 57 | 58 | video.release(), cv2.destroyAllWindows() 59 | 60 | if __name__ == '__main__': 61 | parser = ArgumentParser() 62 | parser.add_argument('--video_source', type=int, default=0, 63 | help='Device index to access video stream. Defaults to primary device camera at index 0') 64 | 65 | parser.add_argument('--min_detection_confidence', default=0.5, type=float, 66 | help='Minimum confidence value ([0.0, 1.0]) from the face detection model for the detection to be considered successful. Default to 0.5') 67 | 68 | parser.add_argument('--model_selection', default=1, type=int, 69 | help='Use 0 to select a short-range model that works best for faces within 2 meters from the camera, and 1 for a full-range model best for faces within 5 meters.') 70 | 71 | args = parser.parse_args() 72 | main(args) -------------------------------------------------------------------------------- /examples/mediapipe/face_mesh/run_video_file.py: -------------------------------------------------------------------------------- 1 | # Based on MediPipe Example Scripts. All rights reserved. 
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | import os 6 | import sys 7 | import time 8 | script_dir = os.path.dirname(__file__) 9 | sys.path.insert(1, os.path.join(script_dir, '..', 'common')) 10 | 11 | import cv2 12 | import mediapipe as mp 13 | import numpy as np 14 | from tqdm import tqdm 15 | from argparse import ArgumentParser 16 | from cv_utils import init_video_file_capture 17 | 18 | mp_drawing = mp.solutions.drawing_utils 19 | mp_face_mesh = mp.solutions.face_mesh 20 | 21 | def main(args): 22 | video, video_writer, frame_count = init_video_file_capture(args.video_file_path, 'face_mesh_demo') 23 | frame_num = len(frame_count) 24 | print(frame_count) 25 | drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1) 26 | 27 | times = [] 28 | 29 | with mp_face_mesh.FaceMesh(min_detection_confidence=args.min_detection_confidence, 30 | min_tracking_confidence=args.min_tracking_confidence) as face_mesh: 31 | 32 | for _ in tqdm(frame_count, desc='Processing frames'): 33 | frame_present, frame = video.read() 34 | if not frame_present: 35 | continue 36 | 37 | # Flip the image horizontally for a later selfie-view display, and convert 38 | # the BGR image to RGB. 39 | image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB) 40 | # To improve performance, optionally mark the image as not writeable to 41 | # pass by reference. 42 | image.flags.writeable = False 43 | 44 | start_time = time.time() 45 | results = face_mesh.process(image) 46 | end_time = (time.time() - start_time)*1000 47 | 48 | # Draw the face mesh annotations on the image. 49 | image.flags.writeable = True 50 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 51 | if results.multi_face_landmarks: 52 | for face_landmarks in results.multi_face_landmarks: 53 | mp_drawing.draw_landmarks( 54 | image=image, 55 | landmark_list=face_landmarks, 56 | connections=mp_face_mesh.FACE_CONNECTIONS, 57 | landmark_drawing_spec=drawing_spec, 58 | connection_drawing_spec=drawing_spec) 59 | 60 | times.append(end_time) 61 | video_writer.write(image) 62 | 63 | print('Finished processing frames') 64 | video.release(), video_writer.release() 65 | 66 | print("Average time(ms): ", sum(times)//frame_num) 67 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1000.0 / average of inference times for all the frames 68 | 69 | if __name__ == '__main__': 70 | parser = ArgumentParser() 71 | parser.add_argument('--video_file_path', required=True, type=str, 72 | help='Path to the video file to run object detection on') 73 | 74 | parser.add_argument('--min_detection_confidence', default=0.5, type=float, 75 | help='Path to the first stage model to use') 76 | parser.add_argument('--min_tracking_confidence', default=0.5, type=float, 77 | help='Path to the second stage model to use') 78 | 79 | args = parser.parse_args() 80 | main(args) 81 | -------------------------------------------------------------------------------- /examples/mediapipe/face_mesh/run_video_stream.py: -------------------------------------------------------------------------------- 1 | # Based on MediPipe Example Scripts. All rights reserved. 
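The face-mesh demo above draws landmarks and connections with a single thin DrawingSpec. The snippet below is a small sketch of how separate, coloured specs could be passed for points and connections; the colour tuples are illustrative values and are effectively BGR, since the drawing is done with OpenCV.

```
import mediapipe as mp

mp_drawing = mp.solutions.drawing_utils

# Illustrative values: thin green dots for landmarks, thicker white mesh lines.
landmark_spec = mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=1, circle_radius=1)
connection_spec = mp_drawing.DrawingSpec(color=(255, 255, 255), thickness=2)

# mp_drawing.draw_landmarks(image=image,
#                           landmark_list=face_landmarks,
#                           connections=mp_face_mesh.FACE_CONNECTIONS,
#                           landmark_drawing_spec=landmark_spec,
#                           connection_drawing_spec=connection_spec)
```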
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | import os 6 | import sys 7 | import time 8 | script_dir = os.path.dirname(__file__) 9 | sys.path.insert(1, os.path.join(script_dir, '..', 'common')) 10 | 11 | import cv2 12 | import mediapipe as mp 13 | from argparse import ArgumentParser 14 | from cv_utils import init_video_stream_capture 15 | 16 | mp_drawing = mp.solutions.drawing_utils 17 | mp_face_mesh = mp.solutions.face_mesh 18 | 19 | def main(args): 20 | video = init_video_stream_capture(args.video_source) 21 | drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1) 22 | 23 | with mp_face_mesh.FaceMesh(min_detection_confidence=args.min_detection_confidence, 24 | min_tracking_confidence=args.min_tracking_confidence, 25 | static_image_mode = False) as face_mesh: 26 | 27 | while True: 28 | 29 | frame_present, frame = video.read() 30 | if not frame_present: 31 | raise RuntimeError('Error reading frame from video stream') 32 | 33 | # Flip the image horizontally for a later selfie-view display, and convert 34 | # the BGR image to RGB. 35 | image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB) 36 | # To improve performance, optionally mark the image as not writeable to 37 | # pass by reference. 38 | image.flags.writeable = False 39 | 40 | start_time = time.time() 41 | results = face_mesh.process(image) 42 | end_time = (time.time() - start_time)*1000 43 | 44 | print("FPS: ", 1.0 / (time.time() - start_time)) # FPS = 1 / time to process loop 45 | print("Time(ms): ", (time.time() - start_time)*1000) 46 | 47 | # Draw the face mesh annotations on the image. 48 | image.flags.writeable = True 49 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 50 | if results.multi_face_landmarks: 51 | for face_landmarks in results.multi_face_landmarks: 52 | mp_drawing.draw_landmarks( 53 | image=image, 54 | landmark_list=face_landmarks, 55 | connections=mp_face_mesh.FACE_CONNECTIONS, 56 | landmark_drawing_spec=drawing_spec, 57 | connection_drawing_spec=drawing_spec) 58 | 59 | cv2.imshow('MediaPipe Face Mesh Demo', image) 60 | 61 | if cv2.waitKey(1) == 27: 62 | print('\nExit key activated. Closing video...') 63 | break 64 | 65 | video.release(), cv2.destroyAllWindows() 66 | 67 | if __name__ == '__main__': 68 | parser = ArgumentParser() 69 | parser.add_argument('--video_source', type=int, default=0, 70 | help='Device index to access video stream. Defaults to primary device camera at index 0') 71 | 72 | parser.add_argument('--min_detection_confidence', default=0.5, type=float, 73 | help='Minimum confidence value ([0.0, 1.0]) from the face detection model for the detection to be considered successful. Default to 0.5') 74 | parser.add_argument('--min_tracking_confidence', default=0.5, type=float, 75 | help='Minimum confidence value ([0.0, 1.0]) from the landmark-tracking model for the face landmarks to be considered tracked successfully, or otherwise face detection will be invoked automatically on the next input image.') 76 | 77 | args = parser.parse_args() 78 | main(args) -------------------------------------------------------------------------------- /examples/mediapipe/hand_landmarks/run_video_file.py: -------------------------------------------------------------------------------- 1 | # Based on MediPipe Example Scripts. All rights reserved. 
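Both face-mesh scripts reference `mp_face_mesh.FACE_CONNECTIONS`, which exists in the MediaPipe versions these demos were written against but was later replaced by the `FACEMESH_*` constant sets (for example `FACEMESH_TESSELATION` and `FACEMESH_CONTOURS`). A hedged compatibility sketch, assuming one of the two names is present:

```
import mediapipe as mp

mp_face_mesh = mp.solutions.face_mesh

# Prefer the legacy name used in these demos, fall back to the newer constant.
FACE_MESH_CONNECTIONS = getattr(mp_face_mesh, 'FACE_CONNECTIONS', None) \
    or getattr(mp_face_mesh, 'FACEMESH_TESSELATION', None)

# ...then pass connections=FACE_MESH_CONNECTIONS to mp_drawing.draw_landmarks().
```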
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | import os 6 | import sys 7 | import time 8 | script_dir = os.path.dirname(__file__) 9 | sys.path.insert(1, os.path.join(script_dir, '..', 'common')) 10 | 11 | import cv2 12 | import mediapipe as mp 13 | from tqdm import tqdm 14 | from argparse import ArgumentParser 15 | from cv_utils import init_video_file_capture 16 | 17 | mp_drawing = mp.solutions.drawing_utils 18 | mp_drawing_styles = mp.solutions.drawing_styles 19 | mp_hands = mp.solutions.hands 20 | 21 | def main(args): 22 | video, video_writer, frame_count = init_video_file_capture(args.video_file_path, 'hand_landmarks_demo') 23 | frame_num = len(frame_count) 24 | 25 | times = [] 26 | 27 | with mp_hands.Hands(model_complexity=args.model_selection, 28 | min_detection_confidence=args.min_detection_confidence, 29 | min_tracking_confidence=0.5) as hands: 30 | 31 | for _ in tqdm(frame_count, desc='Processing frames'): 32 | frame_present, frame = video.read() 33 | if not frame_present: 34 | continue 35 | 36 | # Flip the image horizontally for a later selfie-view display, and convert 37 | # the BGR image to RGB. 38 | image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB) 39 | # To improve performance, optionally mark the image as not writeable to 40 | # pass by reference. 41 | image.flags.writeable = False 42 | 43 | start_time = time.time() 44 | results = hands.process(image) 45 | end_time = (time.time() - start_time)*1000 46 | 47 | # Draw the hand landmarks annotations on the image. 48 | image.flags.writeable = True 49 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 50 | if results.multi_hand_landmarks: 51 | for hand_landmarks in results.multi_hand_landmarks: 52 | mp_drawing.draw_landmarks( 53 | image, 54 | hand_landmarks, 55 | mp_hands.HAND_CONNECTIONS, 56 | mp_drawing_styles.get_default_hand_landmarks_style(), 57 | mp_drawing_styles.get_default_hand_connections_style()) 58 | 59 | times.append(end_time) 60 | video_writer.write(image) 61 | 62 | print('Finished processing frames') 63 | video.release(), video_writer.release() 64 | 65 | print("Average time(ms): ", sum(times)//frame_num) 66 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1000.0 / average of inference times for all the frames 67 | 68 | if __name__ == '__main__': 69 | parser = ArgumentParser() 70 | parser.add_argument('--video_file_path', required=True, type=str, 71 | help='Path to the video file to run object detection on') 72 | 73 | parser.add_argument('--min_detection_confidence', default=0.5, type=float, 74 | help='Minimum confidence value ([0.0, 1.0]) from the face detection model for the detection to be considered successful. Default to 0.5') 75 | 76 | parser.add_argument('--model_selection', default=0, type=int, 77 | help='Use 0 to select a short-range model that works best for faces within 2 meters from the camera, and 1 for a full-range model best for faces within 5 meters.') 78 | 79 | args = parser.parse_args() 80 | main(args) 81 | -------------------------------------------------------------------------------- /examples/mediapipe/hand_landmarks/run_video_stream.py: -------------------------------------------------------------------------------- 1 | # Based on MediPipe Example Scripts. All rights reserved. 
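Hand landmarks come back normalised to [0, 1] relative to the processed image, so turning them into pixel positions needs the frame size; note that the frame was flipped horizontally before inference, so the coordinates refer to the mirrored view. A minimal sketch (the helper name is hypothetical):

```
import mediapipe as mp

mp_hands = mp.solutions.hands

def index_fingertip_px(hand_landmarks, frame_width, frame_height):
    """Return the index fingertip as (x, y) pixel coordinates."""
    tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
    return int(tip.x * frame_width), int(tip.y * frame_height)

# for hand_landmarks in results.multi_hand_landmarks or []:
#     x, y = index_fingertip_px(hand_landmarks, image.shape[1], image.shape[0])
```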
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | import os 6 | import sys 7 | import time 8 | script_dir = os.path.dirname(__file__) 9 | sys.path.insert(1, os.path.join(script_dir, '..', 'common')) 10 | 11 | import cv2 12 | import mediapipe as mp 13 | from argparse import ArgumentParser 14 | from cv_utils import init_video_stream_capture 15 | 16 | mp_drawing = mp.solutions.drawing_utils 17 | mp_drawing_styles = mp.solutions.drawing_styles 18 | mp_hands = mp.solutions.hands 19 | 20 | def main(args): 21 | video = init_video_stream_capture(args.video_source) 22 | 23 | with mp_hands.Hands(model_complexity=args.model_selection, 24 | min_detection_confidence=args.min_detection_confidence, 25 | min_tracking_confidence=0.5) as hands: 26 | 27 | while True: 28 | 29 | frame_present, frame = video.read() 30 | if not frame_present: 31 | raise RuntimeError('Error reading frame from video stream') 32 | 33 | # Flip the image horizontally for a later selfie-view display, and convert 34 | # the BGR image to RGB. 35 | image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB) 36 | # To improve performance, optionally mark the image as not writeable to 37 | # pass by reference. 38 | image.flags.writeable = False 39 | 40 | start_time = time.time() 41 | results = hands.process(image) 42 | end_time = (time.time() - start_time)*1000 43 | 44 | print("FPS: ", 1.0 / (time.time() - start_time)) # FPS = 1 / time to process loop 45 | print("Time(ms): ", (time.time() - start_time)*1000) 46 | 47 | # Draw the hand landmarks annotations on the image. 48 | image.flags.writeable = True 49 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 50 | if results.multi_hand_landmarks: 51 | for hand_landmarks in results.multi_hand_landmarks: 52 | mp_drawing.draw_landmarks( 53 | image, 54 | hand_landmarks, 55 | mp_hands.HAND_CONNECTIONS, 56 | mp_drawing_styles.get_default_hand_landmarks_style(), 57 | mp_drawing_styles.get_default_hand_connections_style()) 58 | 59 | cv2.imshow('MediaPipe Hands', image) 60 | 61 | if cv2.waitKey(1) == 27: 62 | print('\nExit key activated. Closing video...') 63 | break 64 | 65 | video.release(), cv2.destroyAllWindows() 66 | 67 | if __name__ == '__main__': 68 | parser = ArgumentParser() 69 | parser.add_argument('--video_source', type=int, default=0, 70 | help='Device index to access video stream. Defaults to primary device camera at index 0') 71 | 72 | parser.add_argument('--min_detection_confidence', default=0.5, type=float, 73 | help='Minimum confidence value ([0.0, 1.0]) from the face detection model for the detection to be considered successful. Default to 0.5') 74 | 75 | parser.add_argument('--model_selection', default=0, type=int, 76 | help='Use 0 to select a short-range model that works best for faces within 2 meters from the camera, and 1 for a full-range model best for faces within 5 meters.') 77 | 78 | args = parser.parse_args() 79 | main(args) -------------------------------------------------------------------------------- /examples/mediapipe/pose_estimation/run_video_file.py: -------------------------------------------------------------------------------- 1 | # Based on MediPipe Example Scripts. All rights reserved. 
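Besides landmark positions, the Hands solution also reports which hand each set belongs to via `results.multi_handedness`, whose entries line up index-for-index with `results.multi_hand_landmarks`. A small sketch (the function name is hypothetical); MediaPipe determines handedness assuming a mirrored selfie view, which matches the horizontal flip applied in these demos.

```
def describe_hands(results):
    """Return a list like ['Right (0.98)', 'Left (0.95)'] for the detected hands."""
    labels = []
    if results.multi_handedness:
        for handedness in results.multi_handedness:
            top = handedness.classification[0]   # highest-scoring label
            labels.append(f'{top.label} ({top.score:.2f})')
    return labels
```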
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | import os 6 | import sys 7 | import time 8 | script_dir = os.path.dirname(__file__) 9 | sys.path.insert(1, os.path.join(script_dir, '..', 'common')) 10 | 11 | import cv2 12 | import mediapipe as mp 13 | from tqdm import tqdm 14 | from argparse import ArgumentParser 15 | from cv_utils import init_video_file_capture 16 | 17 | mp_drawing = mp.solutions.drawing_utils 18 | mp_pose = mp.solutions.pose 19 | 20 | def main(args): 21 | video, video_writer, frame_count = init_video_file_capture(args.video_file_path, 'pose_estimation_demo') 22 | frame_num = len(frame_count) 23 | 24 | times = [] 25 | 26 | with mp_pose.Pose(min_detection_confidence=args.min_detection_confidence, 27 | model_complexity=args.model_complexity, 28 | static_image_mode = False) as pose: 29 | 30 | for _ in tqdm(frame_count, desc='Processing frames'): 31 | frame_present, frame = video.read() 32 | if not frame_present: 33 | continue 34 | 35 | # Flip the image horizontally for a later selfie-view display, and convert 36 | # the BGR image to RGB. 37 | image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB) 38 | # To improve performance, optionally mark the image as not writeable to 39 | # pass by reference. 40 | image.flags.writeable = False 41 | 42 | start_time = time.time() 43 | results = pose.process(image) 44 | end_time = (time.time() - start_time)*1000 45 | 46 | # Draw the face mesh annotations on the image. 47 | image.flags.writeable = True 48 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 49 | mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS) 50 | 51 | times.append(end_time) 52 | video_writer.write(image) 53 | 54 | print('Finished processing frames') 55 | video.release(), video_writer.release() 56 | 57 | print("Average time(ms): ", sum(times)//frame_num) 58 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1000.0 / average of inference times for all the frames 59 | 60 | if __name__ == '__main__': 61 | parser = ArgumentParser() 62 | parser.add_argument('--video_file_path', required=True, type=str, 63 | help='Path to the video file to run object detection on') 64 | 65 | parser.add_argument('--min_detection_confidence', default=0.5, type=float, 66 | help='Minimum confidence value ([0.0, 1.0]) from the face detection model for the detection to be considered successful. Default to 0.5') 67 | 68 | parser.add_argument('--model_complexity', default=0, type=int, 69 | help='Landmark accuracy as well as inference latency generally go up with the model complexity. Default to 1') 70 | 71 | args = parser.parse_args() 72 | main(args) 73 | -------------------------------------------------------------------------------- /examples/mediapipe/pose_estimation/run_video_stream.py: -------------------------------------------------------------------------------- 1 | # Based on MediPipe Example Scripts. All rights reserved. 
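Unlike the face and hand demos, the pose scripts pass `results.pose_landmarks` straight to `draw_landmarks()`, which quietly does nothing when no person is found. When individual joints are needed, each pose landmark also carries a `visibility` score worth checking; the sketch below (names are illustrative) reads the left wrist.

```
import mediapipe as mp

mp_pose = mp.solutions.pose

def left_wrist_px(results, frame_width, frame_height, min_visibility=0.5):
    """Return the left wrist as (x, y) pixels, or None if absent or occluded."""
    if not results.pose_landmarks:
        return None
    lm = results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_WRIST]
    if lm.visibility < min_visibility:
        return None
    return int(lm.x * frame_width), int(lm.y * frame_height)
```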
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | import os 6 | import sys 7 | import time 8 | script_dir = os.path.dirname(__file__) 9 | sys.path.insert(1, os.path.join(script_dir, '..', 'common')) 10 | 11 | import cv2 12 | import mediapipe as mp 13 | from argparse import ArgumentParser 14 | from cv_utils import init_video_stream_capture 15 | 16 | mp_drawing = mp.solutions.drawing_utils 17 | mp_pose = mp.solutions.pose 18 | 19 | def main(args): 20 | video = init_video_stream_capture(args.video_source) 21 | 22 | with mp_pose.Pose(min_detection_confidence=args.min_detection_confidence, 23 | model_complexity=args.model_complexity, 24 | static_image_mode = False) as pose: 25 | 26 | while True: 27 | 28 | frame_present, frame = video.read() 29 | if not frame_present: 30 | raise RuntimeError('Error reading frame from video stream') 31 | 32 | # Flip the image horizontally for a later selfie-view display, and convert 33 | # the BGR image to RGB. 34 | image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB) 35 | # To improve performance, optionally mark the image as not writeable to 36 | # pass by reference. 37 | image.flags.writeable = False 38 | 39 | start_time = time.time() 40 | results = pose.process(image) 41 | end_time = (time.time() - start_time)*1000 42 | 43 | print("FPS: ", 1.0 / (time.time() - start_time)) # FPS = 1 / time to process loop 44 | print("Time(ms): ", (time.time() - start_time)*1000) 45 | 46 | # Draw the face mesh annotations on the image. 47 | image.flags.writeable = True 48 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 49 | mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS) 50 | 51 | cv2.imshow('MediaPipe Pose Estimation Demo', image) 52 | 53 | if cv2.waitKey(1) == 27: 54 | print('\nExit key activated. Closing video...') 55 | break 56 | 57 | video.release(), cv2.destroyAllWindows() 58 | 59 | if __name__ == '__main__': 60 | parser = ArgumentParser() 61 | parser.add_argument('--video_source', type=int, default=0, 62 | help='Device index to access video stream. Defaults to primary device camera at index 0') 63 | 64 | parser.add_argument('--min_detection_confidence', default=0.5, type=float, 65 | help='Minimum confidence value ([0.0, 1.0]) from the face detection model for the detection to be considered successful. Default to 0.5') 66 | 67 | parser.add_argument('--model_complexity', default=0, type=int, 68 | help='Landmark accuracy as well as inference latency generally go up with the model complexity. 
Default to 0.') 69 | 70 | args = parser.parse_args() 71 | main(args) -------------------------------------------------------------------------------- /examples/sample_files/cars.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seeed-Studio/Seeed_Python_MachineLearning/1fc0bf6d24d778c4fe501541966857b6fd50c146/examples/sample_files/cars.mp4 -------------------------------------------------------------------------------- /examples/sample_files/test_dance.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seeed-Studio/Seeed_Python_MachineLearning/1fc0bf6d24d778c4fe501541966857b6fd50c146/examples/sample_files/test_dance.mp4 -------------------------------------------------------------------------------- /examples/sample_files/test_s.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seeed-Studio/Seeed_Python_MachineLearning/1fc0bf6d24d778c4fe501541966857b6fd50c146/examples/sample_files/test_s.mp4 -------------------------------------------------------------------------------- /examples/tensorflow_lite/face_recognition/README.md: -------------------------------------------------------------------------------- 1 | # TensorFlow Lite Face Recognition Multi-stage Demo 2 | 3 | ## Introduction 4 | 5 | This demo allows for face recognition from either a video stream or a video file. The face embeddings need to be calculated and saved to a database with calculate_features.py before either of the two examples can be run. 6 | 7 | ## Prerequisites 8 | 9 | Install the dependencies with 10 | ``` 11 | pip3 install -r requirements.txt 12 | ``` 13 | Make sure you have the necessary system packages for OpenCV to work properly. 14 | ``` 15 | sudo apt-get install libatlas-base-dev libjasper-dev libqtgui4 python3-pyqt5 libqt4-test libilmbase-dev libopenexr-dev libgstreamer1.0-dev libavcodec58 libavformat58 libswscale5 16 | ``` 17 | 18 | ## Usage 19 | 20 | ### Database population 21 | 22 | Before we can run face recognition, we need to extract features from the faces we want to recognize and save the feature embedding vectors in encoded form in a .json file, which serves as a small database. You can do that with calculate_features.py. 23 | 24 | ``` 25 | python calculate_features.py --help 26 | OpenCV version: 4.5.3 27 | usage: calculate_features.py [-h] --first_stage FIRST_STAGE --second_stage 28 | SECOND_STAGE --third_stage THIRD_STAGE 29 | [--db_file DB_FILE] --img_file IMG_FILE [--id ID] 30 | [--name NAME] 31 | 32 | optional arguments: 33 | -h, --help show this help message and exit 34 | --first_stage FIRST_STAGE 35 | File path of .tflite file. (default: None) 36 | --second_stage SECOND_STAGE 37 | File path of .tflite file. (default: None) 38 | --third_stage THIRD_STAGE 39 | File path of .tflite file.
(default: None) 40 | --db_file DB_FILE File path to database (default: database.db) 41 | --img_file IMG_FILE File path to picture (default: None) 42 | --id ID Unique ID for the face 43 | (default: 0) 44 | --name NAME Name for the face feature vector (can be duplicate) 45 | (default: John Doe) 46 | ``` 47 | For example, to extract a single face embedding vector of Barack Hussein Obama's face you can run: 48 | ``` 49 | python calculate_features.py --first_stage ../face_rec_models/YOLOv3_best_recall_quant.tflite --second_stage ../face_rec_models/MobileFaceNet_kpts_quant.tflite --third_stage ../face_rec_models/MobileFaceNet_features_quant.tflite --img_file obama.jpg --name Obama --id 0 50 | ``` 51 | 52 | ### Face Recognition from Video File 53 | 54 | Once you have a database with at least one face embedding recorded, you can try it on a video file that contains people's faces. This is mainly used for testing and benchmarking purposes. 55 | 56 | Example: 57 | ``` 58 | python multi_stage_file.py --first_stage ../face_rec_models/YOLOv3_best_recall_quant.tflite --second_stage ../face_rec_models/MobileFaceNet_kpts_quant.tflite --third_stage ../face_rec_models/MobileFaceNet_features_quant.tflite --file ../../sample_files/test_s.mp4 59 | ``` 60 | 61 | ### Face Recognition from Video Stream 62 | 63 | Finally, for actual application use you can run the multi_stage_stream.py script. It can get the video stream either from OpenCV or from picamera, if executed on a Raspberry Pi with a Pi camera connected. 64 | 65 | Example: 66 | ``` 67 | python multi_stage_stream.py --first_stage ../face_rec_models/YOLOv3_best_recall_quant.tflite --second_stage ../face_rec_models/MobileFaceNet_kpts_quant.tflite --third_stage ../face_rec_models/MobileFaceNet_features_quant.tflite 68 | ``` 69 | The output is served by a Flask web server on port 5000, which simplifies testing and running the application on headless systems. -------------------------------------------------------------------------------- /examples/tensorflow_lite/face_recognition/base_camera.py: -------------------------------------------------------------------------------- 1 | import time 2 | import threading 3 | try: 4 | from greenlet import getcurrent as get_ident 5 | except ImportError: 6 | try: 7 | from thread import get_ident 8 | except ImportError: 9 | from _thread import get_ident 10 | 11 | 12 | class CameraEvent(object): 13 | """An Event-like class that signals all active clients when a new frame is 14 | available.
15 | """ 16 | def __init__(self): 17 | self.events = {} 18 | 19 | def wait(self): 20 | """Invoked from each client's thread to wait for the next frame.""" 21 | ident = get_ident() 22 | if ident not in self.events: 23 | # this is a new client 24 | # add an entry for it in the self.events dict 25 | # each entry has two elements, a threading.Event() and a timestamp 26 | self.events[ident] = [threading.Event(), time.time()] 27 | return self.events[ident][0].wait() 28 | 29 | def set(self): 30 | """Invoked by the camera thread when a new frame is available.""" 31 | now = time.time() 32 | remove = None 33 | for ident, event in self.events.items(): 34 | if not event[0].isSet(): 35 | # if this client's event is not set, then set it 36 | # also update the last set timestamp to now 37 | event[0].set() 38 | event[1] = now 39 | else: 40 | # if the client's event is already set, it means the client 41 | # did not process a previous frame 42 | # if the event stays set for more than 5 seconds, then assume 43 | # the client is gone and remove it 44 | if now - event[1] > 5: 45 | remove = ident 46 | if remove: 47 | del self.events[remove] 48 | 49 | def clear(self): 50 | """Invoked from each client's thread after a frame was processed.""" 51 | self.events[get_ident()][0].clear() 52 | 53 | 54 | class BaseCamera(object): 55 | thread = None # background thread that reads frames from camera 56 | frame = None # current frame is stored here by background thread 57 | last_access = 0 # time of last client access to the camera 58 | event = CameraEvent() 59 | 60 | def __init__(self): 61 | """Start the background camera thread if it isn't running yet.""" 62 | if BaseCamera.thread is None: 63 | BaseCamera.last_access = time.time() 64 | 65 | # start background frame thread 66 | BaseCamera.thread = threading.Thread(target=self._thread) 67 | BaseCamera.thread.start() 68 | 69 | # wait until frames are available 70 | while self.get_frame() is None: 71 | time.sleep(0) 72 | 73 | def get_frame(self): 74 | """Return the current camera frame.""" 75 | BaseCamera.last_access = time.time() 76 | 77 | # wait for a signal from the camera thread 78 | BaseCamera.event.wait() 79 | BaseCamera.event.clear() 80 | 81 | return BaseCamera.frame 82 | 83 | @staticmethod 84 | def frames(): 85 | """"Generator that returns frames from the camera.""" 86 | raise RuntimeError('Must be implemented by subclasses.') 87 | 88 | @classmethod 89 | def _thread(cls): 90 | """Camera background thread.""" 91 | print('Starting camera thread.') 92 | frames_iterator = cls.frames() 93 | for frame in frames_iterator: 94 | BaseCamera.frame = frame 95 | BaseCamera.event.set() # send signal to clients 96 | time.sleep(0) 97 | 98 | # if there hasn't been any clients asking for frames in 99 | # the last 10 seconds then stop the thread 100 | if time.time() - BaseCamera.last_access > 10: 101 | frames_iterator.close() 102 | print('Stopping camera thread due to inactivity.') 103 | break 104 | BaseCamera.thread = None 105 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/face_recognition/calculate_features.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | import numpy as np 4 | import skimage 5 | import skimage.transform 6 | import json, base64 7 | 8 | from cv_utils import decode_yolov3, preprocess 9 | from tflite_runtime.interpreter import Interpreter 10 | 11 | FACE_ANCHORS = [[[0.51424575, 0.54116074], [0.29523918, 0.45838044], [0.21371929, 
0.21518053]], 12 | [[0.10255913, 0.42572159], [0.05785894, 0.17925645], [0.01839256, 0.07238193]]] 13 | 14 | IMG_SHAPE = (128, 128) # in HW form 15 | offset_x = 0 16 | offset_y = -15 17 | src = np.array([(44+offset_x, 59+offset_y), 18 | (84+offset_x, 59+offset_y), 19 | (64+offset_x, 82+offset_y), 20 | (47+offset_x, 105), 21 | (81+offset_x, 105)], dtype=np.float32) 22 | 23 | def write_db(db, id, name, vector): 24 | 25 | vector = base64.b64encode(vector).decode('utf-8') 26 | 27 | entry = {"name": name, "vector": vector} 28 | db[id] = entry 29 | print(db) 30 | f = open('database.db','w') 31 | entry = json.dumps(db) 32 | f.write(entry) 33 | f.close() 34 | 35 | return db 36 | 37 | def read_db(db_path = 'database.db'): 38 | try: 39 | f = open(db_path, 'r') 40 | except FileNotFoundError: 41 | clear_db(db_path) 42 | f = open(db_path, 'r') 43 | 44 | content = f.read() 45 | #print(content) 46 | if content: 47 | db = json.loads(content) 48 | f.close() 49 | return db 50 | 51 | def clear_db(db_path = 'database.db'): 52 | 53 | f = open(db_path,'w') 54 | db = {} 55 | content = json.dumps(db) 56 | f.write(content) 57 | f.close() 58 | 59 | def draw_bounding_boxes(frame, detections, labels=['face'], kpts = None): 60 | 61 | def _to_original_scale(boxes, frame_height, frame_width): 62 | minmax_boxes = np.empty(shape=(4, ), dtype=np.int) 63 | 64 | cx = boxes[0] * frame_width 65 | cy = boxes[1] * frame_height 66 | w = boxes[2] * frame_width 67 | h = boxes[3] * frame_height 68 | 69 | minmax_boxes[0] = cx - w/2 70 | minmax_boxes[1] = cy - h/2 71 | minmax_boxes[2] = cx + w/2 72 | minmax_boxes[3] = cy + h/2 73 | 74 | return minmax_boxes 75 | 76 | color = (0, 255, 0) 77 | label_color = (125, 125, 125) 78 | 79 | for i in range(len(detections)): 80 | class_idx, box, confidence = [d for d in detections[i]] 81 | 82 | # Obtain frame size and resized bounding box positions 83 | frame_height, frame_width = frame.shape[:2] 84 | 85 | x_min, y_min, x_max, y_max = _to_original_scale(box, frame_height, frame_width) 86 | # Ensure box stays within the frame 87 | x_min, y_min = max(0, x_min), max(0, y_min) 88 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 89 | 90 | # Draw bounding box around detected object 91 | cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2) 92 | 93 | # Create label for detected object class 94 | label = labels[class_idx].capitalize() 95 | label = f'{label} {confidence * 100:.1f}%' 96 | 97 | # Make sure label always stays on-screen 98 | x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2] 99 | 100 | lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text) 101 | lbl_box_xy_max = (x_min + int(0.55 * x_text), y_min + y_text if y_min<25 else y_min) 102 | lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5) 103 | 104 | # Add label and confidence value 105 | cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1) 106 | cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.50, 107 | label_color, 1, cv2.LINE_AA) 108 | 109 | for kpt_set in kpts: 110 | for kpt in kpt_set: 111 | cv2.circle(frame, (int(kpt[0]), int(kpt[1])), 5, (255, 0, 0), 2) 112 | 113 | def process_faces(frame, detections, db, id, name): 114 | kpts_list = [] 115 | 116 | def _to_original_scale(boxes, frame_height, frame_width): 117 | minmax_boxes = np.empty(shape=(4, ), dtype=np.int) 118 | 119 | cx = boxes[0] * frame_width 120 | cy = boxes[1] * frame_height 121 | w = boxes[2] * frame_width 122 | h = boxes[3] * frame_height 123 | 124 | minmax_boxes[0] = cx - 
w/2 125 | minmax_boxes[1] = cy - h/2 126 | minmax_boxes[2] = cx + w/2 127 | minmax_boxes[3] = cy + h/2 128 | 129 | return minmax_boxes 130 | 131 | for i in range(len(detections)): 132 | _, box, _ = [d for d in detections[i]] 133 | 134 | # Obtain frame size and resized bounding box positions 135 | frame_height, frame_width = frame.shape[:2] 136 | 137 | x_min, y_min, x_max, y_max = _to_original_scale(box, frame_height, frame_width) 138 | # Ensure box stays within the frame 139 | x_min, y_min = max(0, x_min), max(0, y_min) 140 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 141 | 142 | x, y, w, h = x_min, y_min, x_max - x_min, y_max - y_min 143 | 144 | face_img = frame[y_min:y_max, x_min:x_max] 145 | 146 | plist = second_stage_network.run(face_img)[0] 147 | 148 | le = (x + int(plist[0] * w+5), y + int(plist[1] * h+5)) 149 | re = (x + int(plist[2] * w), y + int(plist[3] * h+5)) 150 | n = (x + int(plist[4] * w), y + int(plist[5] * h)) 151 | lm = (x + int(plist[6] * w), y + int(plist[7] * h)) 152 | rm = (x + int(plist[8] * w), y + int(plist[9] * h)) 153 | kpts = [le, re, n, lm, rm] 154 | kpts_list.append(kpts) 155 | kpts = np.array(kpts, dtype = np.float32) 156 | 157 | transformer = skimage.transform.SimilarityTransform() 158 | transformer.estimate(kpts, src) 159 | M = transformer.params[0: 2, : ] 160 | warped_img = cv2.warpAffine(frame, M, (IMG_SHAPE[1], IMG_SHAPE[0]), borderValue = 0.0) 161 | 162 | features = third_stage_network.run(warped_img)[0] 163 | 164 | write_db(db, id, name, features) 165 | 166 | return kpts_list 167 | 168 | class NetworkExecutor(object): 169 | 170 | def __init__(self, model_file): 171 | 172 | self.interpreter = Interpreter(model_file, num_threads=3) 173 | self.interpreter.allocate_tensors() 174 | _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape'] 175 | self.tensor_index = self.interpreter.get_input_details()[0]['index'] 176 | 177 | def get_output_tensors(self): 178 | 179 | output_details = self.interpreter.get_output_details() 180 | tensor_list = [] 181 | 182 | for output in output_details: 183 | tensor = np.squeeze(self.interpreter.get_tensor(output['index'])) 184 | tensor_list.append(tensor) 185 | 186 | return tensor_list 187 | 188 | def run(self, image): 189 | if image.shape[1:2] != (self.input_height, self.input_width): 190 | img = cv2.resize(image, (self.input_width, self.input_height)) 191 | img = preprocess(img) 192 | self.interpreter.set_tensor(self.tensor_index, img) 193 | self.interpreter.invoke() 194 | return self.get_output_tensors() 195 | 196 | def main(args): 197 | #clear_db() 198 | db = read_db(args.db_file) 199 | 200 | frame = cv2.imread(args.img_file) 201 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 202 | 203 | results = first_stage_network.run(frame) 204 | detections = decode_yolov3(netout = results, nms_threshold = 0.1, 205 | threshold = 0.7, anchors = FACE_ANCHORS) 206 | 207 | kpts = process_faces(frame, detections, db, args.id, args.name) 208 | 209 | draw_bounding_boxes(frame, detections, ['face'], kpts) 210 | frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) 211 | cv2.imwrite(args.img_file.split('.')[0]+'_result.jpg', frame) 212 | 213 | 214 | if __name__ == "__main__" : 215 | 216 | print("OpenCV version: {}".format(cv2. 
__version__)) 217 | 218 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 219 | parser.add_argument('--first_stage', help='Path to the YOLOv3 face detection model to use.', required=True) 220 | parser.add_argument('--second_stage', help='Path to the keypoints detection model to use.', required=True) 221 | parser.add_argument('--third_stage', help='Path to the feature vector embedding extractor model to use.', required=True) 222 | 223 | parser.add_argument('--db_file', help='File path to database', default="database.db") 224 | parser.add_argument('--img_file', help='File path to picture', required=True) 225 | parser.add_argument('--id', default = '0', type=str, 226 | help='Unique ID for the face') 227 | parser.add_argument('--name', default = 'John Doe', type=str, 228 | help='Name for the face feature vecotr (can be duplicate)') 229 | 230 | args = parser.parse_args() 231 | 232 | first_stage_network = NetworkExecutor(args.first_stage) 233 | second_stage_network = NetworkExecutor(args.second_stage) 234 | third_stage_network = NetworkExecutor(args.third_stage) 235 | 236 | main(args) -------------------------------------------------------------------------------- /examples/tensorflow_lite/face_recognition/camera_opencv.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from base_camera import BaseCamera 3 | 4 | 5 | class Camera(BaseCamera): 6 | video_source = 0 7 | 8 | @staticmethod 9 | def set_video_source(source): 10 | Camera.video_source = source 11 | 12 | @staticmethod 13 | def frames(): 14 | camera = cv2.VideoCapture(Camera.video_source) 15 | if not camera.isOpened(): 16 | raise RuntimeError('Could not start camera.') 17 | 18 | while True: 19 | # read current frame 20 | _, img = camera.read() 21 | #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 22 | 23 | # return img 24 | yield img 25 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/face_recognition/camera_pi.py: -------------------------------------------------------------------------------- 1 | import io 2 | import time 3 | import picamera 4 | import picamera.array 5 | import cv2 6 | from base_camera import BaseCamera 7 | 8 | 9 | class Camera(BaseCamera): 10 | video_source = 0 11 | 12 | @staticmethod 13 | def set_video_source(source): 14 | pass 15 | 16 | @staticmethod 17 | def frames(): 18 | with picamera.PiCamera(resolution = (1280,720)) as camera: 19 | # let camera warm up 20 | time.sleep(2) 21 | 22 | with picamera.array.PiRGBArray(camera, size=(1280,720)) as stream: 23 | while True: 24 | 25 | camera.capture(stream, format='bgr', use_video_port=True) 26 | # At this point the image is available as stream.array 27 | image = stream.array 28 | stream.truncate(0) 29 | yield image 30 | 31 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/face_recognition/multi_stage_file.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | import numpy as np 4 | import skimage 5 | import skimage.transform 6 | import json, base64 7 | import time 8 | from tqdm import tqdm 9 | 10 | from cv_utils import decode_yolov3, preprocess, init_video_file_capture 11 | from tflite_runtime.interpreter import Interpreter 12 | 13 | FACE_ANCHORS = [[[0.51424575, 0.54116074], [0.29523918, 0.45838044], [0.21371929, 0.21518053]], 14 | [[0.10255913, 0.42572159], [0.05785894, 0.17925645], [0.01839256, 
0.07238193]]] 15 | 16 | IMG_SHAPE = (128, 128) # in HW form 17 | offset_x = 0 18 | offset_y = -15 19 | src = np.array([(44+offset_x, 59+offset_y), 20 | (84+offset_x, 59+offset_y), 21 | (64+offset_x, 82+offset_y), 22 | (47+offset_x, 105), 23 | (81+offset_x, 105)], dtype=np.float32) 24 | 25 | def read_db(db_path = 'database.db'): 26 | try: 27 | f = open(db_path, 'r') 28 | except FileNotFoundError: 29 | clear_db(db_path) 30 | f = open(db_path, 'r') 31 | 32 | content = f.read() 33 | #print(content) 34 | if content: 35 | db = json.loads(content) 36 | f.close() 37 | return db 38 | 39 | def clear_db(db_path = 'database.db'): 40 | 41 | f = open(db_path,'w') 42 | db = {} 43 | content = json.dumps(db) 44 | f.write(content) 45 | f.close() 46 | 47 | def draw_bounding_boxes(frame, detections, kpts, ids): 48 | 49 | def _to_original_scale(boxes, frame_height, frame_width): 50 | minmax_boxes = np.empty(shape=(4, ), dtype=np.int) 51 | 52 | cx = boxes[0] * frame_width 53 | cy = boxes[1] * frame_height 54 | w = boxes[2] * frame_width 55 | h = boxes[3] * frame_height 56 | 57 | minmax_boxes[0] = cx - w/2 58 | minmax_boxes[1] = cy - h/2 59 | minmax_boxes[2] = cx + w/2 60 | minmax_boxes[3] = cy + h/2 61 | 62 | return minmax_boxes 63 | 64 | color = (0, 255, 0) 65 | label_color = (125, 125, 125) 66 | 67 | for i in range(len(detections)): 68 | _, box, _ = [d for d in detections[i]] 69 | 70 | # Obtain frame size and resized bounding box positions 71 | frame_height, frame_width = frame.shape[:2] 72 | 73 | x_min, y_min, x_max, y_max = _to_original_scale(box, frame_height, frame_width) 74 | # Ensure box stays within the frame 75 | x_min, y_min = max(0, x_min), max(0, y_min) 76 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 77 | 78 | # Draw bounding box around detected object 79 | cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2) 80 | 81 | # Create label for detected object class 82 | label = 'ID: {} Name: {} {}%'.format(*ids[i]) 83 | label_color = (255, 255, 255) 84 | 85 | # Make sure label always stays on-screen 86 | x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2] 87 | 88 | lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text) 89 | lbl_box_xy_max = (x_min + int(0.75 * x_text), y_min + y_text if y_min<25 else y_min) 90 | lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5) 91 | 92 | # Add label and confidence value 93 | cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1) 94 | cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.70, label_color, 1, cv2.LINE_AA) 95 | 96 | for kpt_set in kpts: 97 | for kpt in kpt_set: 98 | cv2.circle(frame, (int(kpt[0]), int(kpt[1])), 5, (255, 0, 0), 2) 99 | 100 | def process_faces(frame, detections, db): 101 | kpts_list = [] 102 | id_list = [] 103 | 104 | def _to_original_scale(boxes, frame_height, frame_width): 105 | minmax_boxes = np.empty(shape=(4, ), dtype=np.int) 106 | 107 | cx = boxes[0] * frame_width 108 | cy = boxes[1] * frame_height 109 | w = boxes[2] * frame_width 110 | h = boxes[3] * frame_height 111 | 112 | minmax_boxes[0] = cx - w/2 113 | minmax_boxes[1] = cy - h/2 114 | minmax_boxes[2] = cx + w/2 115 | minmax_boxes[3] = cy + h/2 116 | 117 | return minmax_boxes 118 | 119 | for i in range(len(detections)): 120 | _, box, _ = [d for d in detections[i]] 121 | 122 | # Obtain frame size and resized bounding box positions 123 | frame_height, frame_width = frame.shape[:2] 124 | 125 | x_min, y_min, x_max, y_max = _to_original_scale(box, frame_height, frame_width) 126 | # 
Ensure box stays within the frame 127 | x_min, y_min = max(0, x_min), max(0, y_min) 128 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 129 | 130 | x, y, w, h = x_min, y_min, x_max - x_min, y_max - y_min 131 | 132 | face_img = frame[y_min:y_max, x_min:x_max] 133 | 134 | plist = second_stage_network.run(face_img)[0] 135 | 136 | le = (x + int(plist[0] * w+5), y + int(plist[1] * h+5)) 137 | re = (x + int(plist[2] * w), y + int(plist[3] * h+5)) 138 | n = (x + int(plist[4] * w), y + int(plist[5] * h)) 139 | lm = (x + int(plist[6] * w), y + int(plist[7] * h)) 140 | rm = (x + int(plist[8] * w), y + int(plist[9] * h)) 141 | kpts = [le, re, n, lm, rm] 142 | kpts_list.append(kpts) 143 | kpts = np.array(kpts, dtype = np.float32) 144 | 145 | transformer = skimage.transform.SimilarityTransform() 146 | transformer.estimate(kpts, src) 147 | M = transformer.params[0: 2, : ] 148 | warped_img = cv2.warpAffine(frame, M, (IMG_SHAPE[1], IMG_SHAPE[0]), borderValue = 0.0) 149 | 150 | features = third_stage_network.run(warped_img)[0] 151 | 152 | highest_score = 0 153 | 154 | for id in db.keys(): 155 | cos_sim = np.dot(features, db[id]['vector'])/(np.linalg.norm(features)*np.linalg.norm(db[id]['vector'])) 156 | cos_sim /= 2 157 | cos_sim += 0.5 158 | cos_sim *= 100 159 | if highest_score < cos_sim: 160 | highest_score = cos_sim 161 | recognized_id = id 162 | 163 | if highest_score > 70.0: 164 | print(recognized_id, db[recognized_id]['name'], highest_score) 165 | id_list.append([recognized_id, db[recognized_id]['name'], highest_score]) 166 | else: 167 | id_list.append(['X', '', 0.0]) 168 | return kpts_list, id_list 169 | 170 | class NetworkExecutor(object): 171 | 172 | def __init__(self, model_file): 173 | 174 | self.interpreter = Interpreter(model_file, num_threads=3) 175 | self.interpreter.allocate_tensors() 176 | _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape'] 177 | self.tensor_index = self.interpreter.get_input_details()[0]['index'] 178 | 179 | def get_output_tensors(self): 180 | 181 | output_details = self.interpreter.get_output_details() 182 | tensor_indices = [] 183 | tensor_list = [] 184 | 185 | for output in output_details: 186 | tensor = np.squeeze(self.interpreter.get_tensor(output['index'])) 187 | tensor_list.append(tensor) 188 | 189 | return tensor_list 190 | 191 | def run(self, image): 192 | if image.shape[1:2] != (self.input_height, self.input_width): 193 | img = cv2.resize(image, (self.input_width, self.input_height)) 194 | img = preprocess(img) 195 | self.interpreter.set_tensor(self.tensor_index, img) 196 | self.interpreter.invoke() 197 | return self.get_output_tensors() 198 | 199 | def main(args): 200 | video, video_writer, frame_count = init_video_file_capture(args.file, 'age_gender_demo') 201 | 202 | frame_num = len(frame_count) 203 | times = [] 204 | 205 | for _ in tqdm(frame_count, desc='Processing frames'): 206 | frame_present, frame = video.read() 207 | if not frame_present: 208 | continue 209 | 210 | start_time = time.time() 211 | 212 | results = first_stage_network.run(frame) 213 | detections = decode_yolov3(netout = results, nms_threshold = 0.1, 214 | threshold = args.threshold, anchors = FACE_ANCHORS) 215 | kpts, ids = process_faces(frame, detections, db) 216 | 217 | elapsed_ms = (time.time() - start_time) * 1000 218 | 219 | draw_bounding_boxes(frame, detections, kpts, ids) 220 | times.append(elapsed_ms) 221 | video_writer.write(frame) 222 | 223 | print('Finished processing frames') 224 | video.release(), 
video_writer.release() 225 | 226 | print("Average time(ms): ", sum(times)//frame_num) 227 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1000.0 / average of inference times for all the frames 228 | 229 | if __name__ == "__main__": 230 | 231 | print("OpenCV version: {}".format(cv2.__version__)) 232 | 233 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 234 | parser.add_argument('--first_stage', help='Path to the YOLOv3 face detection model to use.', required=True) 235 | parser.add_argument('--second_stage', help='Path to the keypoints detection model to use.', required=True) 236 | parser.add_argument('--third_stage', help='Path to the feature vector embedding extractor model to use.', required=True) 237 | 238 | parser.add_argument('--db_file', help='File path to database', default="database.db") 239 | 240 | parser.add_argument('--threshold', help='Confidence threshold.', type=float, default=0.7) 241 | parser.add_argument('--file', help='File path of video file', required=True) 242 | args = parser.parse_args() 243 | 244 | first_stage_network = NetworkExecutor(args.first_stage) 245 | second_stage_network = NetworkExecutor(args.second_stage) 246 | third_stage_network = NetworkExecutor(args.third_stage) 247 | 248 | db = read_db(args.db_file) 249 | for item in db: 250 | db[item]['vector'] = np.frombuffer(base64.b64decode(db[item]['vector']), np.float32) 251 | 252 | main(args) 253 | 254 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/face_recognition/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.19.2 2 | tqdm>=4.47.0 3 | scikit_image>=0.18.3 4 | opencv-python>=4.5.3 5 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/face_recognition/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Video Streaming Demonstration 4 | 5 | 6 |

Tflite Face Recognition Inference Demo
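To recap the matching step used by the face-recognition demos above: each stored entry holds a base64-encoded float32 embedding, and an incoming face is assigned to the entry with the highest cosine similarity, rescaled to a 0-100 score and accepted above a threshold of 70. The sketch below restates that logic as a standalone pair of helpers (the function names are illustrative).

```
import base64
import json
import numpy as np

def load_db(path='database.db'):
    """Load the JSON face database and decode the base64 embedding vectors."""
    with open(path) as f:
        db = json.load(f)
    for entry in db.values():
        entry['vector'] = np.frombuffer(base64.b64decode(entry['vector']), np.float32)
    return db

def best_match(features, db, threshold=70.0):
    """Return (id, name, score) for the closest stored embedding, or None."""
    best_id, best_score = None, 0.0
    for face_id, entry in db.items():
        cos = np.dot(features, entry['vector']) / (
            np.linalg.norm(features) * np.linalg.norm(entry['vector']))
        score = (cos / 2 + 0.5) * 100          # map [-1, 1] onto [0, 100]
        if score > best_score:
            best_id, best_score = face_id, score
    if best_id is None or best_score < threshold:
        return None
    return best_id, db[best_id]['name'], best_score
```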

7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_age_gender/base_camera.py: -------------------------------------------------------------------------------- 1 | import time 2 | import threading 3 | try: 4 | from greenlet import getcurrent as get_ident 5 | except ImportError: 6 | try: 7 | from thread import get_ident 8 | except ImportError: 9 | from _thread import get_ident 10 | 11 | 12 | class CameraEvent(object): 13 | """An Event-like class that signals all active clients when a new frame is 14 | available. 15 | """ 16 | def __init__(self): 17 | self.events = {} 18 | 19 | def wait(self): 20 | """Invoked from each client's thread to wait for the next frame.""" 21 | ident = get_ident() 22 | if ident not in self.events: 23 | # this is a new client 24 | # add an entry for it in the self.events dict 25 | # each entry has two elements, a threading.Event() and a timestamp 26 | self.events[ident] = [threading.Event(), time.time()] 27 | return self.events[ident][0].wait() 28 | 29 | def set(self): 30 | """Invoked by the camera thread when a new frame is available.""" 31 | now = time.time() 32 | remove = None 33 | for ident, event in self.events.items(): 34 | if not event[0].isSet(): 35 | # if this client's event is not set, then set it 36 | # also update the last set timestamp to now 37 | event[0].set() 38 | event[1] = now 39 | else: 40 | # if the client's event is already set, it means the client 41 | # did not process a previous frame 42 | # if the event stays set for more than 5 seconds, then assume 43 | # the client is gone and remove it 44 | if now - event[1] > 5: 45 | remove = ident 46 | if remove: 47 | del self.events[remove] 48 | 49 | def clear(self): 50 | """Invoked from each client's thread after a frame was processed.""" 51 | self.events[get_ident()][0].clear() 52 | 53 | 54 | class BaseCamera(object): 55 | thread = None # background thread that reads frames from camera 56 | frame = None # current frame is stored here by background thread 57 | last_access = 0 # time of last client access to the camera 58 | event = CameraEvent() 59 | 60 | def __init__(self): 61 | """Start the background camera thread if it isn't running yet.""" 62 | if BaseCamera.thread is None: 63 | BaseCamera.last_access = time.time() 64 | 65 | # start background frame thread 66 | BaseCamera.thread = threading.Thread(target=self._thread) 67 | BaseCamera.thread.start() 68 | 69 | # wait until frames are available 70 | while self.get_frame() is None: 71 | time.sleep(0) 72 | 73 | def get_frame(self): 74 | """Return the current camera frame.""" 75 | BaseCamera.last_access = time.time() 76 | 77 | # wait for a signal from the camera thread 78 | BaseCamera.event.wait() 79 | BaseCamera.event.clear() 80 | 81 | return BaseCamera.frame 82 | 83 | @staticmethod 84 | def frames(): 85 | """"Generator that returns frames from the camera.""" 86 | raise RuntimeError('Must be implemented by subclasses.') 87 | 88 | @classmethod 89 | def _thread(cls): 90 | """Camera background thread.""" 91 | print('Starting camera thread.') 92 | frames_iterator = cls.frames() 93 | for frame in frames_iterator: 94 | BaseCamera.frame = frame 95 | BaseCamera.event.set() # send signal to clients 96 | time.sleep(0) 97 | 98 | # if there hasn't been any clients asking for frames in 99 | # the last 10 seconds then stop the thread 100 | if time.time() - BaseCamera.last_access > 10: 101 | frames_iterator.close() 102 | print('Stopping camera thread due to inactivity.') 103 
| break 104 | BaseCamera.thread = None 105 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_age_gender/camera_opencv.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from base_camera import BaseCamera 3 | 4 | 5 | class Camera(BaseCamera): 6 | video_source = 0 7 | 8 | @staticmethod 9 | def set_video_source(source): 10 | Camera.video_source = source 11 | 12 | @staticmethod 13 | def frames(): 14 | camera = cv2.VideoCapture(Camera.video_source) 15 | if not camera.isOpened(): 16 | raise RuntimeError('Could not start camera.') 17 | 18 | while True: 19 | # read current frame 20 | _, img = camera.read() 21 | #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 22 | 23 | # return img 24 | yield img 25 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_age_gender/camera_pi.py: -------------------------------------------------------------------------------- 1 | import io 2 | import time 3 | import picamera 4 | import picamera.array 5 | import cv2 6 | from base_camera import BaseCamera 7 | 8 | 9 | class Camera(BaseCamera): 10 | video_source = 0 11 | 12 | @staticmethod 13 | def set_video_source(source): 14 | pass 15 | 16 | @staticmethod 17 | def frames(): 18 | with picamera.PiCamera(resolution = (1280,720)) as camera: 19 | # let camera warm up 20 | time.sleep(2) 21 | 22 | with picamera.array.PiRGBArray(camera, size=(1280,720)) as stream: 23 | while True: 24 | 25 | camera.capture(stream, format='bgr', use_video_port=True) 26 | # At this point the image is available as stream.array 27 | image = stream.array 28 | stream.truncate(0) 29 | yield image 30 | 31 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_age_gender/multi_stage_file.py: -------------------------------------------------------------------------------- 1 | import time 2 | import argparse 3 | import os 4 | import cv2 5 | import numpy as np 6 | from tqdm import tqdm 7 | 8 | from cv_utils import init_video_file_capture, decode_yolov3, decode_classifier, draw_classification, draw_bounding_boxes, preprocess 9 | from tflite_runtime.interpreter import Interpreter 10 | 11 | def process_age_gender(roi_img): 12 | 13 | ages = ['0-10', '11-20', '21-45', '46-60', '60-100'] 14 | genders = ['M', 'F'] 15 | 16 | results = second_stage_network.run(roi_img) 17 | age = np.argmax(results[0]) 18 | gender = 0 if results[1] < 0.5 else 1 19 | 20 | label = f'{ages[age]} : {genders[gender]}' 21 | 22 | return label 23 | 24 | class NetworkExecutor(object): 25 | 26 | def __init__(self, model_file): 27 | 28 | self.interpreter = Interpreter(model_file, num_threads=3) 29 | self.interpreter.allocate_tensors() 30 | _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape'] 31 | self.tensor_index = self.interpreter.get_input_details()[0]['index'] 32 | 33 | def get_output_tensors(self): 34 | 35 | output_details = self.interpreter.get_output_details() 36 | tensor_indices = [] 37 | tensor_list = [] 38 | 39 | for output in output_details: 40 | tensor = np.squeeze(self.interpreter.get_tensor(output['index'])) 41 | tensor_list.append(tensor) 42 | 43 | return tensor_list 44 | 45 | def run(self, image): 46 | if image.shape[1:2] != (self.input_height, self.input_width): 47 | img = cv2.resize(image, (self.input_width, self.input_height)) 48 | img = preprocess(img) 49 | 
self.interpreter.set_tensor(self.tensor_index, img) 50 | self.interpreter.invoke() 51 | return self.get_output_tensors() 52 | 53 | def main(args): 54 | video, video_writer, frame_count = init_video_file_capture(args.file, 'age_gender_demo') 55 | 56 | frame_num = len(frame_count) 57 | times = [] 58 | 59 | for _ in tqdm(frame_count, desc='Processing frames'): 60 | frame_present, frame = video.read() 61 | if not frame_present: 62 | continue 63 | 64 | start_time = time.time() 65 | 66 | results = first_stage_network.run(frame) 67 | detections = decode_yolov3(netout = results, nms_threshold = 0.1, threshold = args.threshold) 68 | draw_bounding_boxes(frame, detections, None, process_age_gender) 69 | 70 | elapsed_ms = (time.time() - start_time) * 1000 71 | 72 | times.append(elapsed_ms) 73 | video_writer.write(frame) 74 | 75 | print('Finished processing frames') 76 | video.release(), video_writer.release() 77 | 78 | print("Average time(ms): ", sum(times)//frame_num) 79 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1 / time to process loop 80 | 81 | if __name__ == "__main__" : 82 | 83 | print("OpenCV version: {}".format(cv2. __version__)) 84 | 85 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 86 | parser.add_argument('--first_stage', help='File path of .tflite file.', required=True) 87 | parser.add_argument('--second_stage', help='File path of .tflite file.', required=True) 88 | parser.add_argument('--threshold', help='Confidence threshold.', default=0.7) 89 | parser.add_argument('--file', help='File path of video file', required=True) 90 | args = parser.parse_args() 91 | 92 | first_stage_network = NetworkExecutor(args.first_stage) 93 | second_stage_network = NetworkExecutor(args.second_stage) 94 | 95 | main(args) 96 | 97 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_age_gender/multi_stage_stream.py: -------------------------------------------------------------------------------- 1 | import time 2 | import argparse 3 | import os 4 | import cv2 5 | import numpy as np 6 | 7 | from cv_utils import decode_yolov3, preprocess, draw_bounding_boxes 8 | from tflite_runtime.interpreter import Interpreter 9 | from flask import Flask, render_template, request, Response 10 | 11 | app = Flask (__name__, static_url_path = '') 12 | 13 | def process_age_gender(roi_img): 14 | 15 | ages = ['0-10', '11-20', '21-45', '46-60', '60-100'] 16 | genders = ['M', 'F'] 17 | 18 | results = second_stage_network.run(roi_img) 19 | age = np.argmax(results[0]) 20 | gender = 0 if results[1] < 0.5 else 1 21 | 22 | label = f'{ages[age]} : {genders[gender]}' 23 | 24 | return label 25 | 26 | class NetworkExecutor(object): 27 | 28 | def __init__(self, model_file): 29 | 30 | self.interpreter = Interpreter(model_file, num_threads=3) 31 | self.interpreter.allocate_tensors() 32 | _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape'] 33 | self.tensor_index = self.interpreter.get_input_details()[0]['index'] 34 | 35 | def get_output_tensors(self): 36 | 37 | output_details = self.interpreter.get_output_details() 38 | tensor_indices = [] 39 | tensor_list = [] 40 | 41 | for output in output_details: 42 | tensor = np.squeeze(self.interpreter.get_tensor(output['index'])) 43 | tensor_list.append(tensor) 44 | 45 | return tensor_list 46 | 47 | def run(self, image): 48 | if image.shape[1:2] != (self.input_height, self.input_width): 49 | img = cv2.resize(image, 
(self.input_width, self.input_height)) 50 | img = preprocess(img) 51 | self.interpreter.set_tensor(self.tensor_index, img) 52 | self.interpreter.invoke() 53 | return self.get_output_tensors() 54 | 55 | class Detector(NetworkExecutor): 56 | 57 | def __init__(self, label_file, model_file, threshold): 58 | super().__init__(model_file) 59 | self.threshold = float(threshold) 60 | 61 | def detect(self, frame): 62 | start_time = time.time() 63 | results = self.run(frame) 64 | elapsed_ms = (time.time() - start_time) * 1000 65 | 66 | detections = decode_yolov3(netout = results, nms_threshold = 0.1, threshold = self.threshold) 67 | draw_bounding_boxes(frame, detections, None, process_age_gender) 68 | 69 | fps = 1 / elapsed_ms*1000 70 | print("Estimated frames per second : {0:.2f} Inference time: {1:.2f}".format(fps, elapsed_ms)) 71 | 72 | return cv2.imencode('.jpg', frame)[1].tobytes() 73 | 74 | @app.route("/") 75 | def index(): 76 | return render_template('index.html', name = None) 77 | 78 | def gen(camera): 79 | while True: 80 | frame = camera.get_frame() 81 | image = detector.detect(frame) 82 | yield (b'--frame\r\n'+b'Content-Type: image/jpeg\r\n\r\n' + image + b'\r\n') 83 | 84 | @app.route('/video_feed') 85 | def video_feed(): 86 | return Response(gen(Camera()), mimetype='multipart/x-mixed-replace; boundary=frame') 87 | 88 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 89 | parser.add_argument('--first_stage', help='File path of .tflite file.', required=True) 90 | parser.add_argument('--second_stage', help='File path of .tflite file.', required=True) 91 | parser.add_argument('--threshold', help='Confidence threshold.', default=0.5) 92 | parser.add_argument('--source', help='picamera or cv', default='cv') 93 | args = parser.parse_args() 94 | 95 | if args.source == "cv": 96 | from camera_opencv import Camera 97 | source = 0 98 | elif args.source == "picamera": 99 | from camera_pi import Camera 100 | source = 0 101 | 102 | Camera.set_video_source(source) 103 | 104 | detector = Detector(None, args.first_stage, args.threshold) 105 | second_stage_network = NetworkExecutor(args.second_stage) 106 | 107 | if __name__ == "__main__" : 108 | app.run(host = '0.0.0.0', port = 5000, debug = True) 109 | 110 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_age_gender/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Video Streaming Demonstration 4 | 5 | 6 |

Tflite Multi-stage Inference Demo

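The age/gender demo above decodes its second-stage classifier inside process_age_gender(): results[0] is treated as a five-bucket age distribution and results[1] as a gender score thresholded at 0.5. Below is a minimal, self-contained sketch of that decoding step. It is not a file from this repository; the function name decode_age_gender and the mock arrays are illustrative stand-ins for the two tensors that NetworkExecutor.run() returns for a face crop.

# Sketch only: mirrors the decoding done by process_age_gender() above, with
# mock arrays in place of the real second-stage TFLite outputs.
import numpy as np

AGES = ['0-10', '11-20', '21-45', '46-60', '60-100']
GENDERS = ['M', 'F']

def decode_age_gender(age_probs, gender_score, gender_threshold=0.5):
    """Map raw classifier outputs to the 'age : gender' label drawn on the box."""
    age = int(np.argmax(age_probs))                      # most likely age bracket
    gender = 0 if gender_score < gender_threshold else 1 # below threshold -> 'M'
    return f'{AGES[age]} : {GENDERS[gender]}'

if __name__ == '__main__':
    mock_age_probs = np.array([0.05, 0.10, 0.60, 0.20, 0.05])   # placeholder output
    mock_gender_score = 0.31                                     # placeholder output
    print(decode_age_gender(mock_age_probs, mock_gender_score))  # -> '21-45 : M'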
7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_emotion/base_camera.py: -------------------------------------------------------------------------------- 1 | import time 2 | import threading 3 | try: 4 | from greenlet import getcurrent as get_ident 5 | except ImportError: 6 | try: 7 | from thread import get_ident 8 | except ImportError: 9 | from _thread import get_ident 10 | 11 | 12 | class CameraEvent(object): 13 | """An Event-like class that signals all active clients when a new frame is 14 | available. 15 | """ 16 | def __init__(self): 17 | self.events = {} 18 | 19 | def wait(self): 20 | """Invoked from each client's thread to wait for the next frame.""" 21 | ident = get_ident() 22 | if ident not in self.events: 23 | # this is a new client 24 | # add an entry for it in the self.events dict 25 | # each entry has two elements, a threading.Event() and a timestamp 26 | self.events[ident] = [threading.Event(), time.time()] 27 | return self.events[ident][0].wait() 28 | 29 | def set(self): 30 | """Invoked by the camera thread when a new frame is available.""" 31 | now = time.time() 32 | remove = None 33 | for ident, event in self.events.items(): 34 | if not event[0].isSet(): 35 | # if this client's event is not set, then set it 36 | # also update the last set timestamp to now 37 | event[0].set() 38 | event[1] = now 39 | else: 40 | # if the client's event is already set, it means the client 41 | # did not process a previous frame 42 | # if the event stays set for more than 5 seconds, then assume 43 | # the client is gone and remove it 44 | if now - event[1] > 5: 45 | remove = ident 46 | if remove: 47 | del self.events[remove] 48 | 49 | def clear(self): 50 | """Invoked from each client's thread after a frame was processed.""" 51 | self.events[get_ident()][0].clear() 52 | 53 | 54 | class BaseCamera(object): 55 | thread = None # background thread that reads frames from camera 56 | frame = None # current frame is stored here by background thread 57 | last_access = 0 # time of last client access to the camera 58 | event = CameraEvent() 59 | 60 | def __init__(self): 61 | """Start the background camera thread if it isn't running yet.""" 62 | if BaseCamera.thread is None: 63 | BaseCamera.last_access = time.time() 64 | 65 | # start background frame thread 66 | BaseCamera.thread = threading.Thread(target=self._thread) 67 | BaseCamera.thread.start() 68 | 69 | # wait until frames are available 70 | while self.get_frame() is None: 71 | time.sleep(0) 72 | 73 | def get_frame(self): 74 | """Return the current camera frame.""" 75 | BaseCamera.last_access = time.time() 76 | 77 | # wait for a signal from the camera thread 78 | BaseCamera.event.wait() 79 | BaseCamera.event.clear() 80 | 81 | return BaseCamera.frame 82 | 83 | @staticmethod 84 | def frames(): 85 | """"Generator that returns frames from the camera.""" 86 | raise RuntimeError('Must be implemented by subclasses.') 87 | 88 | @classmethod 89 | def _thread(cls): 90 | """Camera background thread.""" 91 | print('Starting camera thread.') 92 | frames_iterator = cls.frames() 93 | for frame in frames_iterator: 94 | BaseCamera.frame = frame 95 | BaseCamera.event.set() # send signal to clients 96 | time.sleep(0) 97 | 98 | # if there hasn't been any clients asking for frames in 99 | # the last 10 seconds then stop the thread 100 | if time.time() - BaseCamera.last_access > 10: 101 | frames_iterator.close() 102 | print('Stopping camera thread due to inactivity.') 103 | 
break 104 | BaseCamera.thread = None 105 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_emotion/camera_opencv.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from base_camera import BaseCamera 3 | 4 | 5 | class Camera(BaseCamera): 6 | video_source = 0 7 | 8 | @staticmethod 9 | def set_video_source(source): 10 | Camera.video_source = source 11 | 12 | @staticmethod 13 | def frames(): 14 | camera = cv2.VideoCapture(Camera.video_source) 15 | if not camera.isOpened(): 16 | raise RuntimeError('Could not start camera.') 17 | 18 | while True: 19 | # read current frame 20 | _, img = camera.read() 21 | #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 22 | 23 | # return img 24 | yield img 25 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_emotion/camera_pi.py: -------------------------------------------------------------------------------- 1 | import io 2 | import time 3 | import picamera 4 | import picamera.array 5 | import cv2 6 | from base_camera import BaseCamera 7 | 8 | 9 | class Camera(BaseCamera): 10 | video_source = 0 11 | 12 | @staticmethod 13 | def set_video_source(source): 14 | pass 15 | 16 | @staticmethod 17 | def frames(): 18 | with picamera.PiCamera(resolution = (1280,720)) as camera: 19 | # let camera warm up 20 | time.sleep(2) 21 | 22 | with picamera.array.PiRGBArray(camera, size=(1280,720)) as stream: 23 | while True: 24 | 25 | camera.capture(stream, format='bgr', use_video_port=True) 26 | # At this point the image is available as stream.array 27 | image = stream.array 28 | stream.truncate(0) 29 | yield image 30 | 31 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_emotion/multi_stage_file.py: -------------------------------------------------------------------------------- 1 | import time 2 | import argparse 3 | import os 4 | import cv2 5 | import numpy as np 6 | from tqdm import tqdm 7 | 8 | from cv_utils import init_video_file_capture, decode_yolov3, decode_classifier, draw_classification, draw_bounding_boxes, preprocess 9 | from tflite_runtime.interpreter import Interpreter 10 | 11 | def process_face_expression(roi_img): 12 | 13 | emotion_list = ['neutral', 'happiness', 'surprise', 'sadness', 'anger', 'disgust', 'fear', 'contempt', 'unknown'] 14 | 15 | results = np.squeeze(second_stage_network.run(roi_img)) 16 | emotion_idx = np.argmax(results) 17 | emotion_confience = np.max(results) 18 | 19 | label = f'{emotion_list[emotion_idx]} {emotion_confience:.4f}%' 20 | 21 | return label 22 | 23 | class NetworkExecutor(object): 24 | 25 | def __init__(self, model_file): 26 | 27 | self.interpreter = Interpreter(model_file, num_threads=3) 28 | self.interpreter.allocate_tensors() 29 | _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape'] 30 | self.tensor_index = self.interpreter.get_input_details()[0]['index'] 31 | 32 | def get_output_tensors(self): 33 | 34 | output_details = self.interpreter.get_output_details() 35 | tensor_indices = [] 36 | tensor_list = [] 37 | 38 | for output in output_details: 39 | tensor = np.squeeze(self.interpreter.get_tensor(output['index'])) 40 | tensor_list.append(tensor) 41 | 42 | return tensor_list 43 | 44 | def run(self, image): 45 | if image.shape[1:2] != (self.input_height, self.input_width): 46 | img = cv2.resize(image, 
(self.input_width, self.input_height)) 47 | img = preprocess(img) 48 | self.interpreter.set_tensor(self.tensor_index, img) 49 | self.interpreter.invoke() 50 | return self.get_output_tensors() 51 | 52 | def main(args): 53 | video, video_writer, frame_count = init_video_file_capture(args.file, 'emotion_demo') 54 | 55 | frame_num = len(frame_count) 56 | times = [] 57 | 58 | for _ in tqdm(frame_count, desc='Processing frames'): 59 | frame_present, frame = video.read() 60 | if not frame_present: 61 | continue 62 | 63 | start_time = time.time() 64 | 65 | results = first_stage_network.run(frame) 66 | detections = decode_yolov3(netout = results, nms_threshold = 0.1, threshold = args.threshold, anchors = [[[0.51424575, 0.54116074], [0.29523918, 0.45838044], [0.21371929, 0.21518053]], 67 | [[0.10255913, 0.42572159], [0.05785894, 0.17925645], [0.01839256, 0.07238193]]]) 68 | draw_bounding_boxes(frame, detections, None, process_face_expression) 69 | 70 | elapsed_ms = (time.time() - start_time) * 1000 71 | 72 | times.append(elapsed_ms) 73 | video_writer.write(frame) 74 | 75 | print('Finished processing frames') 76 | video.release(), video_writer.release() 77 | 78 | print("Average time(ms): ", sum(times)//frame_num) 79 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1 / time to process loop 80 | 81 | if __name__ == "__main__" : 82 | 83 | print("OpenCV version: {}".format(cv2. __version__)) 84 | 85 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 86 | parser.add_argument('--first_stage', help='File path of .tflite file.', required=True) 87 | parser.add_argument('--second_stage', help='File path of .tflite file.', required=True) 88 | parser.add_argument('--threshold', help='Confidence threshold.', default=0.7) 89 | parser.add_argument('--file', help='File path of video file', required=True) 90 | args = parser.parse_args() 91 | 92 | first_stage_network = NetworkExecutor(args.first_stage) 93 | second_stage_network = NetworkExecutor(args.second_stage) 94 | 95 | main(args) 96 | 97 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_emotion/multi_stage_stream.py: -------------------------------------------------------------------------------- 1 | import time 2 | import argparse 3 | import os 4 | import cv2 5 | import numpy as np 6 | 7 | from cv_utils import decode_yolov3, preprocess, draw_bounding_boxes 8 | from tflite_runtime.interpreter import Interpreter 9 | from flask import Flask, render_template, request, Response 10 | 11 | app = Flask (__name__, static_url_path = '') 12 | 13 | def process_face_expression(roi_img): 14 | 15 | emotion_list = ['neutral', 'happiness', 'surprise', 'sadness', 'anger', 'disgust', 'fear', 'contempt', 'unknown'] 16 | 17 | results = np.squeeze(second_stage_network.run(roi_img)) 18 | emotion_idx = np.argmax(results) 19 | emotion_confience = np.max(results) 20 | 21 | label = f'{emotion_list[emotion_idx]} {emotion_confience:.4f}%' 22 | 23 | return label 24 | 25 | class NetworkExecutor(object): 26 | 27 | def __init__(self, model_file): 28 | 29 | self.interpreter = Interpreter(model_file, num_threads=3) 30 | self.interpreter.allocate_tensors() 31 | _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape'] 32 | self.tensor_index = self.interpreter.get_input_details()[0]['index'] 33 | 34 | def get_output_tensors(self): 35 | 36 | output_details = self.interpreter.get_output_details() 37 | tensor_indices = [] 38 | tensor_list 
= [] 39 | 40 | for output in output_details: 41 | tensor = np.squeeze(self.interpreter.get_tensor(output['index'])) 42 | tensor_list.append(tensor) 43 | 44 | return tensor_list 45 | 46 | def run(self, image): 47 | if image.shape[1:2] != (self.input_height, self.input_width): 48 | img = cv2.resize(image, (self.input_width, self.input_height)) 49 | img = preprocess(img) 50 | self.interpreter.set_tensor(self.tensor_index, img) 51 | self.interpreter.invoke() 52 | return self.get_output_tensors() 53 | 54 | class Detector(NetworkExecutor): 55 | 56 | def __init__(self, label_file, model_file, threshold): 57 | super().__init__(model_file) 58 | self.threshold = float(threshold) 59 | 60 | def detect(self, frame): 61 | start_time = time.time() 62 | results = self.run(frame) 63 | elapsed_ms = (time.time() - start_time) * 1000 64 | 65 | detections = decode_yolov3(netout = results, nms_threshold = 0.1, threshold = args.threshold, anchors = [[[0.51424575, 0.54116074], [0.29523918, 0.45838044], [0.21371929, 0.21518053]], 66 | [[0.10255913, 0.42572159], [0.05785894, 0.17925645], [0.01839256, 0.07238193]]]) 67 | draw_bounding_boxes(frame, detections, None, process_face_expression) 68 | 69 | fps = 1 / elapsed_ms*1000 70 | print("Estimated frames per second : {0:.2f} Inference time: {1:.2f}".format(fps, elapsed_ms)) 71 | 72 | return cv2.imencode('.jpg', frame)[1].tobytes() 73 | 74 | @app.route("/") 75 | def index(): 76 | return render_template('index.html', name = None) 77 | 78 | def gen(camera): 79 | while True: 80 | frame = camera.get_frame() 81 | image = detector.detect(frame) 82 | yield (b'--frame\r\n'+b'Content-Type: image/jpeg\r\n\r\n' + image + b'\r\n') 83 | 84 | @app.route('/video_feed') 85 | def video_feed(): 86 | return Response(gen(Camera()), mimetype='multipart/x-mixed-replace; boundary=frame') 87 | 88 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 89 | parser.add_argument('--first_stage', help='File path of .tflite file.', required=True) 90 | parser.add_argument('--second_stage', help='File path of .tflite file.', required=True) 91 | parser.add_argument('--threshold', help='Confidence threshold.', default=0.8) 92 | parser.add_argument('--source', help='picamera or cv', default='cv') 93 | args = parser.parse_args() 94 | 95 | if args.source == "cv": 96 | from camera_opencv import Camera 97 | source = 0 98 | elif args.source == "picamera": 99 | from camera_pi import Camera 100 | source = 0 101 | 102 | Camera.set_video_source(source) 103 | 104 | detector = Detector(None, args.first_stage, args.threshold) 105 | second_stage_network = NetworkExecutor(args.second_stage) 106 | 107 | if __name__ == "__main__" : 108 | app.run(host = '0.0.0.0', port = 5000, debug = True) 109 | 110 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_emotion/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Video Streaming Demonstration 4 | 5 | 6 |

Tflite Multi-stage Inference Demo

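All three streaming demos share the same two-stage pattern: the first-stage detector produces face boxes, and draw_bounding_boxes() receives a per-ROI function such as process_face_expression() above which, judging by its roi_img signature, is applied to each detected crop. The sketch below illustrates only that callback pattern; it is not repository code, the (x1, y1, x2, y2) box format is an assumption, and the stub classifier stands in for the second-stage TFLite model (the actual crop-and-draw logic lives in cv_utils, which is outside this section).

# Sketch only: the per-ROI callback pattern used by the multi-stage scripts,
# with mock detections and a stub classifier instead of decode_yolov3() and
# the second-stage network.
import numpy as np

def classify_roi_stub(roi):
    """Placeholder for running the second-stage classifier on a face crop."""
    return 'happiness 85.00%'

def label_detections(frame, boxes, classify_roi):
    """Crop each assumed (x1, y1, x2, y2) box out of the frame and label it."""
    labels = []
    for (x1, y1, x2, y2) in boxes:
        roi = frame[y1:y2, x1:x2]       # crop handed to the per-ROI callback
        labels.append(classify_roi(roi))
    return labels

if __name__ == '__main__':
    frame = np.zeros((720, 1280, 3), dtype=np.uint8)   # stand-in camera frame
    boxes = [(100, 100, 220, 240)]                     # stand-in detector output
    print(label_detections(frame, boxes, classify_roi_stub))

Keeping the classification step behind a callback is what lets the age/gender, emotion, and vehicle-type examples reuse an essentially identical Detector/NetworkExecutor pair and differ only in the per-ROI function and the model files passed on the command line.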
7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_vehicle_type/base_camera.py: -------------------------------------------------------------------------------- 1 | import time 2 | import threading 3 | try: 4 | from greenlet import getcurrent as get_ident 5 | except ImportError: 6 | try: 7 | from thread import get_ident 8 | except ImportError: 9 | from _thread import get_ident 10 | 11 | 12 | class CameraEvent(object): 13 | """An Event-like class that signals all active clients when a new frame is 14 | available. 15 | """ 16 | def __init__(self): 17 | self.events = {} 18 | 19 | def wait(self): 20 | """Invoked from each client's thread to wait for the next frame.""" 21 | ident = get_ident() 22 | if ident not in self.events: 23 | # this is a new client 24 | # add an entry for it in the self.events dict 25 | # each entry has two elements, a threading.Event() and a timestamp 26 | self.events[ident] = [threading.Event(), time.time()] 27 | return self.events[ident][0].wait() 28 | 29 | def set(self): 30 | """Invoked by the camera thread when a new frame is available.""" 31 | now = time.time() 32 | remove = None 33 | for ident, event in self.events.items(): 34 | if not event[0].isSet(): 35 | # if this client's event is not set, then set it 36 | # also update the last set timestamp to now 37 | event[0].set() 38 | event[1] = now 39 | else: 40 | # if the client's event is already set, it means the client 41 | # did not process a previous frame 42 | # if the event stays set for more than 5 seconds, then assume 43 | # the client is gone and remove it 44 | if now - event[1] > 5: 45 | remove = ident 46 | if remove: 47 | del self.events[remove] 48 | 49 | def clear(self): 50 | """Invoked from each client's thread after a frame was processed.""" 51 | self.events[get_ident()][0].clear() 52 | 53 | 54 | class BaseCamera(object): 55 | thread = None # background thread that reads frames from camera 56 | frame = None # current frame is stored here by background thread 57 | last_access = 0 # time of last client access to the camera 58 | event = CameraEvent() 59 | 60 | def __init__(self): 61 | """Start the background camera thread if it isn't running yet.""" 62 | if BaseCamera.thread is None: 63 | BaseCamera.last_access = time.time() 64 | 65 | # start background frame thread 66 | BaseCamera.thread = threading.Thread(target=self._thread) 67 | BaseCamera.thread.start() 68 | 69 | # wait until frames are available 70 | while self.get_frame() is None: 71 | time.sleep(0) 72 | 73 | def get_frame(self): 74 | """Return the current camera frame.""" 75 | BaseCamera.last_access = time.time() 76 | 77 | # wait for a signal from the camera thread 78 | BaseCamera.event.wait() 79 | BaseCamera.event.clear() 80 | 81 | return BaseCamera.frame 82 | 83 | @staticmethod 84 | def frames(): 85 | """"Generator that returns frames from the camera.""" 86 | raise RuntimeError('Must be implemented by subclasses.') 87 | 88 | @classmethod 89 | def _thread(cls): 90 | """Camera background thread.""" 91 | print('Starting camera thread.') 92 | frames_iterator = cls.frames() 93 | for frame in frames_iterator: 94 | BaseCamera.frame = frame 95 | BaseCamera.event.set() # send signal to clients 96 | time.sleep(0) 97 | 98 | # if there hasn't been any clients asking for frames in 99 | # the last 10 seconds then stop the thread 100 | if time.time() - BaseCamera.last_access > 10: 101 | frames_iterator.close() 102 | print('Stopping camera thread due to inactivity.') 
103 | break 104 | BaseCamera.thread = None 105 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_vehicle_type/camera_opencv.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from base_camera import BaseCamera 3 | 4 | 5 | class Camera(BaseCamera): 6 | video_source = 0 7 | 8 | @staticmethod 9 | def set_video_source(source): 10 | Camera.video_source = source 11 | 12 | @staticmethod 13 | def frames(): 14 | camera = cv2.VideoCapture(Camera.video_source) 15 | if not camera.isOpened(): 16 | raise RuntimeError('Could not start camera.') 17 | 18 | while True: 19 | # read current frame 20 | _, img = camera.read() 21 | #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 22 | 23 | # return img 24 | yield img 25 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_vehicle_type/camera_pi.py: -------------------------------------------------------------------------------- 1 | import io 2 | import time 3 | import picamera 4 | import picamera.array 5 | import cv2 6 | from base_camera import BaseCamera 7 | 8 | 9 | class Camera(BaseCamera): 10 | video_source = 0 11 | 12 | @staticmethod 13 | def set_video_source(source): 14 | pass 15 | 16 | @staticmethod 17 | def frames(): 18 | with picamera.PiCamera(resolution = (1280,720)) as camera: 19 | # let camera warm up 20 | time.sleep(2) 21 | 22 | with picamera.array.PiRGBArray(camera, size=(1280,720)) as stream: 23 | while True: 24 | 25 | camera.capture(stream, format='bgr', use_video_port=True) 26 | # At this point the image is available as stream.array 27 | image = stream.array 28 | stream.truncate(0) 29 | yield image 30 | 31 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_vehicle_type/labels.txt: -------------------------------------------------------------------------------- 1 | AM-General-Hummer-SUV-2000 2 | Acura-Integra-Type-R-2001 3 | Acura-RL-Sedan-2012 4 | Acura-TL-Sedan-2012 5 | Acura-TL-Type-S-2008 6 | Acura-TSX-Sedan-2012 7 | Acura-ZDX-Hatchback-2012 8 | Aston-Martin-V8-Vantage-Convertible-2012 9 | Aston-Martin-V8-Vantage-Coupe-2012 10 | Aston-Martin-Virage-Convertible-2012 11 | Aston-Martin-Virage-Coupe-2012 12 | Audi-100-Sedan-1994 13 | Audi-100-Wagon-1994 14 | Audi-A5-Coupe-2012 15 | Audi-R8-Coupe-2012 16 | Audi-RS-4-Convertible-2008 17 | Audi-S4-Sedan-2007 18 | Audi-S4-Sedan-2012 19 | Audi-S5-Convertible-2012 20 | Audi-S5-Coupe-2012 21 | Audi-S6-Sedan-2011 22 | Audi-TT-Hatchback-2011 23 | Audi-TT-RS-Coupe-2012 24 | Audi-TTS-Coupe-2012 25 | Audi-V8-Sedan-1994 26 | BMW-1-Series-Convertible-2012 27 | BMW-1-Series-Coupe-2012 28 | BMW-3-Series-Sedan-2012 29 | BMW-3-Series-Wagon-2012 30 | BMW-6-Series-Convertible-2007 31 | BMW-ActiveHybrid-5-Sedan-2012 32 | BMW-M3-Coupe-2012 33 | BMW-M5-Sedan-2010 34 | BMW-M6-Convertible-2010 35 | BMW-X3-SUV-2012 36 | BMW-X5-SUV-2007 37 | BMW-X6-SUV-2012 38 | BMW-Z4-Convertible-2012 39 | Bentley-Arnage-Sedan-2009 40 | Bentley-Continental-Flying-Spur-Sedan-2007 41 | Bentley-Continental-GT-Coupe-2007 42 | Bentley-Continental-GT-Coupe-2012 43 | Bentley-Continental-Supersports-Conv.-Convertible-2012 44 | Bentley-Mulsanne-Sedan-2011 45 | Bugatti-Veyron-16.4-Convertible-2009 46 | Bugatti-Veyron-16.4-Coupe-2009 47 | Buick-Enclave-SUV-2012 48 | Buick-Rainier-SUV-2007 49 | Buick-Regal-GS-2012 50 | Buick-Verano-Sedan-2012 51 | 
Cadillac-CTS-V-Sedan-2012 52 | Cadillac-Escalade-EXT-Crew-Cab-2007 53 | Cadillac-SRX-SUV-2012 54 | Chevrolet-Avalanche-Crew-Cab-2012 55 | Chevrolet-Camaro-Convertible-2012 56 | Chevrolet-Cobalt-SS-2010 57 | Chevrolet-Corvette-Convertible-2012 58 | Chevrolet-Corvette-Ron-Fellows-Edition-Z06-2007 59 | Chevrolet-Corvette-ZR1-2012 60 | Chevrolet-Express-Cargo-Van-2007 61 | Chevrolet-Express-Van-2007 62 | Chevrolet-HHR-SS-2010 63 | Chevrolet-Impala-Sedan-2007 64 | Chevrolet-Malibu-Hybrid-Sedan-2010 65 | Chevrolet-Malibu-Sedan-2007 66 | Chevrolet-Monte-Carlo-Coupe-2007 67 | Chevrolet-Silverado-1500-Classic-Extended-Cab-2007 68 | Chevrolet-Silverado-1500-Extended-Cab-2012 69 | Chevrolet-Silverado-1500-Hybrid-Crew-Cab-2012 70 | Chevrolet-Silverado-1500-Regular-Cab-2012 71 | Chevrolet-Silverado-2500HD-Regular-Cab-2012 72 | Chevrolet-Sonic-Sedan-2012 73 | Chevrolet-Tahoe-Hybrid-SUV-2012 74 | Chevrolet-TrailBlazer-SS-2009 75 | Chevrolet-Traverse-SUV-2012 76 | Chrysler-300-SRT-8-2010 77 | Chrysler-Aspen-SUV-2009 78 | Chrysler-Crossfire-Convertible-2008 79 | Chrysler-PT-Cruiser-Convertible-2008 80 | Chrysler-Sebring-Convertible-2010 81 | Chrysler-Town-and-Country-Minivan-2012 82 | Daewoo-Nubira-Wagon-2002 83 | Dodge-Caliber-Wagon-2007 84 | Dodge-Caliber-Wagon-2012 85 | Dodge-Caravan-Minivan-1997 86 | Dodge-Challenger-SRT8-2011 87 | Dodge-Charger-SRT-8-2009 88 | Dodge-Charger-Sedan-2012 89 | Dodge-Dakota-Club-Cab-2007 90 | Dodge-Dakota-Crew-Cab-2010 91 | Dodge-Durango-SUV-2007 92 | Dodge-Durango-SUV-2012 93 | Dodge-Journey-SUV-2012 94 | Dodge-Magnum-Wagon-2008 95 | Dodge-Ram-Pickup-3500-Crew-Cab-2010 96 | Dodge-Ram-Pickup-3500-Quad-Cab-2009 97 | Dodge-Sprinter-Cargo-Van-2009 98 | Eagle-Talon-Hatchback-1998 99 | FIAT-500-Abarth-2012 100 | FIAT-500-Convertible-2012 101 | Ferrari-458-Italia-Convertible-2012 102 | Ferrari-458-Italia-Coupe-2012 103 | Ferrari-California-Convertible-2012 104 | Ferrari-FF-Coupe-2012 105 | Fisker-Karma-Sedan-2012 106 | Ford-E-Series-Wagon-Van-2012 107 | Ford-Edge-SUV-2012 108 | Ford-Expedition-EL-SUV-2009 109 | Ford-F-150-Regular-Cab-2007 110 | Ford-F-150-Regular-Cab-2012 111 | Ford-F-450-Super-Duty-Crew-Cab-2012 112 | Ford-Fiesta-Sedan-2012 113 | Ford-Focus-Sedan-2007 114 | Ford-Freestar-Minivan-2007 115 | Ford-GT-Coupe-2006 116 | Ford-Mustang-Convertible-2007 117 | Ford-Ranger-SuperCab-2011 118 | GMC-Acadia-SUV-2012 119 | GMC-Canyon-Extended-Cab-2012 120 | GMC-Savana-Van-2012 121 | GMC-Terrain-SUV-2012 122 | GMC-Yukon-Hybrid-SUV-2012 123 | Geo-Metro-Convertible-1993 124 | HUMMER-H2-SUT-Crew-Cab-2009 125 | HUMMER-H3T-Crew-Cab-2010 126 | Honda-Accord-Coupe-2012 127 | Honda-Accord-Sedan-2012 128 | Honda-Odyssey-Minivan-2007 129 | Honda-Odyssey-Minivan-2012 130 | Hyundai-Accent-Sedan-2012 131 | Hyundai-Azera-Sedan-2012 132 | Hyundai-Elantra-Sedan-2007 133 | Hyundai-Elantra-Touring-Hatchback-2012 134 | Hyundai-Genesis-Sedan-2012 135 | Hyundai-Santa-Fe-SUV-2012 136 | Hyundai-Sonata-Hybrid-Sedan-2012 137 | Hyundai-Sonata-Sedan-2012 138 | Hyundai-Tucson-SUV-2012 139 | Hyundai-Veloster-Hatchback-2012 140 | Hyundai-Veracruz-SUV-2012 141 | Infiniti-G-Coupe-IPL-2012 142 | Infiniti-QX56-SUV-2011 143 | Isuzu-Ascender-SUV-2008 144 | Jaguar-XK-XKR-2012 145 | Jeep-Compass-SUV-2012 146 | Jeep-Grand-Cherokee-SUV-2012 147 | Jeep-Liberty-SUV-2012 148 | Jeep-Patriot-SUV-2012 149 | Jeep-Wrangler-SUV-2012 150 | Lamborghini-Aventador-Coupe-2012 151 | Lamborghini-Diablo-Coupe-2001 152 | Lamborghini-Gallardo-LP-570-4-Superleggera-2012 153 | Lamborghini-Reventon-Coupe-2008 154 | Land-Rover-LR2-SUV-2012 
155 | Land-Rover-Range-Rover-SUV-2012 156 | Lincoln-Town-Car-Sedan-2011 157 | MINI-Cooper-Roadster-Convertible-2012 158 | Maybach-Landaulet-Convertible-2012 159 | Mazda-Tribute-SUV-2011 160 | McLaren-MP4-12C-Coupe-2012 161 | Mercedes-Benz-300-Class-Convertible-1993 162 | Mercedes-Benz-C-Class-Sedan-2012 163 | Mercedes-Benz-E-Class-Sedan-2012 164 | Mercedes-Benz-S-Class-Sedan-2012 165 | Mercedes-Benz-SL-Class-Coupe-2009 166 | Mercedes-Benz-Sprinter-Van-2012 167 | Mitsubishi-Lancer-Sedan-2012 168 | Nissan-240SX-Coupe-1998 169 | Nissan-Juke-Hatchback-2012 170 | Nissan-Leaf-Hatchback-2012 171 | Nissan-NV-Passenger-Van-2012 172 | Plymouth-Neon-Coupe-1999 173 | Porsche-Panamera-Sedan-2012 174 | Ram-C-V-Cargo-Van-Minivan-2012 175 | Rolls-Royce-Ghost-Sedan-2012 176 | Rolls-Royce-Phantom-Drophead-Coupe-Convertible-2012 177 | Rolls-Royce-Phantom-Sedan-2012 178 | Scion-xD-Hatchback-2012 179 | Spyker-C8-Convertible-2009 180 | Spyker-C8-Coupe-2009 181 | Suzuki-Aerio-Sedan-2007 182 | Suzuki-Kizashi-Sedan-2012 183 | Suzuki-SX4-Hatchback-2012 184 | Suzuki-SX4-Sedan-2012 185 | Tesla-Model-S-Sedan-2012 186 | Toyota-4Runner-SUV-2012 187 | Toyota-Camry-Sedan-2012 188 | Toyota-Corolla-Sedan-2012 189 | Toyota-Sequoia-SUV-2012 190 | Volkswagen-Beetle-Hatchback-2012 191 | Volkswagen-Golf-Hatchback-1991 192 | Volkswagen-Golf-Hatchback-2012 193 | Volvo-240-Sedan-1993 194 | Volvo-C30-Hatchback-2012 195 | Volvo-XC90-SUV-2007 196 | smart-fortwo-Convertible-2012 -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_vehicle_type/multi_stage_file.py: -------------------------------------------------------------------------------- 1 | import time 2 | import argparse 3 | import os 4 | import cv2 5 | import numpy as np 6 | from tqdm import tqdm 7 | 8 | from cv_utils import init_video_file_capture, decode_yolov3, decode_classifier, draw_classification, draw_bounding_boxes, preprocess 9 | from tflite_runtime.interpreter import Interpreter 10 | 11 | def load_labels(path): 12 | with open(path, 'r') as f: 13 | return {i: line.strip() for i, line in enumerate(f.readlines())} 14 | 15 | def process_vehicle_type(roi_img): 16 | 17 | results = second_stage_network.run(roi_img) 18 | vehicle_type = np.argmax(results[0]) 19 | confidence = np.max(results[0]) 20 | label = f'{labels[vehicle_type]} : {confidence}' 21 | 22 | return label 23 | 24 | class NetworkExecutor(object): 25 | 26 | def __init__(self, model_file, num_threads=3): 27 | 28 | self.interpreter = Interpreter(model_file, num_threads=num_threads) 29 | self.interpreter.allocate_tensors() 30 | _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape'] 31 | print(self.input_height, self.input_width) 32 | self.tensor_index = self.interpreter.get_input_details()[0]['index'] 33 | 34 | def get_output_tensors(self): 35 | 36 | output_details = self.interpreter.get_output_details() 37 | tensor_indices = [] 38 | tensor_list = [] 39 | 40 | for output in output_details: 41 | tensor = np.squeeze(self.interpreter.get_tensor(output['index'])) 42 | tensor_list.append(tensor) 43 | 44 | return tensor_list 45 | 46 | def run(self, image): 47 | if image.shape[1:2] != (self.input_height, self.input_width): 48 | img = cv2.resize(image, (self.input_width, self.input_height)) 49 | img = preprocess(img) 50 | self.interpreter.set_tensor(self.tensor_index, img) 51 | self.interpreter.invoke() 52 | return self.get_output_tensors() 53 | 54 | def main(args): 55 | video, video_writer, frame_count = 
init_video_file_capture(args.file, 'vehicle_type_demo') 56 | 57 | frame_num = len(frame_count) 58 | times = [] 59 | 60 | for _ in tqdm(frame_count, desc='Processing frames'): 61 | frame_present, frame = video.read() 62 | if not frame_present: 63 | continue 64 | 65 | start_time = time.time() 66 | 67 | results = first_stage_network.run(frame) 68 | detections = decode_yolov3(netout = results, nms_threshold = 0.1, threshold = args.threshold) 69 | draw_bounding_boxes(frame, detections, None, process_vehicle_type) 70 | 71 | elapsed_ms = (time.time() - start_time) * 1000 72 | 73 | times.append(elapsed_ms) 74 | video_writer.write(frame) 75 | 76 | print('Finished processing frames') 77 | video.release(), video_writer.release() 78 | 79 | print("Average time(ms): ", sum(times)//frame_num) 80 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1 / time to process loop 81 | 82 | if __name__ == "__main__" : 83 | 84 | print("OpenCV version: {}".format(cv2. __version__)) 85 | 86 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 87 | parser.add_argument('--first_stage', help='File path of .tflite file.', required=True) 88 | parser.add_argument('--second_stage', help='File path of .tflite file.', required=True) 89 | parser.add_argument('--labels', nargs="+", help='File path of labels file.', required=True) 90 | parser.add_argument('--threshold', help='Confidence threshold.', default=0.7) 91 | parser.add_argument('--file', help='File path of video file', required=True) 92 | args = parser.parse_args() 93 | 94 | first_stage_network = NetworkExecutor(args.first_stage, num_threads=2) 95 | second_stage_network = NetworkExecutor(args.second_stage, num_threads=2) 96 | 97 | if not os.path.exists(args.labels[0]): 98 | labels = args.labels 99 | else: 100 | labels = load_labels(args.labels[0]) 101 | 102 | main(args) 103 | 104 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_vehicle_type/multi_stage_stream.py: -------------------------------------------------------------------------------- 1 | import time 2 | import argparse 3 | import os 4 | import cv2 5 | import numpy as np 6 | 7 | from cv_utils import decode_yolov3, preprocess, draw_bounding_boxes 8 | from tflite_runtime.interpreter import Interpreter 9 | from flask import Flask, render_template, request, Response 10 | 11 | app = Flask (__name__, static_url_path = '') 12 | 13 | def load_labels(path): 14 | with open(path, 'r') as f: 15 | return {i: line.strip() for i, line in enumerate(f.readlines())} 16 | 17 | def process_vehicle_type(roi_img): 18 | 19 | results = second_stage_network.run(roi_img) 20 | vehicle_type = np.argmax(results[0]) 21 | confidence = np.max(results[0]) 22 | label = f'{labels[vehicle_type]} : {confidence}' 23 | 24 | return label 25 | 26 | class NetworkExecutor(object): 27 | 28 | def __init__(self, model_file, num_threads=3): 29 | 30 | self.interpreter = Interpreter(model_file, num_threads=num_threads) 31 | self.interpreter.allocate_tensors() 32 | _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape'] 33 | self.tensor_index = self.interpreter.get_input_details()[0]['index'] 34 | 35 | def get_output_tensors(self): 36 | 37 | output_details = self.interpreter.get_output_details() 38 | tensor_indices = [] 39 | tensor_list = [] 40 | 41 | for output in output_details: 42 | tensor = np.squeeze(self.interpreter.get_tensor(output['index'])) 43 | tensor_list.append(tensor) 44 | 45 | return 
tensor_list 46 | 47 | def run(self, image): 48 | if image.shape[1:2] != (self.input_height, self.input_width): 49 | img = cv2.resize(image, (self.input_width, self.input_height)) 50 | img = preprocess(img) 51 | self.interpreter.set_tensor(self.tensor_index, img) 52 | self.interpreter.invoke() 53 | return self.get_output_tensors() 54 | 55 | class Detector(NetworkExecutor): 56 | 57 | def __init__(self, label_file, model_file, threshold): 58 | super().__init__(model_file) 59 | self.threshold = float(threshold) 60 | 61 | def detect(self, frame): 62 | start_time = time.time() 63 | results = self.run(frame) 64 | elapsed_ms = (time.time() - start_time) * 1000 65 | 66 | detections = decode_yolov3(netout = results, nms_threshold = 0.1, threshold = self.threshold) 67 | draw_bounding_boxes(frame, detections, None, process_vehicle_type) 68 | 69 | fps = 1 / elapsed_ms*1000 70 | print("Estimated frames per second : {0:.2f} Inference time: {1:.2f}".format(fps, elapsed_ms)) 71 | 72 | return cv2.imencode('.jpg', frame)[1].tobytes() 73 | 74 | @app.route("/") 75 | def index(): 76 | return render_template('index.html', name = None) 77 | 78 | def gen(camera): 79 | while True: 80 | frame = camera.get_frame() 81 | image = detector.detect(frame) 82 | yield (b'--frame\r\n'+b'Content-Type: image/jpeg\r\n\r\n' + image + b'\r\n') 83 | 84 | @app.route('/video_feed') 85 | def video_feed(): 86 | return Response(gen(Camera()), mimetype='multipart/x-mixed-replace; boundary=frame') 87 | 88 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 89 | parser.add_argument('--first_stage', help='File path of .tflite file.', required=True) 90 | parser.add_argument('--second_stage', help='File path of .tflite file.', required=True) 91 | parser.add_argument('--labels', nargs="+", help='File path of labels file.', required=True) 92 | parser.add_argument('--threshold', help='Confidence threshold.', default=0.9) 93 | parser.add_argument('--source', help='picamera or cv', default='cv') 94 | args = parser.parse_args() 95 | 96 | if args.source == "cv": 97 | from camera_opencv import Camera 98 | source = 0 99 | elif args.source == "picamera": 100 | from camera_pi import Camera 101 | source = 0 102 | 103 | Camera.set_video_source(source) 104 | 105 | detector = Detector(None, args.first_stage, args.threshold) 106 | second_stage_network = NetworkExecutor(args.second_stage) 107 | 108 | if not os.path.exists(args.labels[0]): 109 | labels = args.labels 110 | else: 111 | labels = load_labels(args.labels[0]) 112 | 113 | if __name__ == "__main__" : 114 | app.run(host = '0.0.0.0', port = 5000, debug = True) 115 | 116 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_vehicle_type/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Video Streaming Demonstration 4 | 5 | 6 |

Tflite Multi-stage Inference Demo

7 | 8 | 9 | 10 | --------------------------------------------------------------------------------
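The vehicle-type example above pairs the YOLOv3 vehicle detector with a 196-class classifier whose class names come from labels.txt via load_labels(). The sketch below is not part of the repository; load_labels_from_lines(), decode_vehicle_type(), the three-label subset, and the mock scores are illustrative only, while the real scripts read the full labels.txt from disk and take scores from the second-stage TFLite model.

# Sketch only: index-to-label mapping for the vehicle-type classifier, using a
# short inline subset of labels.txt so the example is self-contained.
import numpy as np

def load_labels_from_lines(lines):
    """Index -> label dict, one label per line (same shape as load_labels())."""
    return {i: line.strip() for i, line in enumerate(lines)}

LABELS = load_labels_from_lines([
    'AM-General-Hummer-SUV-2000',
    'Acura-Integra-Type-R-2001',
    'Acura-RL-Sedan-2012',
])

def decode_vehicle_type(class_scores, labels):
    """Return 'label : confidence' for the top-scoring class."""
    scores = np.asarray(class_scores, dtype=np.float32)
    idx = int(np.argmax(scores))
    return f'{labels[idx]} : {scores[idx]:.2f}'

if __name__ == '__main__':
    mock_scores = [0.10, 0.75, 0.15]                 # placeholder classifier output
    print(decode_vehicle_type(mock_scores, LABELS))  # -> 'Acura-Integra-Type-R-2001 : 0.75'

In the scripts above, process_vehicle_type() does the equivalent with np.argmax and np.max over results[0] and the dict returned by load_labels().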