├── .github
│   └── workflows
│       ├── stale.yml
│       └── sync_issues.yml
├── .gitignore
├── .gitmodules
├── README.md
├── examples
│   ├── armnn
│   │   ├── README.md
│   │   ├── common
│   │   │   ├── cv_utils.py
│   │   │   ├── network_executor.py
│   │   │   ├── tests
│   │   │   │   ├── conftest.py
│   │   │   │   ├── context.py
│   │   │   │   ├── test_network_executor.py
│   │   │   │   └── test_utils.py
│   │   │   └── utils.py
│   │   ├── face_age-gender
│   │   │   ├── README.md
│   │   │   ├── box.py
│   │   │   ├── requirements.txt
│   │   │   ├── run_video_file.py
│   │   │   ├── run_video_stream.py
│   │   │   └── yolov2.py
│   │   ├── face_keypoints
│   │   │   ├── README.md
│   │   │   ├── box.py
│   │   │   ├── requirements.txt
│   │   │   ├── run_video_file.py
│   │   │   ├── run_video_stream.py
│   │   │   └── yolov2.py
│   │   └── face_recognition
│   │       ├── README.md
│   │       ├── box.py
│   │       ├── calculate_features.py
│   │       ├── requirements.txt
│   │       ├── run_video_file.py
│   │       ├── run_video_stream.py
│   │       └── yolov2.py
│   ├── edge_impulse
│   │   └── multi_stage_inference_vehicle_type
│   │       └── multi_stage.py
│   ├── mediapipe
│   │   ├── README.md
│   │   ├── common
│   │   │   └── cv_utils.py
│   │   ├── face_detection
│   │   │   ├── run_video_file.py
│   │   │   └── run_video_stream.py
│   │   ├── face_mesh
│   │   │   ├── run_video_file.py
│   │   │   └── run_video_stream.py
│   │   ├── hand_landmarks
│   │   │   ├── run_video_file.py
│   │   │   └── run_video_stream.py
│   │   └── pose_estimation
│   │       ├── run_video_file.py
│   │       └── run_video_stream.py
│   ├── sample_files
│   │   ├── cars.mp4
│   │   ├── test_dance.mp4
│   │   └── test_s.mp4
│   └── tensorflow_lite
│       ├── face_recognition
│       │   ├── README.md
│       │   ├── base_camera.py
│       │   ├── calculate_features.py
│       │   ├── camera_opencv.py
│       │   ├── camera_pi.py
│       │   ├── cv_utils.py
│       │   ├── multi_stage_file.py
│       │   ├── multi_stage_stream.py
│       │   ├── requirements.txt
│       │   └── templates
│       │       └── index.html
│       ├── multi_stage_inference_age_gender
│       │   ├── base_camera.py
│       │   ├── camera_opencv.py
│       │   ├── camera_pi.py
│       │   ├── cv_utils.py
│       │   ├── multi_stage_file.py
│       │   ├── multi_stage_stream.py
│       │   └── templates
│       │       └── index.html
│       ├── multi_stage_inference_emotion
│       │   ├── base_camera.py
│       │   ├── camera_opencv.py
│       │   ├── camera_pi.py
│       │   ├── cv_utils.py
│       │   ├── multi_stage_file.py
│       │   ├── multi_stage_stream.py
│       │   └── templates
│       │       └── index.html
│       └── multi_stage_inference_vehicle_type
│           ├── base_camera.py
│           ├── camera_opencv.py
│           ├── camera_pi.py
│           ├── cv_utils.py
│           ├── labels.txt
│           ├── multi_stage_file.py
│           ├── multi_stage_stream.py
│           └── templates
│               └── index.html
└── jupyter_notebooks
    ├── aXeleRate_conveyor_belt_rip_recognition.ipynb
    ├── aXeleRate_face_anti_spoofing.ipynb
    ├── aXeleRate_lung_segmentation.ipynb
    └── aXeleRate_multi_stage.ipynb
/.github/workflows/stale.yml:
--------------------------------------------------------------------------------
1 | name: 'Close stale issues and PRs'
2 |
3 | on:
4 | workflow_dispatch:
5 | schedule:
6 | - cron: '0 4 * * *'
7 |
8 | jobs:
9 | stale:
10 | runs-on: ubuntu-latest
11 |
12 | steps:
13 | - name: Checkout repository
14 | uses: actions/checkout@v4
15 |
16 | - name: Checkout script repository
17 | uses: actions/checkout@v4
18 | with:
19 | repository: Seeed-Studio/sync-github-all-issues
20 | path: ci
21 |
22 | - name: Run script
23 | run: ./ci/tools/stale.sh
24 | env:
25 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
26 |
--------------------------------------------------------------------------------
/.github/workflows/sync_issues.yml:
--------------------------------------------------------------------------------
1 | name: Automate Issue Management
2 |
3 | on:
4 | issues:
5 | types:
6 | - opened
7 | - edited
8 | - assigned
9 | - unassigned
10 | - labeled
11 | - unlabeled
12 | - reopened
13 |
14 | jobs:
15 | add_issue_to_project:
16 | runs-on: ubuntu-latest
17 | steps:
18 | - name: Add issue to GitHub Project
19 | uses: actions/add-to-project@v1.0.2
20 | with:
21 | project-url: https://github.com/orgs/Seeed-Studio/projects/17
22 | github-token: ${{ secrets.ISSUE_ASSEMBLE }}
23 | labeled: bug
24 | label-operator: NOT
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | *.tflite
132 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "training_code/edge_ml_age_gender_recognition"]
2 | path = training_code/edge_ml_age_gender_recognition
3 | url = https://github.com/AIWintermuteAI/edge_ml_age_gender_recognition.git
4 | [submodule "training_code/edge_ml_emotion_recognition"]
5 | path = training_code/edge_ml_emotion_recognition
6 | url = https://github.com/AIWintermuteAI/edge_ml_emotion_recognition.git
7 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Seeed_Python_MachineLearning
2 | Examples and training code for Machine Learning samples that can be run on various Edge devices
3 |
--------------------------------------------------------------------------------
/examples/armnn/README.md:
--------------------------------------------------------------------------------
1 | # PyArmNN Sample Applications
2 |
3 | ## Introduction
4 | This sample application shows how to perform inference using the PyArmNN API. We assume the user has already built PyArmNN by following the instructions in the README of the main PyArmNN directory.
5 |
6 | We provide example scripts for performing inference from a video file and a video stream with `run_video_file.py` and `run_video_stream.py`. For detailed instructions and download links for the reference models, check the README file inside each application folder.
7 |
8 | ## Prerequisites
9 |
10 | ##### PyArmNN
11 |
12 | Before proceeding to the next steps, make sure that you have successfully installed the newest version of PyArmNN on your system by following the instructions in the README of the PyArmNN root directory.
13 |
14 | You can verify that the PyArmNN library is installed and check its version using:
15 | ```bash
16 | $ pip show pyarmnn
17 | ```
18 |
19 | You can also verify it by running the following and getting output similar to below:
20 | ```bash
21 | $ python -c "import pyarmnn as ann;print(ann.GetVersion())"
22 | '24.0.0'
23 | ```
24 |
25 | ##### Dependencies
26 |
27 | Install the following libraries on your system:
28 | ```bash
29 | $ sudo apt-get install python3-opencv libqtgui4 libqt4-test
30 | ```
31 |
32 | Create a virtual environment:
33 | ```bash
34 | $ python3.7 -m venv devenv --system-site-packages
35 | $ source devenv/bin/activate
36 | ```
37 |
38 | Install the dependencies:
39 | ```bash
40 | $ pip install -r requirements.txt
41 | ```
42 |
43 | ---
44 |
45 | ## Implementing Your Own Network
46 | The examples provide support for `yolo_v2` detection layer models. However, you can add your own network to the object detection scripts by following these steps:
47 |
48 | 1. Create a new file for your network, for example `network.py`, to contain functions to process the output of the model
49 | 2. In that file, write a function that decodes the output vectors obtained from running inference on your network and returns the bounding box positions of detected objects, plus their class index and confidence. Additionally, include a function that returns a resize factor that scales the obtained bounding boxes to their correct positions in the original frame
50 | 3. Import the functions into the main file and, as with the provided networks, add a conditional statement to the `get_model_processing()` function with the new model name and functions (see the sketch below)
51 | 4. The labels associated with the model can then be passed in with the `--label_path` argument
52 |
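For illustration, a minimal sketch of what such a `network.py` might contain is shown below. The function names, the assumed output layout and the 0.5 confidence threshold are hypothetical and must be adapted to your own model; the resize-factor helper mirrors `yolo_resize_factor()` from `face_age-gender/yolov2.py`.

```python
# network.py -- illustrative sketch only, not part of the repository
import cv2
import numpy as np


def custom_processing(output: list):
    """Decode raw inference output into [class_idx, [x_min, y_min, x_max, y_max], confidence]."""
    detections = []
    # Assumption: output[0] flattens to an (N, 6) array of [x_min, y_min, x_max, y_max, score, class]
    for x_min, y_min, x_max, y_max, score, class_idx in np.reshape(output[0], (-1, 6)):
        if score > 0.5:
            detections.append([int(class_idx), [x_min, y_min, x_max, y_max], score])
    return detections


def custom_resize_factor(video: cv2.VideoCapture, input_binding_info: tuple):
    """Multiplier that scales model-space box coordinates back to the original frame."""
    frame_height = video.get(cv2.CAP_PROP_FRAME_HEIGHT)
    frame_width = video.get(cv2.CAP_PROP_FRAME_WIDTH)
    model_height, model_width = list(input_binding_info[1].GetShape())[1:3]
    return max(frame_height, frame_width) / max(model_height, model_width)
```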
53 | ---
54 |
55 | # Application Overview
56 |
57 | This section provides a walkthrough of the application, explaining in detail the steps:
58 |
59 | 1. Initialisation
60 | 2. Creating a Network
61 | 3. Preparing the Workload Tensors
62 | 4. Executing Inference
63 | 5. Postprocessing
64 |
65 |
66 | ### Initialisation
67 |
68 | ##### Reading from Video Source
69 | After parsing user arguments, the chosen video file or stream is loaded into an OpenCV `cv2.VideoCapture()` object. We use this object to capture frames from the source using the `read()` function.
70 |
71 | The `VideoCapture` object also tells us information about the source, such as the framerate and resolution of the input video. Using this information, we create a `cv2.VideoWriter()` object which will be used at the end of every loop to write the processed frame to an output video file of the same format as the input.
72 |
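As a minimal sketch (file names are placeholders), the initialisation step looks roughly like this; the repository's `init_video_file_capture()` and `create_video_writer()` helpers in `common/cv_utils.py` wrap the same calls:

```python
import cv2

video = cv2.VideoCapture('input.mp4')    # or cv2.VideoCapture(0) for a camera stream

# Mirror the source properties so the output file matches the input format
fps = int(video.get(cv2.CAP_PROP_FPS))
frame_size = (int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),
              int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)))
fourcc = int(video.get(cv2.CAP_PROP_FOURCC))
video_writer = cv2.VideoWriter('output.mp4', fourcc, fps, frame_size)

frame_present, frame = video.read()      # called once per loop iteration
```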
73 | ##### Preparing Labels and Model Specific Functions
74 | In order to interpret the result of running inference on the loaded network, it is required to load the labels associated with the model. In the provided example code, the `dict_labels()` function creates a dictionary that is keyed on the classification index at the output node of the model, with values of the dictionary corresponding to a label and a randomly generated RGB color. This ensures that each class has a unique color which will prove helpful when plotting the bounding boxes of various detected objects in a frame.
75 |
76 | Depending on the model being used, the user-specified model name accesses and returns functions to decode and process the inference output, along with a resize factor used when plotting bounding boxes to ensure they are scaled to their correct position in the original frame.
77 |
78 |
79 | ### Creating a Network
80 |
81 | ##### Creating Parser and Importing Graph
82 | The first step with PyArmNN is to import a graph from file by using the appropriate parser.
83 |
84 | The Arm NN SDK provides parsers for reading graphs from a variety of model formats. In this application we specifically focus on `.tflite` models (see `create_network()` in `common/network_executor.py`).
85 |
86 | Based on the extension of the provided model file, the corresponding parser is created and the network file loaded with `CreateNetworkFromBinaryFile()` function. The parser will handle the creation of the underlying Arm NN graph.
87 |
88 | ##### Optimizing Graph for Compute Device
89 | Arm NN supports optimized execution on multiple CPU and GPU devices. Prior to executing a graph, we must select the appropriate device context. We do this by creating a runtime context with default options with `IRuntime()`.
90 |
91 | We can optimize the imported graph by specifying a list of backends in order of preference and implement backend-specific optimizations. The backends are identified by a string unique to the backend, for example `CpuAcc, GpuAcc, CpuRef`.
92 |
93 | Internally and transparently, Arm NN splits the graph into subgraphs based on the backends, calls an optimize-subgraphs function on each of them and, if possible, substitutes the corresponding subgraph in the original graph with its optimized version.
94 |
95 | Using the `Optimize()` function, we optimize the graph for inference and load the optimized network onto the compute device with `LoadNetwork()`. This function creates the backend-specific workloads for the layers and a backend-specific workload factory, which is called to create the workloads.
96 |
97 | ##### Creating Input and Output Binding Information
98 | Parsers can also be used to extract the input information for the network. By calling `GetSubgraphInputTensorNames` we extract all the input names and, with `GetNetworkInputBindingInfo`, bind the input points of the graph.
99 |
100 | The input binding information contains all the essential information about the input. It is a tuple consisting of integer identifiers for bindable layers (inputs, outputs) and the tensor info (data type, quantization information, number of dimensions, total number of elements).
101 |
102 | Similarly, we can get the output binding information for an output layer by using the parser to retrieve output tensor names and calling `GetNetworkOutputBindingInfo()`.
103 |
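Putting the steps above together, a condensed sketch of the network-creation path (mirroring `create_network()` in `common/network_executor.py`, assuming a `.tflite` model; the model path is a placeholder) is:

```python
import pyarmnn as ann

# Parse the model file into an Arm NN network (TfLite parser shown here)
parser = ann.ITfLiteParser()
network = parser.CreateNetworkFromBinaryFile('model.tflite')

# Create a runtime context and optimize the graph for the preferred backends
runtime = ann.IRuntime(ann.CreationOptions())
preferred_backends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]
opt_network, messages = ann.Optimize(network, preferred_backends,
                                     runtime.GetDeviceSpec(), ann.OptimizerOptions())

# Load the optimized network and query the input/output binding information
net_id, _ = runtime.LoadNetwork(opt_network)
graph_id = parser.GetSubgraphCount() - 1
input_names = parser.GetSubgraphInputTensorNames(graph_id)
input_binding_info = parser.GetNetworkInputBindingInfo(graph_id, input_names[0])
output_names = parser.GetSubgraphOutputTensorNames(graph_id)
output_binding_info = [parser.GetNetworkOutputBindingInfo(graph_id, name) for name in output_names]
```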
104 |
105 | ### Preparing the Workload Tensors
106 |
107 | ##### Preprocessing the Captured Frame
108 | Each frame captured from source is read as an `ndarray` in BGR format and therefore has to be preprocessed before being passed into the network.
109 |
110 | This preprocessing step consists of swapping channels (BGR to RGB in this example), resizing the frame to the required resolution, expanding dimensions of the array and doing data type conversion to match the model input layer. This information about the input tensor can be readily obtained from reading the `input_binding_info`. For example, SSD MobileNet V1 takes for input a tensor with shape `[1, 300, 300, 3]` and data type `uint8`.
111 |
112 | ##### Making Input and Output Tensors
113 | To produce the workload tensors, call `make_input_tensors()` and `make_output_tensors()`; they return the input and output tensors respectively.
114 |
115 |
116 | ### Executing Inference
117 | After making the workload tensors, a compute device performs inference for the loaded network using the `EnqueueWorkload()` function of the runtime context. By calling the `workload_tensors_to_ndarray()` function, we obtain the results from inference as a list of `ndarrays`.
118 |
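Continuing the sketch from the previous section, the tensor creation and inference steps reduce to a few calls (here `resized_frame` is assumed to be the preprocessed frame produced as described above):

```python
# Wrap the preprocessed frame and the output bindings into workload tensors
input_tensors = ann.make_input_tensors([input_binding_info], [resized_frame])
output_tensors = ann.make_output_tensors(output_binding_info)

# Run inference on the loaded network and convert the results back to ndarrays
runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)
results = ann.workload_tensors_to_ndarray(output_tensors)
```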
119 |
120 | ### Postprocessing
121 |
122 | ##### Decoding and Processing Inference Output
123 | The output from inference must be decoded to obtain information about detected objects in the frame. The examples include implementations for two networks, but you may also implement your own network decoding solution here. Please refer to the Implementing Your Own Network section of this document to learn how to do this.
124 |
125 | For SSD MobileNet V1 models, we decode the results to obtain the bounding box positions, classification index, confidence and number of detections in the input frame.
126 |
127 | For YOLO V3 Tiny models, we decode the output and perform non-maximum suppression to filter out any weak detections below a confidence threshold and any redundant bounding boxes above an intersection-over-union threshold.
128 |
129 | It is encouraged to experiment with threshold values for confidence and intersection-over-union (IoU) to achieve the best visual results.
130 |
131 | The detection results are always returned as a list in the form `[class index, [box positions], confidence score]`, with the box positions list containing bounding box coordinates in the form `[x_min, y_min, x_max, y_max]`.
132 |
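For reference, a generic class-agnostic confidence filter and non-maximum suppression over detections in this form could look like the sketch below; the provided examples use the model-specific implementations in `yolov2.py` and `box.py` instead.

```python
def iou(box_a, box_b):
    """Intersection-over-union of two [x_min, y_min, x_max, y_max] boxes."""
    x1, y1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    x2, y2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    intersection = max(0, x2 - x1) * max(0, y2 - y1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return intersection / float(area_a + area_b - intersection)


def filter_detections(detections, conf_threshold=0.5, iou_threshold=0.5):
    """Drop weak detections, then suppress overlapping boxes among the kept ones."""
    kept = []
    for det in sorted(detections, key=lambda d: d[2], reverse=True):
        if det[2] < conf_threshold:
            continue
        if all(iou(det[1], k[1]) < iou_threshold for k in kept):
            kept.append(det)
    return kept
```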
133 | ##### Drawing Bounding Boxes
134 | With the obtained results and using `draw_bounding_boxes()`, we are able to draw bounding boxes around detected objects and add the associated label and confidence score. The labels dictionary created earlier uses the class index of the detected object as a key to return the associated label and color for that class. The resize factor defined at the beginning scales the bounding box coordinates to their correct positions in the original frame. The processed frames are written to file or displayed in a separate window.
135 |
--------------------------------------------------------------------------------
/examples/armnn/common/cv_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov
3 | # SPDX-License-Identifier: MIT
4 |
5 | """
6 | This file contains helper functions for reading video/image data and
7 | pre/postprocessing of video/image data using OpenCV.
8 | """
9 |
10 | import os
11 |
12 | import cv2
13 | import numpy as np
14 |
15 | import pyarmnn as ann
16 |
17 |
18 | def preprocess_array(x, **kwargs):
19 | x /= 127.5
20 | x -= 1.
21 | return x
22 |
23 | def preprocess(frame: np.ndarray, input_binding_info: tuple):
24 | """
25 | Takes a frame, resizes, swaps channels and converts data type to match
26 | model input layer. The converted frame is wrapped in a const tensor
27 | and bound to the input tensor.
28 |
29 | Args:
30 | frame: Captured frame from video.
31 | input_binding_info: Contains shape and data type of model input layer.
32 |
33 | Returns:
34 | Input tensor.
35 | """
36 | # Swap channels and resize frame to model resolution
37 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
38 | resized_frame = resize_with_aspect_ratio(frame, input_binding_info)
39 |
40 | # Expand dimensions and convert data type to match model input
41 | data_type = np.float32 if input_binding_info[1].GetDataType() == ann.DataType_Float32 else np.uint8
42 | resized_frame = np.expand_dims(np.asarray(resized_frame, dtype=data_type), axis=0)
43 | resized_frame = preprocess_array(resized_frame)
44 | assert resized_frame.shape == tuple(input_binding_info[1].GetShape())
45 |
46 | input_tensors = ann.make_input_tensors([input_binding_info], [resized_frame])
47 | return input_tensors
48 |
49 |
50 | def resize_with_aspect_ratio(frame: np.ndarray, input_binding_info: tuple):
51 | """
52 | Resizes frame while maintaining aspect ratio, padding any empty space.
53 |
54 | Args:
55 | frame: Captured frame.
56 | input_binding_info: Contains shape of model input layer.
57 |
58 | Returns:
59 | Frame resized to the size of model input layer.
60 | """
61 | aspect_ratio = frame.shape[1] / frame.shape[0]
62 | model_height, model_width = list(input_binding_info[1].GetShape())[1:3]
63 |
64 | if aspect_ratio >= 1.0:
65 | new_height, new_width = int(model_width / aspect_ratio), model_width
66 | b_padding, r_padding = model_height - new_height, 0
67 | else:
68 | new_height, new_width = model_height, int(model_height * aspect_ratio)
69 | b_padding, r_padding = 0, model_width - new_width
70 |
71 | # Resize and pad any empty space
72 | frame = cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
73 | frame = cv2.copyMakeBorder(frame, top=0, bottom=b_padding, left=0, right=r_padding,
74 | borderType=cv2.BORDER_CONSTANT, value=[0, 0, 0])
75 | return frame
76 |
77 |
78 | def create_video_writer(video: cv2.VideoCapture, video_path: str, name: str):
79 | """
80 | Creates a video writer object to write processed frames to file.
81 |
82 | Args:
83 | video: Video capture object, contains information about data source.
84 | video_path: User-specified video file path.
 85 |         name: Base name used for the output video file.
86 |
87 | Returns:
88 | Video writer object.
89 | """
90 | _, ext = os.path.splitext(video_path)
91 |
92 | i, filename = 0, os.path.join(str(), f'{name}{ext}')
93 |
94 | while os.path.exists(filename):
95 | i += 1
96 | filename = os.path.join(str(), f'{name}({i}){ext}')
97 | print(filename)
98 | video_writer = cv2.VideoWriter(filename=filename,
99 | fourcc=get_source_encoding_int(video),
100 | fps=int(video.get(cv2.CAP_PROP_FPS)),
101 | frameSize=(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),
102 | int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))))
103 | return video_writer
104 |
105 |
106 | def init_video_file_capture(video_path: str, name: str):
107 | """
108 | Creates a video capture object from a video file.
109 |
110 | Args:
111 | video_path: User-specified video file path.
112 |         name: Base name used for the output video file.
113 |
114 | Returns:
115 | Video capture object to capture frames, video writer object to write processed
116 | frames to file, plus total frame count of video source to iterate through.
117 | """
118 | if not os.path.exists(video_path):
119 | raise FileNotFoundError(f'Video file not found for: {video_path}')
120 | video = cv2.VideoCapture(video_path)
121 |     if not video.isOpened():
122 | raise RuntimeError(f'Failed to open video capture from file: {video_path}')
123 |
124 | video_writer = create_video_writer(video, video_path, name)
125 | iter_frame_count = range(int(video.get(cv2.CAP_PROP_FRAME_COUNT)))
126 | return video, video_writer, iter_frame_count
127 |
128 |
129 | def init_video_stream_capture(video_source: int):
130 | """
131 | Creates a video capture object from a device.
132 |
133 | Args:
134 | video_source: Device index used to read video stream.
135 |
136 | Returns:
137 | Video capture object used to capture frames from a video stream.
138 | """
139 | video = cv2.VideoCapture(video_source)
140 |     if not video.isOpened():
141 | raise RuntimeError(f'Failed to open video capture for device with index: {video_source}')
142 | print('Processing video stream. Press \'Esc\' key to exit the demo.')
143 | return video
144 |
145 |
146 | def draw_bounding_boxes(frame: np.ndarray, detections: list, resize_factor, labels: dict):
147 | """
148 | Draws bounding boxes around detected objects and adds a label and confidence score.
149 |
150 | Args:
151 | frame: The original captured frame from video source.
152 | detections: A list of detected objects in the form [class, [box positions], confidence].
153 | resize_factor: Resizing factor to scale box coordinates to output frame size.
154 | labels: Dictionary of labels and colors keyed on the classification index.
155 | """
156 | for detection in detections:
157 | class_idx, box, confidence = [d for d in detection]
158 | label, color = labels[class_idx][0].capitalize(), labels[class_idx][1]
159 |
160 | # Obtain frame size and resized bounding box positions
161 | frame_height, frame_width = frame.shape[:2]
162 | x_min, y_min, x_max, y_max = [int(position * resize_factor) for position in box]
163 |
164 | # Ensure box stays within the frame
165 | x_min, y_min = max(0, x_min), max(0, y_min)
166 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max)
167 |
168 | # Draw bounding box around detected object
169 | cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)
170 |
171 | # Create label for detected object class
172 | label = f'{label} {confidence * 100:.1f}%'
173 | label_color = (0, 0, 0) if sum(color)>200 else (255, 255, 255)
174 |
175 | # Make sure label always stays on-screen
176 | x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2]
177 |
178 | lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text)
179 | lbl_box_xy_max = (x_min + int(0.55 * x_text), y_min + y_text if y_min<25 else y_min)
180 | lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5)
181 |
182 | # Add label and confidence value
183 | cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1)
184 | cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.50,
185 | label_color, 1, cv2.LINE_AA)
186 |
187 |
188 | def get_source_encoding_int(video_capture):
189 | return int(video_capture.get(cv2.CAP_PROP_FOURCC))
190 |
--------------------------------------------------------------------------------
/examples/armnn/common/network_executor.py:
--------------------------------------------------------------------------------
1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
2 | # SPDX-License-Identifier: MIT
3 |
4 | import os
5 | from typing import List, Tuple
6 |
7 | import pyarmnn as ann
8 | import numpy as np
9 |
10 |
11 | def create_network(model_file: str, backends: list, input_names: Tuple[str] = (), output_names: Tuple[str] = ()):
12 | """
13 | Creates a network based on the model file and a list of backends.
14 |
15 | Args:
16 | model_file: User-specified model file.
17 | backends: List of backends to optimize network.
 18 |         input_names: Optional tuple of input tensor names (unused; names are read from the parser).
 19 |         output_names: Optional tuple of output tensor names (unused; names are read from the parser).
20 |
21 | Returns:
22 | net_id: Unique ID of the network to run.
23 | runtime: Runtime context for executing inference.
24 | input_binding_info: Contains essential information about the model input.
25 | output_binding_info: Used to map output tensor and its memory.
26 | """
27 | if not os.path.exists(model_file):
28 | raise FileNotFoundError(f'Model file not found for: {model_file}')
29 |
30 | _, ext = os.path.splitext(model_file)
31 | if ext == '.tflite':
32 | parser = ann.ITfLiteParser()
33 | else:
34 | raise ValueError("Supplied model file type is not supported. Supported types are [ tflite ]")
35 |
36 | network = parser.CreateNetworkFromBinaryFile(model_file)
37 |
38 | # Specify backends to optimize network
39 | preferred_backends = []
40 | for b in backends:
41 | preferred_backends.append(ann.BackendId(b))
42 |
43 | # Select appropriate device context and optimize the network for that device
44 | options = ann.CreationOptions()
45 | runtime = ann.IRuntime(options)
46 | opt_network, messages = ann.Optimize(network, preferred_backends, runtime.GetDeviceSpec(),
47 | ann.OptimizerOptions())
48 | print(f'Preferred backends: {backends}\n{runtime.GetDeviceSpec()}\n'
49 | f'Optimization warnings: {messages}')
50 |
51 | # Load the optimized network onto the Runtime device
52 | net_id, _ = runtime.LoadNetwork(opt_network)
53 |
54 | # Get input and output binding information
55 | graph_id = parser.GetSubgraphCount() - 1
56 | input_names = parser.GetSubgraphInputTensorNames(graph_id)
57 | input_binding_info = parser.GetNetworkInputBindingInfo(graph_id, input_names[0])
58 | output_names = parser.GetSubgraphOutputTensorNames(graph_id)
59 | output_binding_info = []
60 |
61 | for output_name in output_names:
62 | out_bind_info = parser.GetNetworkOutputBindingInfo(graph_id, output_name)
63 | output_binding_info.append(out_bind_info)
64 |
65 | return net_id, runtime, input_binding_info, output_binding_info
66 |
67 |
68 | def execute_network(input_tensors: list, output_tensors: list, runtime, net_id: int) -> List[np.ndarray]:
69 | """
70 | Executes inference for the loaded network.
71 |
72 | Args:
73 | input_tensors: The input frame tensor.
74 | output_tensors: The output tensor from output node.
75 | runtime: Runtime context for executing inference.
76 | net_id: Unique ID of the network to run.
77 |
78 | Returns:
79 | list: Inference results as a list of ndarrays.
80 | """
81 | runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)
82 | output = ann.workload_tensors_to_ndarray(output_tensors)
83 | return output
84 |
85 |
86 | class ArmnnNetworkExecutor:
87 |
88 | def __init__(self, model_file: str, backends: list):
89 | """
90 | Creates an inference executor for a given network and a list of backends.
91 |
92 | Args:
93 | model_file: User-specified model file.
94 | backends: List of backends to optimize network.
95 | """
96 | self.network_id, self.runtime, self.input_binding_info, self.output_binding_info = create_network(model_file,
97 | backends)
98 | self.output_tensors = ann.make_output_tensors(self.output_binding_info)
99 |
100 | def run(self, input_tensors: list) -> List[np.ndarray]:
101 | """
102 | Executes inference for the loaded network.
103 |
104 | Args:
105 | input_tensors: The input frame tensor.
106 |
107 | Returns:
108 | list: Inference results as a list of ndarrays.
109 | """
110 | return execute_network(input_tensors, self.output_tensors, self.runtime, self.network_id)
111 |
--------------------------------------------------------------------------------
/examples/armnn/common/tests/conftest.py:
--------------------------------------------------------------------------------
1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
2 | # SPDX-License-Identifier: MIT
3 |
4 | import os
5 | import ntpath
6 |
7 | import urllib.request
8 | import zipfile
9 |
10 | import pytest
11 |
12 | script_dir = os.path.dirname(__file__)
13 | @pytest.fixture(scope="session")
14 | def test_data_folder(request):
15 | """
16 | This fixture returns path to folder with shared test resources among all tests
17 | """
18 |
19 | data_dir = os.path.join(script_dir, "testdata")
20 | if not os.path.exists(data_dir):
21 | os.mkdir(data_dir)
22 |
23 | files_to_download = ["https://raw.githubusercontent.com/opencv/opencv/4.0.0/samples/data/messi5.jpg",
24 | "https://raw.githubusercontent.com/opencv/opencv/4.0.0/samples/data/basketball1.png",
25 | "https://raw.githubusercontent.com/opencv/opencv/4.0.0/samples/data/Megamind.avi",
26 | "https://storage.googleapis.com/download.tensorflow.org/models/tflite/coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip"
27 | ]
28 |
29 | for file in files_to_download:
30 | path, filename = ntpath.split(file)
31 | file_path = os.path.join(data_dir, filename)
32 | if not os.path.exists(file_path):
33 | print("\nDownloading test file: " + file_path + "\n")
34 | urllib.request.urlretrieve(file, file_path)
35 |
36 | # Any unzipping needed, and moving around of files
37 | with zipfile.ZipFile(os.path.join(data_dir, "coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip"), 'r') as zip_ref:
38 | zip_ref.extractall(data_dir)
39 |
40 | return data_dir
41 |
--------------------------------------------------------------------------------
/examples/armnn/common/tests/context.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
4 |
5 | import cv_utils
6 | import network_executor
7 | import utils
8 |
--------------------------------------------------------------------------------
/examples/armnn/common/tests/test_network_executor.py:
--------------------------------------------------------------------------------
1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
2 | # SPDX-License-Identifier: MIT
3 |
4 | import os
5 |
6 | import cv2
7 |
8 | from context import network_executor
9 | from context import cv_utils
10 |
11 |
12 | def test_execute_network(test_data_folder):
13 | model_path = os.path.join(test_data_folder, "detect.tflite")
14 | backends = ["CpuAcc", "CpuRef"]
15 |
16 | executor = network_executor.ArmnnNetworkExecutor(model_path, backends)
17 | img = cv2.imread(os.path.join(test_data_folder, "messi5.jpg"))
18 | input_tensors = cv_utils.preprocess(img, executor.input_binding_info)
19 |
20 | output_result = executor.run(input_tensors)
21 |
22 | # Ensure it detects a person
23 | classes = output_result[1]
24 | assert classes[0][0] == 0
25 |
--------------------------------------------------------------------------------
/examples/armnn/common/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
2 | # SPDX-License-Identifier: MIT
3 |
4 | import os
5 |
6 | from context import cv_utils
7 | from context import utils
8 |
9 |
10 | def test_get_source_encoding(test_data_folder):
11 | video_file = os.path.join(test_data_folder, "Megamind.avi")
12 | video, video_writer, frame_count = cv_utils.init_video_file_capture(video_file, "/tmp")
13 | assert cv_utils.get_source_encoding_int(video) == 1145656920
14 |
15 |
16 | def test_read_existing_labels_file(test_data_folder):
17 | label_file = os.path.join(test_data_folder, "labelmap.txt")
18 | labels_map = utils.dict_labels(label_file)
19 | assert labels_map is not None
20 |
--------------------------------------------------------------------------------
/examples/armnn/common/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
2 | # SPDX-License-Identifier: MIT
3 |
4 | """Contains helper functions that can be used across the example apps."""
5 |
6 | import os
7 | import errno
8 | from pathlib import Path
9 |
10 | import numpy as np
11 |
12 |
13 | def dict_labels(labels_file_path: str, include_rgb=False) -> dict:
14 | """Creates a dictionary of labels from the input labels file.
15 |
16 | Args:
 17 |         labels_file_path: Path to the file containing labels to map model outputs.
18 | include_rgb: Adds randomly generated RGB values to the values of the
19 | dictionary. Used for plotting bounding boxes of different colours.
20 |
21 | Returns:
22 | Dictionary with classification indices for keys and labels for values.
23 |
24 | Raises:
25 | FileNotFoundError:
26 | Provided `labels_file_path` does not exist.
27 | """
28 | labels_file = Path(labels_file_path)
29 | if not labels_file.is_file():
30 | raise FileNotFoundError(
31 | errno.ENOENT, os.strerror(errno.ENOENT), labels_file_path
32 | )
33 |
34 | labels = {}
35 | with open(labels_file, "r") as f:
36 | for idx, line in enumerate(f, 0):
37 | if include_rgb:
38 | labels[idx] = line.strip("\n"), tuple(np.random.random(size=3) * 255)
39 | else:
40 | labels[idx] = line.strip("\n")
41 | return labels
42 |
--------------------------------------------------------------------------------
/examples/armnn/face_age-gender/README.md:
--------------------------------------------------------------------------------
1 | # PyArmNN Human face age/gender recognition Sample Application
2 |
3 | ## Introduction
4 | This sample application shows how to perform human face age/gender recognition using the PyArmNN API.
5 |
6 | The application takes two models and a video file or camera feed as input, runs inference on each frame, and draws bounding boxes around detected faces with age/gender labels overlaid.
7 |
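The two models are chained: the first stage detects faces and the second stage classifies each face crop. Below is a condensed view of the loop implemented in `run_video_file.py` (names taken from that script; `video`, `backends`, `resize_factor` and the model paths are assumed to be set up as in the full example):

```python
executor_fd = ArmnnNetworkExecutor(first_model_path, backends)           # stage 1: face detector
executor_age_gender = ArmnnNetworkExecutor(second_model_path, backends)  # stage 2: age/gender classifier

while True:
    frame_present, frame = video.read()
    if not frame_present:
        break
    input_tensors = preprocess(frame, executor_fd.input_binding_info)
    detections = yolo_processing(executor_fd.run(input_tensors))         # face bounding boxes
    face_data = process_faces(frame, detections, executor_age_gender, resize_factor)
    draw_result(frame, detections, resize_factor, face_data)             # overlay age/gender labels
```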
8 | ## Human face age/gender recognition from Video File
9 | Human face age/gender recognition demo that takes a video file, runs inference on each frame producing
10 | bounding boxes and labels around detected faces, and saves the processed video.
11 |
12 | Example usage:
13 |
14 | ```bash
15 | python3 run_video_file.py --first_model_file_path YOLO_best_mAP.tflite --second_model_file_path MobileNet-v1-age-gender.tflite --video_file_path ../../sample_files/test_s.mp4
16 | ```
17 |
18 | ## Human face age/gender recognition from Video Stream
19 |
20 | Human face age/gender recognition demo that takes a video stream from a device, runs inference
21 | on each frame producing bounding boxes and labels around detected faces,
22 | and displays a window with the latest processed frame.
23 |
24 | Example usage:
25 |
26 | ```bash
27 | DISPLAY=:0 python3 run_video_stream.py --first_model_file_path YOLO_best_mAP.tflite --second_model_file_path MobileNet-v1-age-gender.tflite
28 | ```
29 |
30 | This application has been verified to work with the YOLOv2 detection layer MobileNet face detection model and the MobileNet v1 age/gender classification model, which can be downloaded from:
31 |
32 | https://files.seeedstudio.com/ml/age_gender_recognition_models.zip
33 |
--------------------------------------------------------------------------------
/examples/armnn/face_age-gender/box.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 |
4 |
5 | # Todo : BoundBox & its related method extraction
6 | class BoundBox:
7 | def __init__(self, x, y, w, h, c = None, classes = None):
8 | self.x = x
9 | self.y = y
10 | self.w = w
11 | self.h = h
12 |
13 | self.c = c
14 | self.classes = classes
15 |
16 | def get_label(self):
17 | return np.argmax(self.classes)
18 |
19 | def get_score(self):
20 | return self.classes[self.get_label()]
21 |
22 | def iou(self, bound_box):
23 | b1 = self.as_centroid()
24 | b2 = bound_box.as_centroid()
25 | return centroid_box_iou(b1, b2)
26 |
27 | def as_centroid(self):
28 | return np.array([self.x, self.y, self.w, self.h])
29 |
30 |
31 | def boxes_to_array(bound_boxes):
32 | """
33 | # Args
34 | boxes : list of BoundBox instances
35 |
36 | # Returns
37 | centroid_boxes : (N, 4)
38 | probs : (N, nb_classes)
39 | """
40 | centroid_boxes = []
41 | probs = []
42 | for box in bound_boxes:
43 | centroid_boxes.append([box.x, box.y, box.w, box.h])
44 | probs.append(box.classes)
45 | return np.array(centroid_boxes), np.array(probs)
46 |
47 |
48 | def nms_boxes(boxes, n_classes, nms_threshold=0.3, obj_threshold=0.3):
49 | """
50 | # Args
51 | boxes : list of BoundBox
52 |
53 | # Returns
54 | boxes : list of BoundBox
55 | non maximum supressed BoundBox instances
56 | """
57 | # suppress non-maximal boxes
58 | for c in range(n_classes):
59 | sorted_indices = list(reversed(np.argsort([box.classes[c] for box in boxes])))
60 |
61 | for i in range(len(sorted_indices)):
62 | index_i = sorted_indices[i]
63 |
64 | if boxes[index_i].classes[c] == 0:
65 | continue
66 | else:
67 | for j in range(i+1, len(sorted_indices)):
68 | index_j = sorted_indices[j]
69 |
70 | if boxes[index_i].iou(boxes[index_j]) >= nms_threshold:
71 | boxes[index_j].classes[c] = 0
72 | # remove the boxes which are less likely than a obj_threshold
73 | boxes = [box for box in boxes if box.get_score() > obj_threshold]
74 | return boxes
75 |
76 |
77 | def draw_scaled_boxes(image, boxes, probs, labels, desired_size=400):
78 | img_size = min(image.shape[:2])
79 | if img_size < desired_size:
80 | scale_factor = float(desired_size) / img_size
81 | else:
82 | scale_factor = 1.0
83 |
84 | h, w = image.shape[:2]
85 | img_scaled = cv2.resize(image, (int(w*scale_factor), int(h*scale_factor)))
 86 |     if len(boxes) > 0:
 87 |         boxes_scaled = boxes*scale_factor
 88 |         boxes_scaled = boxes_scaled.astype(int)
89 | else:
90 | boxes_scaled = boxes
91 | return draw_boxes(img_scaled, boxes_scaled, probs, labels)
92 |
93 |
94 | def draw_boxes(image, boxes, probs, labels):
95 | for box, classes in zip(boxes, probs):
96 | x1, y1, x2, y2 = box
97 | cv2.rectangle(image, (x1,y1), (x2,y2), (0,255,0), 3)
98 | cv2.putText(image,
99 | '{}: {:.2f}'.format(labels[np.argmax(classes)], classes.max()),
100 | (x1, y1 - 13),
101 | cv2.FONT_HERSHEY_SIMPLEX,
102 | 1e-3 * image.shape[0],
103 | (0,255,0), 2)
104 | return image
105 |
106 |
107 | def centroid_box_iou(box1, box2):
108 | def _interval_overlap(interval_a, interval_b):
109 | x1, x2 = interval_a
110 | x3, x4 = interval_b
111 |
112 | if x3 < x1:
113 | if x4 < x1:
114 | return 0
115 | else:
116 | return min(x2,x4) - x1
117 | else:
118 | if x2 < x3:
119 | return 0
120 | else:
121 | return min(x2,x4) - x3
122 |
123 | _, _, w1, h1 = box1.reshape(-1,)
124 | _, _, w2, h2 = box2.reshape(-1,)
125 | x1_min, y1_min, x1_max, y1_max = to_minmax(box1.reshape(-1,4)).reshape(-1,)
126 | x2_min, y2_min, x2_max, y2_max = to_minmax(box2.reshape(-1,4)).reshape(-1,)
127 |
128 | intersect_w = _interval_overlap([x1_min, x1_max], [x2_min, x2_max])
129 | intersect_h = _interval_overlap([y1_min, y1_max], [y2_min, y2_max])
130 | intersect = intersect_w * intersect_h
131 | union = w1 * h1 + w2 * h2 - intersect
132 |
133 | return float(intersect) / union
134 |
135 |
136 | def to_centroid(minmax_boxes):
137 | """
138 | minmax_boxes : (N, 4)
139 | """
140 |     minmax_boxes = minmax_boxes.astype(float)
141 | centroid_boxes = np.zeros_like(minmax_boxes)
142 |
143 | x1 = minmax_boxes[:,0]
144 | y1 = minmax_boxes[:,1]
145 | x2 = minmax_boxes[:,2]
146 | y2 = minmax_boxes[:,3]
147 |
148 | centroid_boxes[:,0] = (x1 + x2) / 2
149 | centroid_boxes[:,1] = (y1 + y2) / 2
150 | centroid_boxes[:,2] = x2 - x1
151 | centroid_boxes[:,3] = y2 - y1
152 | return centroid_boxes
153 |
154 | def to_minmax(centroid_boxes):
155 |     centroid_boxes = centroid_boxes.astype(float)
156 | minmax_boxes = np.zeros_like(centroid_boxes)
157 |
158 | cx = centroid_boxes[:,0]
159 | cy = centroid_boxes[:,1]
160 | w = centroid_boxes[:,2]
161 | h = centroid_boxes[:,3]
162 |
163 | minmax_boxes[:,0] = cx - w/2
164 | minmax_boxes[:,1] = cy - h/2
165 | minmax_boxes[:,2] = cx + w/2
166 | minmax_boxes[:,3] = cy + h/2
167 | return minmax_boxes
168 |
169 | def create_anchor_boxes(anchors):
170 | """
171 | # Args
172 | anchors : list of floats
173 | # Returns
174 | boxes : array, shape of (len(anchors)/2, 4)
175 | centroid-type
176 | """
177 | boxes = []
178 | n_boxes = int(len(anchors)/2)
179 | for i in range(n_boxes):
180 | boxes.append(np.array([0, 0, anchors[2*i], anchors[2*i+1]]))
181 | return np.array(boxes)
182 |
183 | def find_match_box(centroid_box, centroid_boxes):
184 | """Find the index of the boxes with the largest overlap among the N-boxes.
185 | # Args
186 | box : array, shape of (1, 4)
187 | boxes : array, shape of (N, 4)
188 |
189 | # Return
190 | match_index : int
191 | """
192 | match_index = -1
193 | max_iou = -1
194 |
195 | for i, box in enumerate(centroid_boxes):
196 | iou = centroid_box_iou(centroid_box, box)
197 |
198 | if max_iou < iou:
199 | match_index = i
200 | max_iou = iou
201 | return match_index
202 |
--------------------------------------------------------------------------------
/examples/armnn/face_age-gender/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.19.2
2 | tqdm>=4.47.0
3 |
--------------------------------------------------------------------------------
/examples/armnn/face_age-gender/run_video_file.py:
--------------------------------------------------------------------------------
1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov
3 | # SPDX-License-Identifier: MIT
4 |
5 | """
6 | Human face age/gender recognition demo that takes a video file, runs inference on each frame producing
7 | bounding boxes and labels around detected faces, and saves the processed video.
8 |
9 | python3 run_video_file.py --first_model_file_path YOLO_best_mAP.tflite --second_model_file_path MobileNet-v1-age-gender.tflite --video_file_path ../../sample_files/test_s.mp4
10 |
11 | """
12 |
13 | import os
14 | import sys
15 | import time
16 | script_dir = os.path.dirname(__file__)
17 | sys.path.insert(1, os.path.join(script_dir, '..', 'common'))
18 |
19 | import cv2
20 | import numpy as np
21 | from tqdm import tqdm
22 | from argparse import ArgumentParser
23 |
24 | from yolov2 import yolo_processing, yolo_resize_factor
25 | from cv_utils import init_video_file_capture, resize_with_aspect_ratio, preprocess, preprocess_array
26 | from network_executor import ArmnnNetworkExecutor
27 |
28 | import pyarmnn as ann
29 |
30 | gender_list = ["female","male"]
31 | age_list = ["0-10","11-20","21-45","46-60","60-100"]
32 |
33 | def process_faces(frame, detections, executor_age_gender, resize_factor):
34 | global age_list, gender_list
35 |
36 | result_list = []
37 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
38 |
39 | for detection in detections:
40 | box = detection[1].copy()
41 | for i in range(len(box)):
42 | box[i] = int(box[i] * resize_factor)
43 |
44 | frame_height, frame_width = frame.shape[:2]
45 | x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3]
46 |
47 | # Ensure box stays within the frame
48 | x_min, y_min = max(0, x_min), max(0, y_min)
49 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max)
50 |
51 | face_img = frame[y_min:y_max, x_min:x_max]
52 | face_img = cv2.resize(face_img, (128, 128))
53 |
54 | face_img = face_img.astype(np.float32)
55 | face_img = preprocess_array(face_img)
56 |
57 | input_tensors = ann.make_input_tensors([executor_age_gender.input_binding_info], [face_img])
58 |
59 | result = executor_age_gender.run(input_tensors)
60 | gender = gender_list[np.argmax(result[0][0])]
61 | age = age_list[np.argmax(result[1][0])]
62 |
63 | result_list.append([gender, age])
64 |
65 | return result_list
66 |
67 | def draw_result(frame: np.ndarray, detections: list, resize_factor, face_data):
68 | """
69 | Draws bounding boxes around detected objects and adds a label and confidence score.
70 |
71 | Args:
72 | frame: The original captured frame from video source.
73 | detections: A list of detected objects in the form [class, [box positions], confidence].
74 | resize_factor: Resizing factor to scale box coordinates to output frame size.
75 | face_data: List containing information about age and gender
76 | """
77 | for i in range(len(detections)):
78 | class_idx, box, confidence = [d for d in detections[i]]
79 | color = (255, 0, 0) if face_data[i][0] == 'male' else (0, 0, 255)
80 |
81 | # Obtain frame size and resized bounding box positions
82 | frame_height, frame_width = frame.shape[:2]
83 | x_min, y_min, x_max, y_max = [int(position * resize_factor) for position in box]
84 |
85 | # Ensure box stays within the frame
86 | x_min, y_min = max(0, x_min), max(0, y_min)
87 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max)
88 |
89 | # Draw bounding box around detected object
90 | cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)
91 |
92 | # Create label for detected object class
93 | label = "Gender: {}, Age: {}".format(face_data[i][0], face_data[i][1])
94 | label_color = (255, 255, 255)
95 |
96 | # Make sure label always stays on-screen
97 | x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2]
98 |
99 | lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text)
100 | lbl_box_xy_max = (x_min + int(0.75 * x_text), y_min + y_text if y_min<25 else y_min)
101 | lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5)
102 |
103 | # Add label and confidence value
104 | cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1)
105 | cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.70, label_color, 1, cv2.LINE_AA)
106 |
107 |
108 | def main(args):
109 | video, video_writer, frame_count = init_video_file_capture(args.video_file_path, 'age_gender_demo')
110 | frame_num = len(frame_count)
111 |
112 | executor_fd = ArmnnNetworkExecutor(args.first_model_file_path, args.preferred_backends)
113 | executor_age_gender = ArmnnNetworkExecutor(args.second_model_file_path, args.preferred_backends)
114 |
115 | process_output, resize_factor = yolo_processing, yolo_resize_factor(video, executor_fd.input_binding_info)
116 |
117 | times = []
118 |
119 | for _ in tqdm(frame_count, desc='Processing frames'):
120 | frame_present, frame = video.read()
121 | if not frame_present:
122 | continue
123 |
124 | input_tensors = preprocess(frame, executor_fd.input_binding_info)
125 |
126 | start_time = time.time() # measure only inference and intermediary processing times
127 | output_result = executor_fd.run(input_tensors)
128 | detections = process_output(output_result)
129 | face_data = process_faces(frame, detections, executor_age_gender, resize_factor)
130 | end_time = (time.time() - start_time)*1000
131 |
132 | draw_result(frame, detections, resize_factor, face_data)
133 |
134 | times.append(end_time)
135 | video_writer.write(frame)
136 |
137 | print('Finished processing frames')
138 | video.release(), video_writer.release()
139 |
140 | print("Average time(ms): ", sum(times)//frame_num)
141 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1000.0 / average of inference times for all the frames
142 |
143 | if __name__ == '__main__':
144 | parser = ArgumentParser()
145 | parser.add_argument('--video_file_path', required=True, type=str,
146 | help='Path to the video file to run object detection on')
147 |
148 | parser.add_argument('--first_model_file_path', required=True, type=str,
149 | help='Path to the first stage model to use')
150 | parser.add_argument('--second_model_file_path', required=True, type=str,
151 | help='Path to the second stage model to use')
152 |
153 | parser.add_argument('--preferred_backends', type=str, nargs='+', default=['CpuAcc', 'CpuRef'],
154 | help='Takes the preferred backends in preference order, separated by whitespace, '
155 | 'for example: CpuAcc GpuAcc CpuRef. Accepted options: [CpuAcc, CpuRef, GpuAcc]. '
156 | 'Defaults to [CpuAcc, CpuRef]')
157 | args = parser.parse_args()
158 | main(args)
159 |
--------------------------------------------------------------------------------
/examples/armnn/face_age-gender/run_video_stream.py:
--------------------------------------------------------------------------------
1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov
3 | # SPDX-License-Identifier: MIT
4 |
5 | """
6 | Human face age/gender recognition demo that takes a video stream from a device, runs inference
7 | on each frame producing bounding boxes and labels around detected faces,
8 | and displays a window with the latest processed frame.
9 |
10 | DISPLAY=:0 python3 run_video_stream.py --first_model_file_path YOLO_best_mAP.tflite --second_model_file_path MobileNet-v1-age-gender.tflite
11 |
12 | """
13 |
14 | import os
15 | import sys
16 | import time
17 | script_dir = os.path.dirname(__file__)
18 | sys.path.insert(1, os.path.join(script_dir, '..', 'common'))
19 |
20 | import cv2
21 | import numpy as np
22 | from tqdm import tqdm
23 | from argparse import ArgumentParser
24 |
25 | from yolov2 import yolo_processing, yolo_resize_factor
26 |
27 | from cv_utils import init_video_stream_capture, resize_with_aspect_ratio, preprocess, preprocess_array
28 | from network_executor import ArmnnNetworkExecutor
29 | import pyarmnn as ann
30 |
31 | gender_list = ["female","male"]
32 | age_list = ["0-10","11-20","21-45","46-60","60-100"]
33 |
34 | def process_faces(frame, detections, executor_age_gender, resize_factor):
35 | global age_list, gender_list
36 |
37 | result_list = []
38 |
39 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
40 |
41 | for detection in detections:
42 | box = detection[1].copy()
43 | for i in range(len(box)):
44 | box[i] = int(box[i] * resize_factor)
45 |
46 | frame_height, frame_width = frame.shape[:2]
47 | x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3]
48 |
49 | # Ensure box stays within the frame
50 | x_min, y_min = max(0, x_min), max(0, y_min)
51 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max)
52 |
53 | face_img = frame[y_min:y_max, x_min:x_max]
54 | face_img = cv2.resize(face_img, (128, 128))
55 |
56 | #cv2.imshow('PyArmNN Object Detection Demo face', face_img)
57 |
58 | face_img = face_img.astype(np.float32)
59 | face_img = preprocess_array(face_img)
60 |
61 | input_tensors = ann.make_input_tensors([executor_age_gender.input_binding_info], [face_img])
62 |
63 | result = executor_age_gender.run(input_tensors)
64 | gender = gender_list[np.argmax(result[0][0])]
65 | age = age_list[np.argmax(result[1][0])]
66 |
67 | result_list.append([gender, age])
68 |
69 | return result_list
70 |
71 | def draw_result(frame: np.ndarray, detections: list, resize_factor, face_data):
72 | """
73 | Draws bounding boxes around detected objects and adds a label and confidence score.
74 |
75 | Args:
76 | frame: The original captured frame from video source.
77 | detections: A list of detected objects in the form [class, [box positions], confidence].
78 | resize_factor: Resizing factor to scale box coordinates to output frame size.
79 | face_data: List containing information about age and gender
80 | """
81 |
82 | for i in range(len(detections)):
83 | class_idx, box, confidence = [d for d in detections[i]]
84 | color = (255, 0, 0) if face_data[i][0] == 'male' else (0, 0, 255)
85 |
86 | # Obtain frame size and resized bounding box positions
87 | frame_height, frame_width = frame.shape[:2]
88 | x_min, y_min, x_max, y_max = [int(position * resize_factor) for position in box]
89 |
90 | # Ensure box stays within the frame
91 | x_min, y_min = max(0, x_min), max(0, y_min)
92 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max)
93 |
94 | # Draw bounding box around detected object
95 | cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)
96 |
97 | # Create label for detected object class
98 | label = "Gender: {}, Age: {}".format(face_data[i][0], face_data[i][1])
99 | label_color = (255, 255, 255)
100 |
101 | # Make sure label always stays on-screen
102 | x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2]
103 |
104 | lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text)
105 | lbl_box_xy_max = (x_min + int(0.75 * x_text), y_min + y_text if y_min<25 else y_min)
106 | lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5)
107 |
108 | # Add label and confidence value
109 | cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1)
110 | cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.70, label_color, 1, cv2.LINE_AA)
111 |
112 |
113 | def main(args):
114 | video = init_video_stream_capture(args.video_source)
115 |
116 | executor_fd = ArmnnNetworkExecutor(args.first_model_file_path, args.preferred_backends)
117 | executor_age_gender = ArmnnNetworkExecutor(args.second_model_file_path, args.preferred_backends)
118 |
119 | process_output, resize_factor = yolo_processing, yolo_resize_factor(video, executor_fd.input_binding_info)
120 |
121 | while True:
122 |
123 | frame_present, frame = video.read()
124 |         if not frame_present:
125 |             raise RuntimeError('Error reading frame from video stream')
126 |         frame = cv2.flip(frame, 1) # Horizontally flip the frame
127 | input_tensors = preprocess(frame, executor_fd.input_binding_info)
128 | print("Running inference...")
129 |
130 | start_time = time.time() # start time of the inference
131 | output_result = executor_fd.run(input_tensors)
132 | detections = process_output(output_result)
133 | face_data = process_faces(frame, detections, executor_age_gender, resize_factor)
134 |
135 | print("FPS: ", 1.0 / (time.time() - start_time)) # FPS = 1 / time to process loop
136 | print("Time(ms): ", (time.time() - start_time)*1000)
137 |
138 | draw_result(frame, detections, resize_factor, face_data)
139 | cv2.imshow('PyArmNN Object Detection Demo', frame)
140 |
141 | if cv2.waitKey(1) == 27:
142 | print('\nExit key activated. Closing video...')
143 | break
144 | video.release(), cv2.destroyAllWindows()
145 |
146 |
147 | if __name__ == '__main__':
148 | parser = ArgumentParser()
149 | parser.add_argument('--video_source', type=int, default=0,
150 | help='Device index to access video stream. Defaults to primary device camera at index 0')
151 |
152 | parser.add_argument('--first_model_file_path', required=True, type=str,
153 | help='Path to the first stage model to use')
154 | parser.add_argument('--second_model_file_path', required=True, type=str,
155 | help='Path to the second stage model to use')
156 |
157 | parser.add_argument('--preferred_backends', type=str, nargs='+', default=['CpuAcc', 'CpuRef'],
158 | help='Takes the preferred backends in preference order, separated by whitespace, '
159 | 'for example: CpuAcc GpuAcc CpuRef. Accepted options: [CpuAcc, CpuRef, GpuAcc]. '
160 | 'Defaults to [CpuAcc, CpuRef]')
161 | args = parser.parse_args()
162 | main(args)
163 |
--------------------------------------------------------------------------------
/examples/armnn/face_age-gender/yolov2.py:
--------------------------------------------------------------------------------
1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
2 | # SPDX-License-Identifier: MIT
3 |
4 | """
5 | Contains functions specific to decoding and processing inference results for YOLO v2 models.
6 | """
7 |
8 | import cv2
9 | import numpy as np
10 | from box import BoundBox, nms_boxes, boxes_to_array, to_minmax, draw_boxes
11 |
12 |
13 | def yolo_processing(netout):
14 |     """Convert YOLO network output to bounding boxes
15 | 
16 |     # Args
17 |         netout : 4d-array, shape of (grid_h, grid_w, num of boxes per grid, 5 + n_classes)
18 |             YOLO neural network output array
19 | 
20 |     # Returns
21 |         boxes : array, shape of (N, 4)
22 |             coordinate scale is normalized [0, 1]
23 |         probs : array, shape of (N, nb_classes)
24 |     """
25 |     anchors = [1.889, 2.5245, 2.9465, 3.94056, 3.99987, 5.3658, 5.155437, 6.92275, 6.718375, 9.01025]
26 |     nms_threshold = 0.2
27 | netout = netout[0].reshape(7,7,5,6)
28 | grid_h, grid_w, nb_box = netout.shape[:3]
29 | boxes = []
30 |
31 | # decode the output by the network
32 | netout[..., 4] = _sigmoid(netout[..., 4])
33 | netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])
34 | netout[..., 5:] *= netout[..., 5:] > 0.3
35 |
36 | for row in range(grid_h):
37 | for col in range(grid_w):
38 | for b in range(nb_box):
39 |                 # elements from index 5 onwards are the class probabilities
40 | classes = netout[row,col,b,5:]
41 |
42 | if np.sum(classes) > 0:
43 | # first 4 elements are x, y, w, and h
44 | x, y, w, h = netout[row,col,b,:4]
45 |
46 | x = (col + _sigmoid(x)) / grid_w # center position, unit: image width
47 | y = (row + _sigmoid(y)) / grid_h # center position, unit: image height
48 | w = anchors[2 * b + 0] * np.exp(w) / grid_w # unit: image width
49 | h = anchors[2 * b + 1] * np.exp(h) / grid_h # unit: image height
50 | confidence = netout[row,col,b,4]
51 | box = BoundBox(x, y, w, h, confidence, classes)
52 | boxes.append(box)
53 |
54 | boxes = nms_boxes(boxes, len(classes), nms_threshold, 0.3)
55 | boxes, probs = boxes_to_array(boxes)
56 | #print(boxes)
57 | predictions = []
58 | def _to_original_scale(boxes):
59 | minmax_boxes = to_minmax(boxes)
60 | minmax_boxes[:,0] *= 224
61 | minmax_boxes[:,2] *= 224
62 | minmax_boxes[:,1] *= 224
63 | minmax_boxes[:,3] *= 224
64 |         return minmax_boxes.astype(int)
65 |
66 | if len(boxes) > 0:
67 | boxes = _to_original_scale(boxes)
68 |
69 | for i in range(len(boxes)):
70 | predictions.append([0, boxes[i], probs[i][0]])
71 |
72 | return predictions
73 |
74 | def _sigmoid(x):
75 | return 1. / (1. + np.exp(-x))
76 |
77 | def _softmax(x, axis=-1, t=-100.):
78 | x = x - np.max(x)
79 | if np.min(x) < t:
80 | x = x/np.min(x)*t
81 | e_x = np.exp(x)
82 | return e_x / e_x.sum(axis, keepdims=True)
83 |
84 | def yolo_resize_factor(video: cv2.VideoCapture, input_binding_info: tuple):
85 | """
86 | Gets a multiplier to scale the bounding box positions to
87 | their correct position in the frame.
88 |
89 | Args:
90 | video: Video capture object, contains information about data source.
91 | input_binding_info: Contains shape of model input layer.
92 |
93 | Returns:
94 | Resizing factor to scale box coordinates to output frame size.
95 | """
96 | frame_height = video.get(cv2.CAP_PROP_FRAME_HEIGHT)
97 | frame_width = video.get(cv2.CAP_PROP_FRAME_WIDTH)
98 | model_height, model_width = list(input_binding_info[1].GetShape())[1:3]
99 | return max(frame_height, frame_width) / max(model_height, model_width)
100 |
--------------------------------------------------------------------------------
/examples/armnn/face_keypoints/README.md:
--------------------------------------------------------------------------------
1 | # PyArmNN Face keypoint detection Sample Application
2 |
3 | ## Introduction
4 | This sample application shows how to perform face keypoint detection using the PyArmNN API.
5 |
6 | The application takes two models and a video file or camera feed as input, runs inference on each frame, and draws bounding boxes around detected faces and five keypoints (left eye, right eye, nose, left mouth corner, right mouth corner), with the corresponding labels and confidence scores overlaid.
7 |
8 | ## Face keypoint detection from Video File
9 | Face keypoint detection demo that takes a video file, runs inference on each frame producing
10 | bounding boxes and five keypoints on detected faces, and saves the processed video.
11 |
12 | Example usage:
13 |
14 | ```bash
15 | python3 run_video_file.py --first_model_file_path YOLO_best_mAP.tflite --second_model_file_path MobileFaceNet_kpts.tflite --video_file_path ../../sample_files/test_s.mp4
16 | ```
17 |
18 | ## Face keypoint detection from Video Stream
19 |
20 | Face keypoint detection demo that takes a video stream from a device, runs inference
21 | on each frame producing bounding boxes and five keypoints on detected faces, and displays a window with the latest processed frame.
22 |
23 | Example usage:
24 |
25 | ```bash
26 | DISPLAY=:0 python3 run_video_stream.py --first_model_file_path YOLO_best_mAP.tflite --second_model_file_path MobileFaceNet_kpts.tflite
27 | ```
28 |
29 | This application has been verified to work with MobileNet models using a YOLOv2 detection layer and the MobileFaceNet keypoint detector, which can be downloaded from:
30 |
31 | https://files.seeedstudio.com/ml/keypoint_detection_models.zip
32 |
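33 | ## Checking the first stage model input size
34 | 
35 | The YOLOv2 decoder in `yolov2.py` scales the normalized bounding boxes back to a 224x224 input resolution, so the first stage detection model is expected to take a 224x224 input. As a quick sanity check, you can print the input shape that PyArmNN reports for your model. This is only a minimal sketch that reuses the `ArmnnNetworkExecutor` helper from the `common` folder; the model filename below is a placeholder:
36 | 
37 | ```python
38 | import os
39 | import sys
40 | 
41 | # Make the shared helpers in examples/armnn/common importable
42 | sys.path.insert(1, os.path.join(os.path.dirname(__file__), '..', 'common'))
43 | 
44 | from network_executor import ArmnnNetworkExecutor
45 | 
46 | # Placeholder path: point this at your first stage detection model
47 | executor = ArmnnNetworkExecutor('YOLO_best_mAP.tflite', ['CpuAcc', 'CpuRef'])
48 | 
49 | # input_binding_info[1] holds the input tensor info; the shape is expected to be NHWC
50 | print(list(executor.input_binding_info[1].GetShape()))
51 | ```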
--------------------------------------------------------------------------------
/examples/armnn/face_keypoints/box.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 |
4 |
5 | # Todo : BoundBox & its related method extraction
6 | class BoundBox:
7 | def __init__(self, x, y, w, h, c = None, classes = None):
8 | self.x = x
9 | self.y = y
10 | self.w = w
11 | self.h = h
12 |
13 | self.c = c
14 | self.classes = classes
15 |
16 | def get_label(self):
17 | return np.argmax(self.classes)
18 |
19 | def get_score(self):
20 | return self.classes[self.get_label()]
21 |
22 | def iou(self, bound_box):
23 | b1 = self.as_centroid()
24 | b2 = bound_box.as_centroid()
25 | return centroid_box_iou(b1, b2)
26 |
27 | def as_centroid(self):
28 | return np.array([self.x, self.y, self.w, self.h])
29 |
30 |
31 | def boxes_to_array(bound_boxes):
32 | """
33 | # Args
34 | boxes : list of BoundBox instances
35 |
36 | # Returns
37 | centroid_boxes : (N, 4)
38 | probs : (N, nb_classes)
39 | """
40 | centroid_boxes = []
41 | probs = []
42 | for box in bound_boxes:
43 | centroid_boxes.append([box.x, box.y, box.w, box.h])
44 | probs.append(box.classes)
45 | return np.array(centroid_boxes), np.array(probs)
46 |
47 |
48 | def nms_boxes(boxes, n_classes, nms_threshold=0.3, obj_threshold=0.3):
49 | """
50 | # Args
51 | boxes : list of BoundBox
52 |
53 | # Returns
54 | boxes : list of BoundBox
55 | non maximum supressed BoundBox instances
56 | """
57 | # suppress non-maximal boxes
58 | for c in range(n_classes):
59 | sorted_indices = list(reversed(np.argsort([box.classes[c] for box in boxes])))
60 |
61 | for i in range(len(sorted_indices)):
62 | index_i = sorted_indices[i]
63 |
64 | if boxes[index_i].classes[c] == 0:
65 | continue
66 | else:
67 | for j in range(i+1, len(sorted_indices)):
68 | index_j = sorted_indices[j]
69 |
70 | if boxes[index_i].iou(boxes[index_j]) >= nms_threshold:
71 | boxes[index_j].classes[c] = 0
72 | # remove the boxes which are less likely than a obj_threshold
73 | boxes = [box for box in boxes if box.get_score() > obj_threshold]
74 | return boxes
75 |
76 |
77 | def draw_scaled_boxes(image, boxes, probs, labels, desired_size=400):
78 | img_size = min(image.shape[:2])
79 | if img_size < desired_size:
80 | scale_factor = float(desired_size) / img_size
81 | else:
82 | scale_factor = 1.0
83 |
84 | h, w = image.shape[:2]
85 | img_scaled = cv2.resize(image, (int(w*scale_factor), int(h*scale_factor)))
86 |     if len(boxes) > 0:
87 | boxes_scaled = boxes*scale_factor
88 |         boxes_scaled = boxes_scaled.astype(int)
89 | else:
90 | boxes_scaled = boxes
91 | return draw_boxes(img_scaled, boxes_scaled, probs, labels)
92 |
93 |
94 | def draw_boxes(image, boxes, probs, labels):
95 | for box, classes in zip(boxes, probs):
96 | x1, y1, x2, y2 = box
97 | cv2.rectangle(image, (x1,y1), (x2,y2), (0,255,0), 3)
98 | cv2.putText(image,
99 | '{}: {:.2f}'.format(labels[np.argmax(classes)], classes.max()),
100 | (x1, y1 - 13),
101 | cv2.FONT_HERSHEY_SIMPLEX,
102 | 1e-3 * image.shape[0],
103 | (0,255,0), 2)
104 | return image
105 |
106 |
107 | def centroid_box_iou(box1, box2):
108 | def _interval_overlap(interval_a, interval_b):
109 | x1, x2 = interval_a
110 | x3, x4 = interval_b
111 |
112 | if x3 < x1:
113 | if x4 < x1:
114 | return 0
115 | else:
116 | return min(x2,x4) - x1
117 | else:
118 | if x2 < x3:
119 | return 0
120 | else:
121 | return min(x2,x4) - x3
122 |
123 | _, _, w1, h1 = box1.reshape(-1,)
124 | _, _, w2, h2 = box2.reshape(-1,)
125 | x1_min, y1_min, x1_max, y1_max = to_minmax(box1.reshape(-1,4)).reshape(-1,)
126 | x2_min, y2_min, x2_max, y2_max = to_minmax(box2.reshape(-1,4)).reshape(-1,)
127 |
128 | intersect_w = _interval_overlap([x1_min, x1_max], [x2_min, x2_max])
129 | intersect_h = _interval_overlap([y1_min, y1_max], [y2_min, y2_max])
130 | intersect = intersect_w * intersect_h
131 | union = w1 * h1 + w2 * h2 - intersect
132 |
133 | return float(intersect) / union
134 |
135 |
136 | def to_centroid(minmax_boxes):
137 | """
138 | minmax_boxes : (N, 4)
139 | """
140 |     minmax_boxes = minmax_boxes.astype(float)
141 | centroid_boxes = np.zeros_like(minmax_boxes)
142 |
143 | x1 = minmax_boxes[:,0]
144 | y1 = minmax_boxes[:,1]
145 | x2 = minmax_boxes[:,2]
146 | y2 = minmax_boxes[:,3]
147 |
148 | centroid_boxes[:,0] = (x1 + x2) / 2
149 | centroid_boxes[:,1] = (y1 + y2) / 2
150 | centroid_boxes[:,2] = x2 - x1
151 | centroid_boxes[:,3] = y2 - y1
152 | return centroid_boxes
153 |
154 | def to_minmax(centroid_boxes):
155 |     centroid_boxes = centroid_boxes.astype(float)
156 | minmax_boxes = np.zeros_like(centroid_boxes)
157 |
158 | cx = centroid_boxes[:,0]
159 | cy = centroid_boxes[:,1]
160 | w = centroid_boxes[:,2]
161 | h = centroid_boxes[:,3]
162 |
163 | minmax_boxes[:,0] = cx - w/2
164 | minmax_boxes[:,1] = cy - h/2
165 | minmax_boxes[:,2] = cx + w/2
166 | minmax_boxes[:,3] = cy + h/2
167 | return minmax_boxes
168 |
169 | def create_anchor_boxes(anchors):
170 | """
171 | # Args
172 | anchors : list of floats
173 | # Returns
174 | boxes : array, shape of (len(anchors)/2, 4)
175 | centroid-type
176 | """
177 | boxes = []
178 | n_boxes = int(len(anchors)/2)
179 | for i in range(n_boxes):
180 | boxes.append(np.array([0, 0, anchors[2*i], anchors[2*i+1]]))
181 | return np.array(boxes)
182 |
183 | def find_match_box(centroid_box, centroid_boxes):
184 | """Find the index of the boxes with the largest overlap among the N-boxes.
185 | # Args
186 | box : array, shape of (1, 4)
187 | boxes : array, shape of (N, 4)
188 |
189 | # Return
190 | match_index : int
191 | """
192 | match_index = -1
193 | max_iou = -1
194 |
195 | for i, box in enumerate(centroid_boxes):
196 | iou = centroid_box_iou(centroid_box, box)
197 |
198 | if max_iou < iou:
199 | match_index = i
200 | max_iou = iou
201 | return match_index
202 |
--------------------------------------------------------------------------------
/examples/armnn/face_keypoints/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.19.2
2 | tqdm>=4.47.0
3 |
--------------------------------------------------------------------------------
/examples/armnn/face_keypoints/run_video_file.py:
--------------------------------------------------------------------------------
1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov
3 | # SPDX-License-Identifier: MIT
4 |
5 | """
6 | Face keypoint detection demo that takes a video file, runs inference on each frame producing
7 | bounding boxes and five keypoints on detected faces, and saves the processed video.
8 |
9 | python3 run_video_file.py --first_model_file_path YOLO_best_mAP.tflite --second_model_file_path MobileFaceNet_kpts.tflite --video_file_path ../../sample_files/test_s.mp4
10 |
11 | """
12 |
13 | import os
14 | import sys
15 | import time
16 | script_dir = os.path.dirname(__file__)
17 | sys.path.insert(1, os.path.join(script_dir, '..', 'common'))
18 |
19 | import cv2
20 | import numpy as np
21 | from tqdm import tqdm
22 | from argparse import ArgumentParser
23 |
24 | from yolov2 import yolo_processing, yolo_resize_factor
25 | from utils import dict_labels
26 | from cv_utils import init_video_file_capture, resize_with_aspect_ratio, preprocess, preprocess_array
27 | from network_executor import ArmnnNetworkExecutor
28 | import pyarmnn as ann
29 |
30 | def process_faces(frame, detections, executor_kp, resize_factor):
31 | kpts_list = []
32 |
33 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
34 |
35 | for detection in detections:
36 | box = detection[1].copy()
37 | for i in range(len(box)):
38 | box[i] = int(box[i] * resize_factor)
39 |
40 | x, y, w, h = box[0], box[1], box[2] - box[0], box[3] - box[1]
41 | frame_height, frame_width = frame.shape[:2]
42 | x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3]
43 | # Ensure box stays within the frame
44 | x_min, y_min = max(0, x_min), max(0, y_min)
45 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max)
46 |
47 | face_img = frame[y_min:y_max, x_min:x_max]
48 | face_img = cv2.resize(face_img, (128, 128))
49 |
50 | face_img = face_img.astype(np.float32)
51 | face_img = preprocess_array(face_img)
52 |
53 | input_tensors = ann.make_input_tensors([executor_kp.input_binding_info], [face_img])
54 |
55 | plist = executor_kp.run(input_tensors)[0][0]
56 |
57 | le = (x + int(plist[0] * w+5), y + int(plist[1] * h+5))
58 | re = (x + int(plist[2] * w), y + int(plist[3] * h))
59 | n = (x + int(plist[4] * w), y + int(plist[5] * h))
60 | lm = (x + int(plist[6] * w), y + int(plist[7] * h))
61 | rm = (x + int(plist[8] * w), y + int(plist[9] * h))
62 | kpts = [le, re, n, lm, rm]
63 |
64 | kpts_list.append(kpts)
65 |
66 | return kpts_list
67 |
68 | def draw_result(frame: np.ndarray, detections: list, resize_factor, kpts):
69 | """
70 | Draws bounding boxes around detected objects and adds a label and confidence score.
71 |
72 | Args:
73 | frame: The original captured frame from video source.
74 | detections: A list of detected objects in the form [class, [box positions], confidence].
75 | resize_factor: Resizing factor to scale box coordinates to output frame size.
76 | kpts: List containing information about face keypoints in format [[le, re, n, lm, rm], [le, re, n, lm, rm], ...]
77 | """
78 | for i in range(len(detections)):
79 | class_idx, box, confidence = [d for d in detections[i]]
80 | label, color = 'Person', (0, 255, 0)
81 |
82 | # Obtain frame size and resized bounding box positions
83 | frame_height, frame_width = frame.shape[:2]
84 | x_min, y_min, x_max, y_max = [int(position * resize_factor) for position in box]
85 |
86 | # Ensure box stays within the frame
87 | x_min, y_min = max(0, x_min), max(0, y_min)
88 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max)
89 |
90 | # Draw bounding box around detected object
91 | cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)
92 |
93 | # Create label for detected object class
94 | label = f'{label} {confidence * 100:.1f}%'
95 | label_color = (0, 0, 0) if sum(color)>200 else (255, 255, 255)
96 |
97 | # Make sure label always stays on-screen
98 | x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2]
99 |
100 | lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text)
101 | lbl_box_xy_max = (x_min + int(0.55 * x_text), y_min + y_text if y_min<25 else y_min)
102 | lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5)
103 |
104 | # Add label and confidence value
105 | cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1)
106 | cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.50,
107 | label_color, 1, cv2.LINE_AA)
108 |
109 | for kpt in kpts[i]:
110 | cv2.circle(frame, (int(kpt[0]), int(kpt[1])), 5, (0, 0, 255), 5)
111 |
112 |
113 | def main(args):
114 | video, video_writer, frame_count = init_video_file_capture(args.video_file_path, 'face_keypoint_demo')
115 | frame_num = len(frame_count)
116 |
117 | executor_fd = ArmnnNetworkExecutor(args.first_model_file_path, args.preferred_backends)
118 | executor_kp = ArmnnNetworkExecutor(args.second_model_file_path, args.preferred_backends)
119 |
120 | process_output, resize_factor = yolo_processing, yolo_resize_factor(video, executor_fd.input_binding_info)
121 |
122 | times = []
123 |
124 | for _ in tqdm(frame_count, desc='Processing frames'):
125 | frame_present, frame = video.read()
126 | if not frame_present:
127 | continue
128 |
129 | input_tensors = preprocess(frame, executor_fd.input_binding_info)
130 |
131 | start_time = time.time() # start time of the loop
132 | output_result = executor_fd.run(input_tensors)
133 | detections = process_output(output_result)
134 | kpts = process_faces(frame, detections, executor_kp, resize_factor)
135 | end_time = (time.time() - start_time)*1000
136 |
137 | draw_result(frame, detections, resize_factor, kpts)
138 | times.append(end_time)
139 | video_writer.write(frame)
140 |
141 | print('Finished processing frames')
142 | video.release(), video_writer.release()
143 |
144 | print("Average time(ms): ", sum(times)//frame_num)
145 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1 / time to process loop
146 |
147 | if __name__ == '__main__':
148 | parser = ArgumentParser()
149 | parser.add_argument('--video_file_path', required=True, type=str,
150 | help='Path to the video file to run object detection on')
151 |
152 | parser.add_argument('--first_model_file_path', required=True, type=str,
153 | help='Path to the first stage model to use')
154 | parser.add_argument('--second_model_file_path', required=True, type=str,
155 | help='Path to the second stage model to use')
156 |
157 | parser.add_argument('--preferred_backends', type=str, nargs='+', default=['CpuAcc', 'CpuRef'],
158 | help='Takes the preferred backends in preference order, separated by whitespace, '
159 | 'for example: CpuAcc GpuAcc CpuRef. Accepted options: [CpuAcc, CpuRef, GpuAcc]. '
160 | 'Defaults to [CpuAcc, CpuRef]')
161 | args = parser.parse_args()
162 | main(args)
163 |
--------------------------------------------------------------------------------
/examples/armnn/face_keypoints/run_video_stream.py:
--------------------------------------------------------------------------------
1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov
3 | # SPDX-License-Identifier: MIT
4 |
5 | """
6 | Face keypoint detection demo that takes a video stream from a device, runs inference
7 | on each frame producing bounding boxes and five keypoints on detected faces, and displays a window with the latest processed frame.
8 |
9 | DISPLAY=:0 python3 run_video_stream.py --first_model_file_path YOLO_best_mAP.tflite --second_model_file_path MobileFaceNet_kpts.tflite
10 |
11 | """
12 |
13 | import os
14 | import sys
15 | import time
16 | script_dir = os.path.dirname(__file__)
17 | sys.path.insert(1, os.path.join(script_dir, '..', 'common'))
18 |
19 | import cv2
20 | import numpy as np
21 | from tqdm import tqdm
22 | from argparse import ArgumentParser
23 |
24 | from yolov2 import yolo_processing, yolo_resize_factor
25 |
26 | from cv_utils import init_video_stream_capture, resize_with_aspect_ratio, preprocess, preprocess_array
27 | from network_executor import ArmnnNetworkExecutor
28 | import pyarmnn as ann
29 |
30 | def process_faces(frame, detections, executor_kp, resize_factor):
31 | kpts_list = []
32 |
33 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
34 |
35 | for detection in detections:
36 | box = detection[1].copy()
37 | for i in range(len(box)):
38 | box[i] = int(box[i] * resize_factor)
39 |
40 | x, y, w, h = box[0], box[1], box[2] - box[0], box[3] - box[1]
41 | frame_height, frame_width = frame.shape[:2]
42 | x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3]
43 | # Ensure box stays within the frame
44 | x_min, y_min = max(0, x_min), max(0, y_min)
45 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max)
46 |
47 | face_img = frame[y_min:y_max, x_min:x_max]
48 | face_img = cv2.resize(face_img, (128, 128))
49 |
50 | face_img = face_img.astype(np.float32)
51 | face_img = preprocess_array(face_img)
52 |
53 | input_tensors = ann.make_input_tensors([executor_kp.input_binding_info], [face_img])
54 |
55 | plist = executor_kp.run(input_tensors)[0][0]
56 |
57 | le = (x + int(plist[0] * w+5), y + int(plist[1] * h+5))
58 | re = (x + int(plist[2] * w), y + int(plist[3] * h))
59 | n = (x + int(plist[4] * w), y + int(plist[5] * h))
60 | lm = (x + int(plist[6] * w), y + int(plist[7] * h))
61 | rm = (x + int(plist[8] * w), y + int(plist[9] * h))
62 | kpts = [le, re, n, lm, rm]
63 |
64 | kpts_list.append(kpts)
65 |
66 | return kpts_list
67 |
68 | def draw_result(frame: np.ndarray, detections: list, resize_factor, kpts):
69 | """
70 | Draws bounding boxes around detected objects and adds a label and confidence score.
71 |
72 | Args:
73 | frame: The original captured frame from video source.
74 | detections: A list of detected objects in the form [class, [box positions], confidence].
75 | resize_factor: Resizing factor to scale box coordinates to output frame size.
76 | kpts: List containing information about face keypoints in format [[le, re, n, lm, rm], [le, re, n, lm, rm], ...]
77 | """
78 |
79 | for i in range(len(detections)):
80 | class_idx, box, confidence = [d for d in detections[i]]
81 | label, color = 'Person', (0, 255, 0)
82 |
83 | # Obtain frame size and resized bounding box positions
84 | frame_height, frame_width = frame.shape[:2]
85 | x_min, y_min, x_max, y_max = [int(position * resize_factor) for position in box]
86 |
87 | # Ensure box stays within the frame
88 | x_min, y_min = max(0, x_min), max(0, y_min)
89 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max)
90 |
91 | # Draw bounding box around detected object
92 | cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)
93 |
94 | # Create label for detected object class
95 | label = f'{label} {confidence * 100:.1f}%'
96 | label_color = (0, 0, 0) if sum(color)>200 else (255, 255, 255)
97 |
98 | # Make sure label always stays on-screen
99 | x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2]
100 |
101 | lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text)
102 | lbl_box_xy_max = (x_min + int(0.55 * x_text), y_min + y_text if y_min<25 else y_min)
103 | lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5)
104 |
105 | # Add label and confidence value
106 | cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1)
107 | cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.50,
108 | label_color, 1, cv2.LINE_AA)
109 |
110 | for kpt in kpts[i]:
111 | cv2.circle(frame, (int(kpt[0]), int(kpt[1])), 5, (0, 0, 255), 5)
112 |
113 | def main(args):
114 | video = init_video_stream_capture(args.video_source)
115 |
116 | executor_fd = ArmnnNetworkExecutor(args.first_model_file_path, args.preferred_backends)
117 | executor_kp = ArmnnNetworkExecutor(args.second_model_file_path, args.preferred_backends)
118 |
119 | process_output, resize_factor = yolo_processing, yolo_resize_factor(video, executor_fd.input_binding_info)
120 |
121 | while True:
122 |
123 |         frame_present, frame = video.read()
124 |         if not frame_present:
125 |             raise RuntimeError('Error reading frame from video stream')
126 |         frame = cv2.flip(frame, 1)  # Horizontally flip the frame
127 | input_tensors = preprocess(frame, executor_fd.input_binding_info)
128 | print("Running inference...")
129 |
130 | start_time = time.time() # start time of the loop
131 | output_result = executor_fd.run(input_tensors)
132 | detections = process_output(output_result)
133 | kpts = process_faces(frame, detections, executor_kp, resize_factor)
134 |
135 | print("FPS: ", 1.0 / (time.time() - start_time)) # FPS = 1 / time to process loop
136 | print("Time(ms): ", (time.time() - start_time)*1000)
137 |
138 | draw_result(frame, detections, resize_factor, kpts)
139 | cv2.imshow('PyArmNN Object Detection Demo', frame)
140 |
141 | if cv2.waitKey(1) == 27:
142 | print('\nExit key activated. Closing video...')
143 | break
144 | video.release(), cv2.destroyAllWindows()
145 |
146 |
147 | if __name__ == '__main__':
148 | parser = ArgumentParser()
149 | parser.add_argument('--video_source', type=int, default=0,
150 | help='Device index to access video stream. Defaults to primary device camera at index 0')
151 |
152 | parser.add_argument('--first_model_file_path', required=True, type=str,
153 | help='Path to the first stage model to use')
154 | parser.add_argument('--second_model_file_path', required=True, type=str,
155 | help='Path to the second stage model to use')
156 |
157 | parser.add_argument('--preferred_backends', type=str, nargs='+', default=['CpuAcc', 'CpuRef'],
158 | help='Takes the preferred backends in preference order, separated by whitespace, '
159 | 'for example: CpuAcc GpuAcc CpuRef. Accepted options: [CpuAcc, CpuRef, GpuAcc]. '
160 | 'Defaults to [CpuAcc, CpuRef]')
161 | args = parser.parse_args()
162 | main(args)
163 |
--------------------------------------------------------------------------------
/examples/armnn/face_keypoints/yolov2.py:
--------------------------------------------------------------------------------
1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
2 | # SPDX-License-Identifier: MIT
3 |
4 | """
5 | Contains functions specific to decoding and processing inference results for YOLO v2 models.
6 | """
7 |
8 | import cv2
9 | import numpy as np
10 | from box import BoundBox, nms_boxes, boxes_to_array, to_minmax, draw_boxes
11 |
12 |
13 | def yolo_processing(netout):
14 |     """Convert Yolo network output to bounding box
15 | 
16 |     # Args
17 |         netout : 4d-array, shape of (grid_h, grid_w, num of boxes per grid, 5 + n_classes)
18 |             YOLO neural network output array
19 | 
20 |     # Returns
21 |         boxes : array, shape of (N, 4)
22 |             coordinate scale is normalized [0, 1]
23 |         probs : array, shape of (N, nb_classes)
24 |     """
25 |     anchors = [1.889, 2.5245, 2.9465, 3.94056, 3.99987, 5.3658, 5.155437, 6.92275, 6.718375, 9.01025]
26 |     nms_threshold = 0.2
27 | netout = netout[0].reshape(7,7,5,6)
28 | grid_h, grid_w, nb_box = netout.shape[:3]
29 | boxes = []
30 |
31 | # decode the output by the network
32 | netout[..., 4] = _sigmoid(netout[..., 4])
33 | netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])
34 | netout[..., 5:] *= netout[..., 5:] > 0.3
35 |
36 | for row in range(grid_h):
37 | for col in range(grid_w):
38 | for b in range(nb_box):
39 |                 # elements from index 5 onwards are the class probabilities
40 | classes = netout[row,col,b,5:]
41 |
42 | if np.sum(classes) > 0:
43 | # first 4 elements are x, y, w, and h
44 | x, y, w, h = netout[row,col,b,:4]
45 |
46 | x = (col + _sigmoid(x)) / grid_w # center position, unit: image width
47 | y = (row + _sigmoid(y)) / grid_h # center position, unit: image height
48 | w = anchors[2 * b + 0] * np.exp(w) / grid_w # unit: image width
49 | h = anchors[2 * b + 1] * np.exp(h) / grid_h # unit: image height
50 | confidence = netout[row,col,b,4]
51 | box = BoundBox(x, y, w, h, confidence, classes)
52 | boxes.append(box)
53 |
54 | boxes = nms_boxes(boxes, len(classes), nms_threshold, 0.3)
55 | boxes, probs = boxes_to_array(boxes)
56 | #print(boxes)
57 | predictions = []
58 | def _to_original_scale(boxes):
59 | minmax_boxes = to_minmax(boxes)
60 | minmax_boxes[:,0] *= 224
61 | minmax_boxes[:,2] *= 224
62 | minmax_boxes[:,1] *= 224
63 | minmax_boxes[:,3] *= 224
64 |         return minmax_boxes.astype(int)
65 |
66 | if len(boxes) > 0:
67 | boxes = _to_original_scale(boxes)
68 |
69 | for i in range(len(boxes)):
70 | predictions.append([0, boxes[i], probs[i][0]])
71 |
72 | return predictions
73 |
74 | def _sigmoid(x):
75 | return 1. / (1. + np.exp(-x))
76 |
77 | def _softmax(x, axis=-1, t=-100.):
78 | x = x - np.max(x)
79 | if np.min(x) < t:
80 | x = x/np.min(x)*t
81 | e_x = np.exp(x)
82 | return e_x / e_x.sum(axis, keepdims=True)
83 |
84 | def yolo_resize_factor(video: cv2.VideoCapture, input_binding_info: tuple):
85 | """
86 | Gets a multiplier to scale the bounding box positions to
87 | their correct position in the frame.
88 |
89 | Args:
90 | video: Video capture object, contains information about data source.
91 | input_binding_info: Contains shape of model input layer.
92 |
93 | Returns:
94 | Resizing factor to scale box coordinates to output frame size.
95 | """
96 | frame_height = video.get(cv2.CAP_PROP_FRAME_HEIGHT)
97 | frame_width = video.get(cv2.CAP_PROP_FRAME_WIDTH)
98 | model_height, model_width = list(input_binding_info[1].GetShape())[1:3]
99 | return max(frame_height, frame_width) / max(model_height, model_width)
100 |
--------------------------------------------------------------------------------
/examples/armnn/face_recognition/README.md:
--------------------------------------------------------------------------------
1 | # PyArmNN Face recognition Sample Application
2 |
3 | ## Introduction
4 | This sample application shows how to perform face recognition using the PyArmNN API.
5 |
6 | The application takes three models and a video file or camera feed as input, runs inference on each frame, and produces bounding boxes and ID numbers corresponding to entries in the database.
7 |
8 | ## Database population
9 |
10 | Before we can run face recognition, we need to extract features from the faces we want to recognize and save the feature embedding vectors in encoded form in a .json file, which serves as a small database. You can do that with calculate_features.py.
11 |
12 | Example usage:
13 |
14 | ```bash
15 | python3 calculate_features.py --fd_model_file_path ../face_rec_models/YOLOv2_best_mAP.tflite --kp_model_file_path ../face_rec_models/MobileFaceNet_kpts.tflite --fe_model_file_path ../face_rec_models/MobileFaceNet_features.tflite --db_file_path database.db --id 0 --name Paul --picture_file_path paul.png
16 | ```
17 |
18 | ## Face recognition from Video File
19 | Face recognition demo that takes a video file, runs inference on each frame producing
20 | bounding boxes and ID numbers corresponding to entries in the database, and saves the processed video.
21 |
22 | Example usage:
23 |
24 | ```bash
25 | python3 run_video_file.py --video_file_path test_s.mp4 --db_file_path database.db --fd_model_file_path ../face_rec_models/YOLOv2_best_mAP.tflite --kp_model_file_path ../face_rec_models/MobileFaceNet_kpts.tflite --fe_model_file_path ../face_rec_models/MobileFaceNet_features.tflite
26 | ```
27 |
28 | ## Face recognition from Video Stream
29 |
30 | Face recognition demo that takes a video stream from a device, runs inference
31 | on each frame producing bounding boxes and ID numbers corresponding to entries in the database,
32 | and displays a window with the latest processed frame.
33 |
34 | Example usage:
35 |
36 | ```bash
37 | DISPLAY=:0 python3 run_video_stream.py --db_file_path database.db --fd_model_file_path ../face_rec_models/YOLOv2_best_mAP.tflite --kp_model_file_path ../face_rec_models/MobileFaceNet_kpts.tflite --fe_model_file_path ../face_rec_models/MobileFaceNet_features.tflite
38 | ```
39 |
40 | This application has been verified to work with MobileNet models using a YOLOv2 detection layer, the MobileFaceNet keypoint detector, and the MobileFaceNet face feature embedding extractor, which can be downloaded from:
41 |
42 | WIP
43 |
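44 | ## How matching works (sketch)
45 | 
46 | At recognition time, the feature embedding extracted from a detected face is compared against the embeddings stored in the database, and the ID of the closest entry is reported. The snippet below is only a minimal illustration of that idea using cosine similarity; the plain-JSON layout, the field names (`id`, `name`, `features`) and the threshold value are assumptions made for the sketch, not the exact format or logic used by the scripts in this folder:
47 | 
48 | ```python
49 | import json
50 | 
51 | import numpy as np
52 | 
53 | 
54 | def match_face(embedding, db_file_path, threshold=0.5):
55 |     """Return (id, name) of the closest database entry, or None if nothing is close enough."""
56 |     with open(db_file_path) as f:
57 |         entries = json.load(f)
58 | 
59 |     embedding = np.asarray(embedding, dtype=np.float32)
60 |     best_entry, best_score = None, -1.0
61 | 
62 |     for entry in entries:
63 |         stored = np.asarray(entry['features'], dtype=np.float32)
64 |         # Cosine similarity between the query embedding and the stored embedding
65 |         score = float(np.dot(embedding, stored) /
66 |                       (np.linalg.norm(embedding) * np.linalg.norm(stored)))
67 |         if score > best_score:
68 |             best_entry, best_score = entry, score
69 | 
70 |     if best_entry is not None and best_score >= threshold:
71 |         return best_entry['id'], best_entry['name']
72 |     return None
73 | ```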
--------------------------------------------------------------------------------
/examples/armnn/face_recognition/box.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 |
4 |
5 | # Todo : BoundBox & its related method extraction
6 | class BoundBox:
7 | def __init__(self, x, y, w, h, c = None, classes = None):
8 | self.x = x
9 | self.y = y
10 | self.w = w
11 | self.h = h
12 |
13 | self.c = c
14 | self.classes = classes
15 |
16 | def get_label(self):
17 | return np.argmax(self.classes)
18 |
19 | def get_score(self):
20 | return self.classes[self.get_label()]
21 |
22 | def iou(self, bound_box):
23 | b1 = self.as_centroid()
24 | b2 = bound_box.as_centroid()
25 | return centroid_box_iou(b1, b2)
26 |
27 | def as_centroid(self):
28 | return np.array([self.x, self.y, self.w, self.h])
29 |
30 |
31 | def boxes_to_array(bound_boxes):
32 | """
33 | # Args
34 | boxes : list of BoundBox instances
35 |
36 | # Returns
37 | centroid_boxes : (N, 4)
38 | probs : (N, nb_classes)
39 | """
40 | centroid_boxes = []
41 | probs = []
42 | for box in bound_boxes:
43 | centroid_boxes.append([box.x, box.y, box.w, box.h])
44 | probs.append(box.classes)
45 | return np.array(centroid_boxes), np.array(probs)
46 |
47 |
48 | def nms_boxes(boxes, n_classes, nms_threshold=0.3, obj_threshold=0.3):
49 | """
50 | # Args
51 | boxes : list of BoundBox
52 |
53 | # Returns
54 | boxes : list of BoundBox
55 | non maximum supressed BoundBox instances
56 | """
57 | # suppress non-maximal boxes
58 | for c in range(n_classes):
59 | sorted_indices = list(reversed(np.argsort([box.classes[c] for box in boxes])))
60 |
61 | for i in range(len(sorted_indices)):
62 | index_i = sorted_indices[i]
63 |
64 | if boxes[index_i].classes[c] == 0:
65 | continue
66 | else:
67 | for j in range(i+1, len(sorted_indices)):
68 | index_j = sorted_indices[j]
69 |
70 | if boxes[index_i].iou(boxes[index_j]) >= nms_threshold:
71 | boxes[index_j].classes[c] = 0
72 | # remove the boxes which are less likely than a obj_threshold
73 | boxes = [box for box in boxes if box.get_score() > obj_threshold]
74 | return boxes
75 |
76 |
77 | def draw_scaled_boxes(image, boxes, probs, labels, desired_size=400):
78 | img_size = min(image.shape[:2])
79 | if img_size < desired_size:
80 | scale_factor = float(desired_size) / img_size
81 | else:
82 | scale_factor = 1.0
83 |
84 | h, w = image.shape[:2]
85 | img_scaled = cv2.resize(image, (int(w*scale_factor), int(h*scale_factor)))
86 |     if len(boxes) > 0:
87 | boxes_scaled = boxes*scale_factor
88 |         boxes_scaled = boxes_scaled.astype(int)
89 | else:
90 | boxes_scaled = boxes
91 | return draw_boxes(img_scaled, boxes_scaled, probs, labels)
92 |
93 |
94 | def draw_boxes(image, boxes, probs, labels):
95 | for box, classes in zip(boxes, probs):
96 | x1, y1, x2, y2 = box
97 | cv2.rectangle(image, (x1,y1), (x2,y2), (0,255,0), 3)
98 | cv2.putText(image,
99 | '{}: {:.2f}'.format(labels[np.argmax(classes)], classes.max()),
100 | (x1, y1 - 13),
101 | cv2.FONT_HERSHEY_SIMPLEX,
102 | 1e-3 * image.shape[0],
103 | (0,255,0), 2)
104 | return image
105 |
106 |
107 | def centroid_box_iou(box1, box2):
108 | def _interval_overlap(interval_a, interval_b):
109 | x1, x2 = interval_a
110 | x3, x4 = interval_b
111 |
112 | if x3 < x1:
113 | if x4 < x1:
114 | return 0
115 | else:
116 | return min(x2,x4) - x1
117 | else:
118 | if x2 < x3:
119 | return 0
120 | else:
121 | return min(x2,x4) - x3
122 |
123 | _, _, w1, h1 = box1.reshape(-1,)
124 | _, _, w2, h2 = box2.reshape(-1,)
125 | x1_min, y1_min, x1_max, y1_max = to_minmax(box1.reshape(-1,4)).reshape(-1,)
126 | x2_min, y2_min, x2_max, y2_max = to_minmax(box2.reshape(-1,4)).reshape(-1,)
127 |
128 | intersect_w = _interval_overlap([x1_min, x1_max], [x2_min, x2_max])
129 | intersect_h = _interval_overlap([y1_min, y1_max], [y2_min, y2_max])
130 | intersect = intersect_w * intersect_h
131 | union = w1 * h1 + w2 * h2 - intersect
132 |
133 | return float(intersect) / union
134 |
135 |
136 | def to_centroid(minmax_boxes):
137 | """
138 | minmax_boxes : (N, 4)
139 | """
140 |     minmax_boxes = minmax_boxes.astype(float)
141 | centroid_boxes = np.zeros_like(minmax_boxes)
142 |
143 | x1 = minmax_boxes[:,0]
144 | y1 = minmax_boxes[:,1]
145 | x2 = minmax_boxes[:,2]
146 | y2 = minmax_boxes[:,3]
147 |
148 | centroid_boxes[:,0] = (x1 + x2) / 2
149 | centroid_boxes[:,1] = (y1 + y2) / 2
150 | centroid_boxes[:,2] = x2 - x1
151 | centroid_boxes[:,3] = y2 - y1
152 | return centroid_boxes
153 |
154 | def to_minmax(centroid_boxes):
155 |     centroid_boxes = centroid_boxes.astype(float)
156 | minmax_boxes = np.zeros_like(centroid_boxes)
157 |
158 | cx = centroid_boxes[:,0]
159 | cy = centroid_boxes[:,1]
160 | w = centroid_boxes[:,2]
161 | h = centroid_boxes[:,3]
162 |
163 | minmax_boxes[:,0] = cx - w/2
164 | minmax_boxes[:,1] = cy - h/2
165 | minmax_boxes[:,2] = cx + w/2
166 | minmax_boxes[:,3] = cy + h/2
167 | return minmax_boxes
168 |
169 | def create_anchor_boxes(anchors):
170 | """
171 | # Args
172 | anchors : list of floats
173 | # Returns
174 | boxes : array, shape of (len(anchors)/2, 4)
175 | centroid-type
176 | """
177 | boxes = []
178 | n_boxes = int(len(anchors)/2)
179 | for i in range(n_boxes):
180 | boxes.append(np.array([0, 0, anchors[2*i], anchors[2*i+1]]))
181 | return np.array(boxes)
182 |
183 | def find_match_box(centroid_box, centroid_boxes):
184 | """Find the index of the boxes with the largest overlap among the N-boxes.
185 | # Args
186 | box : array, shape of (1, 4)
187 | boxes : array, shape of (N, 4)
188 |
189 | # Return
190 | match_index : int
191 | """
192 | match_index = -1
193 | max_iou = -1
194 |
195 | for i, box in enumerate(centroid_boxes):
196 | iou = centroid_box_iou(centroid_box, box)
197 |
198 | if max_iou < iou:
199 | match_index = i
200 | max_iou = iou
201 | return match_index
202 |
--------------------------------------------------------------------------------
/examples/armnn/face_recognition/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.19.2
2 | tqdm>=4.47.0
3 | scikit-image>=0.18.3
--------------------------------------------------------------------------------
/examples/armnn/face_recognition/yolov2.py:
--------------------------------------------------------------------------------
1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
2 | # SPDX-License-Identifier: MIT
3 |
4 | """
5 | Contains functions specific to decoding and processing inference results for YOLO v2 models.
6 | """
7 |
8 | import cv2
9 | import numpy as np
10 | from box import BoundBox, nms_boxes, boxes_to_array, to_minmax, draw_boxes
11 |
12 |
13 | def yolo_processing(netout):
14 |     """Convert Yolo network output to bounding box
15 | 
16 |     # Args
17 |         netout : 4d-array, shape of (grid_h, grid_w, num of boxes per grid, 5 + n_classes)
18 |             YOLO neural network output array
19 | 
20 |     # Returns
21 |         boxes : array, shape of (N, 4)
22 |             coordinate scale is normalized [0, 1]
23 |         probs : array, shape of (N, nb_classes)
24 |     """
25 |     anchors = [1.889, 2.5245, 2.9465, 3.94056, 3.99987, 5.3658, 5.155437, 6.92275, 6.718375, 9.01025]
26 |     nms_threshold = 0.2
27 | netout = netout[0].reshape(7,7,5,6)
28 | grid_h, grid_w, nb_box = netout.shape[:3]
29 | boxes = []
30 |
31 | # decode the output by the network
32 | netout[..., 4] = _sigmoid(netout[..., 4])
33 | netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])
34 | netout[..., 5:] *= netout[..., 5:] > 0.3
35 |
36 | for row in range(grid_h):
37 | for col in range(grid_w):
38 | for b in range(nb_box):
39 |                 # elements from index 5 onwards are the class probabilities
40 | classes = netout[row,col,b,5:]
41 |
42 | if np.sum(classes) > 0:
43 | # first 4 elements are x, y, w, and h
44 | x, y, w, h = netout[row,col,b,:4]
45 |
46 | x = (col + _sigmoid(x)) / grid_w # center position, unit: image width
47 | y = (row + _sigmoid(y)) / grid_h # center position, unit: image height
48 | w = anchors[2 * b + 0] * np.exp(w) / grid_w # unit: image width
49 | h = anchors[2 * b + 1] * np.exp(h) / grid_h # unit: image height
50 | confidence = netout[row,col,b,4]
51 | box = BoundBox(x, y, w, h, confidence, classes)
52 | boxes.append(box)
53 |
54 | boxes = nms_boxes(boxes, len(classes), nms_threshold, 0.3)
55 | boxes, probs = boxes_to_array(boxes)
56 | #print(boxes)
57 | predictions = []
58 | def _to_original_scale(boxes):
59 | minmax_boxes = to_minmax(boxes)
60 | minmax_boxes[:,0] *= 224
61 | minmax_boxes[:,2] *= 224
62 | minmax_boxes[:,1] *= 224
63 | minmax_boxes[:,3] *= 224
64 |         return minmax_boxes.astype(int)
65 |
66 | if len(boxes) > 0:
67 | boxes = _to_original_scale(boxes)
68 |
69 | for i in range(len(boxes)):
70 | predictions.append([0, boxes[i], probs[i][0]])
71 |
72 | return predictions
73 |
74 | def _sigmoid(x):
75 | return 1. / (1. + np.exp(-x))
76 |
77 | def _softmax(x, axis=-1, t=-100.):
78 | x = x - np.max(x)
79 | if np.min(x) < t:
80 | x = x/np.min(x)*t
81 | e_x = np.exp(x)
82 | return e_x / e_x.sum(axis, keepdims=True)
83 |
84 | def yolo_resize_factor(video: cv2.VideoCapture, input_binding_info: tuple):
85 | """
86 | Gets a multiplier to scale the bounding box positions to
87 | their correct position in the frame.
88 |
89 | Args:
90 | video: Video capture object, contains information about data source.
91 | input_binding_info: Contains shape of model input layer.
92 |
93 | Returns:
94 | Resizing factor to scale box coordinates to output frame size.
95 | """
96 | frame_height = video.get(cv2.CAP_PROP_FRAME_HEIGHT)
97 | frame_width = video.get(cv2.CAP_PROP_FRAME_WIDTH)
98 | model_height, model_width = list(input_binding_info[1].GetShape())[1:3]
99 | return max(frame_height, frame_width) / max(model_height, model_width)
100 |
--------------------------------------------------------------------------------
/examples/edge_impulse/multi_stage_inference_vehicle_type/multi_stage.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import cv2
4 | import os
5 | import sys, getopt
6 | import signal
7 | import time
8 | from edge_impulse_linux.image import ImageImpulseRunner
9 |
10 | show_camera = True
11 |
12 | def draw_result(frame, class_name, bb, confidence):
13 | """
14 |     Draws a bounding box around the detected object and adds a label and confidence score.
15 |     Args:
16 |         frame: The original captured frame from the video source.
17 |         class_name: Predicted class name for the detected object.
18 |         bb: Bounding box dictionary with 'x', 'y', 'width' and 'height' keys.
19 |         confidence: Confidence score of the classification result.
20 | """
21 | color = (255, 0, 0)
22 |
23 | x_min, y_min, x_max, y_max = bb['x'], bb['y'], bb['x']+ bb['width'], bb['y']+ bb['height']
24 |
25 | # Draw bounding box around detected object
26 | cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)
27 |
28 | # Create label for detected object class
29 | label = "{}, {}".format(class_name, confidence)
30 | label_color = (255, 255, 255)
31 |
32 | # Make sure label always stays on-screen
33 | x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2]
34 |
35 | lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text)
36 | lbl_box_xy_max = (x_min + int(0.75 * x_text), y_min + y_text if y_min<25 else y_min)
37 | lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5)
38 |
39 | # Add label and confidence value
40 | cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1)
41 | cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.70, label_color, 1, cv2.LINE_AA)
42 |
43 |
44 | def now():
45 | return round(time.time() * 1000)
46 |
47 | def get_webcams():
48 | port_ids = []
49 | for port in range(5):
50 | print("Looking for a camera in port %s:" %port)
51 | camera = cv2.VideoCapture(port)
52 | if camera.isOpened():
53 | ret = camera.read()[0]
54 | if ret:
55 | backendName =camera.getBackendName()
56 | w = camera.get(3)
57 | h = camera.get(4)
58 | print("Camera %s (%s x %s) found in port %s " %(backendName,h,w, port))
59 | port_ids.append(port)
60 | camera.release()
61 | return port_ids
62 |
63 | def sigint_handler(sig, frame):
64 |     print('Interrupted')
65 |     # The Edge Impulse runners are closed by their 'with' blocks when SystemExit
66 |     # propagates, so there is no global runner to stop here.
67 |     sys.exit(0)
68 |
69 | signal.signal(signal.SIGINT, sigint_handler)
70 |
71 | def help():
72 |     print('Usage: python multi_stage.py <path_to_detection_model> <path_to_classification_model> [camera_port_id]')
73 |
74 | def main(argv):
75 | try:
76 |         opts, args = getopt.getopt(argv, "h", ["help"])
77 | except getopt.GetoptError:
78 | help()
79 | sys.exit(2)
80 |
81 | for opt, arg in opts:
82 | if opt in ('-h', '--help'):
83 | help()
84 | sys.exit()
85 |
86 |     if len(args) < 2:
87 | help()
88 | sys.exit(2)
89 |
90 | def get_path(model_name):
91 |
92 | dir_path = os.path.dirname(os.path.realpath(__file__))
93 | modelfile = os.path.join(dir_path, model_name)
94 | print('MODEL: ' + modelfile)
95 | return modelfile
96 |
97 | detection_model = get_path(args[0])
98 | classification_model = get_path(args[1])
99 |
100 | with ImageImpulseRunner(detection_model) as detection_runner, ImageImpulseRunner(classification_model) as classification_runner:
101 |
102 | detection_model_info = detection_runner.init()
103 | classification_model_info = classification_runner.init()
104 |
105 | print('Loaded detection model runner for "' + detection_model_info['project']['owner'] + ' / ' + detection_model_info['project']['name'] + '"')
106 | detection_labels = detection_model_info['model_parameters']['labels']
107 |
108 |         print('Loaded classification model runner for "' + classification_model_info['project']['owner'] + ' / ' + classification_model_info['project']['name'] + '"')
109 | classification_labels = classification_model_info['model_parameters']['labels']
110 |
111 | class_model_input_height = classification_model_info['model_parameters']['image_input_height']
112 | class_model_input_width = classification_model_info['model_parameters']['image_input_width']
113 |
114 | if len(args)>= 3:
115 | videoCaptureDeviceId = int(args[2])
116 | else:
117 | port_ids = get_webcams()
118 | if len(port_ids) == 0:
119 | raise Exception('Cannot find any webcams')
120 |             if len(port_ids) > 1:
121 |                 raise Exception("Multiple cameras found. Add the camera port ID as a third argument to this script")
122 | videoCaptureDeviceId = int(port_ids[0])
123 |
124 | camera = cv2.VideoCapture(videoCaptureDeviceId)
125 |
126 | ret = camera.read()[0]
127 | if ret:
128 | backendName = camera.getBackendName()
129 | w = camera.get(3)
130 | h = camera.get(4)
131 | print("Camera %s (%s x %s) in port %s selected." %(backendName,h,w, videoCaptureDeviceId))
132 | camera.release()
133 | else:
134 | raise Exception("Couldn't initialize selected camera.")
135 |
136 | for det_res, img in detection_runner.classifier(videoCaptureDeviceId):
137 | print('Found %d bounding boxes (%d ms.)' % (len(det_res["result"]["bounding_boxes"]), det_res['timing']['dsp'] + det_res['timing']['classification']))
138 | for bb in det_res["result"]["bounding_boxes"]:
139 | print('%s (%.2f): x=%d y=%d w=%d h=%d\n' % (bb['label'], bb['value'], bb['x'], bb['y'], bb['width'], bb['height']))
140 |
141 | cropped_img = img[bb['y']:bb['y']+bb['height'], bb['x']:bb['x']+bb['width']]
142 | resized_img = cv2.resize(cropped_img, (class_model_input_width, class_model_input_height))
143 |
144 | features, cropped = classification_runner.get_features_from_image(resized_img)
145 |
146 | # the image will be resized and cropped, save a copy of the picture here
147 | # so you can see what's being passed into the classifier
148 | #cv2.imwrite('debug.jpg', cropped)
149 |
150 | class_res = classification_runner.classify(features)
151 |
152 | if "classification" in class_res["result"].keys():
153 | print('Classification result (%d ms.) \n' % (class_res['timing']['dsp'] + class_res['timing']['classification']), end='')
154 | top_score = 0
155 | top_label = ''
156 |
157 | for label in classification_labels:
158 | score = class_res['result']['classification'][label]
159 | print('%s: %.2f\n' % (label, score), end='')
160 | if score >= top_score:
161 | top_score = score
162 | top_label = label
163 |
164 | print('----------------------\n', flush=True)
165 | print('Top result: %s with confidence %.2f\n' % (top_label, top_score), end='')
166 | print('----------------------\n', flush=True)
167 |
168 | draw_result(img, top_label, bb, top_score)
169 |
170 | if (show_camera):
171 | cv2.imshow('edgeimpulse', img)
172 | if cv2.waitKey(1) == ord('q'):
173 | break
174 |
175 | detection_runner.stop()
176 | classification_runner.stop()
177 |
178 | if __name__ == "__main__":
179 | main(sys.argv[1:])
--------------------------------------------------------------------------------
/examples/mediapipe/README.md:
--------------------------------------------------------------------------------
1 | # MediaPipe Sample Applications
2 |
3 | ## Introduction
4 | Google MediaPipe offers ready-to-use yet customizable Python solutions as a prebuilt Python package.
5 |
6 | We provide example scripts for performing inference on a video file and on a video stream with `run_video_file.py` and `run_video_stream.py`. For detailed instructions, execute `run_video_file.py --help` or `run_video_stream.py --help`.
7 |
8 | ## Prerequisites
9 |
10 | ##### MediaPipe
11 |
12 | Before proceeding to the next steps, make sure that you have successfully installed MediaPipe on your system by following the instructions in this README.
13 | 
14 | You can verify that the MediaPipe library is installed using:
15 | ```bash
16 | $ pip3 show mediapipe
17 | ```
18 |
19 | ##### Dependencies
20 |
21 | Install the following libraries on your system:
22 | ```bash
23 | sudo apt install ffmpeg python3-opencv
24 | ```
25 |
26 | Create a virtual environment:
27 | ```bash
28 | python3 -m venv devenv --system-site-packages
29 | source devenv/bin/activate
30 | ```
31 |
32 | ### Python bindings for 32-bit OS
33 | 
34 | ```bash
35 | pip3 install mediapipe-rpi4 tqdm
36 | ```
37 | 
38 | ### Python bindings for 64-bit OS
39 | 
40 | Pre-built wheels for Python 3.7 on a 64-bit OS were not available at the time of writing, so we compiled and shared them ourselves.
41 | 
42 | ```bash
43 | wget https://files.seeedstudio.com/ml/mediapipe/mediapipe-0.8-cp37-cp37m-linux_aarch64.whl
44 | pip3 install mediapipe-0.8-cp37-cp37m-linux_aarch64.whl
45 | pip3 install tqdm
46 | ```
47 |
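48 | ### Verifying the Python bindings
49 | 
50 | Beyond `pip3 show mediapipe`, you can confirm that the package imports correctly and that the solution used by these examples can be constructed. This is only a quick check and does not run any inference; it uses the same `FaceDetection` parameters as the face_detection examples:
51 | 
52 | ```python
53 | import mediapipe as mp
54 | 
55 | # Construct the face detection solution with the same settings the examples use
56 | with mp.solutions.face_detection.FaceDetection(model_selection=1,
57 |                                                min_detection_confidence=0.5) as face_detection:
58 |     print('MediaPipe FaceDetection initialized successfully')
59 | ```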
--------------------------------------------------------------------------------
/examples/mediapipe/common/cv_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov
3 | # SPDX-License-Identifier: MIT
4 |
5 | """
6 | This file contains helper functions for reading video/image data and
7 | pre/postprocessing of video/image data using OpenCV.
8 | """
9 |
10 | import os
11 |
12 | import cv2
13 | import numpy as np
14 |
15 | def count_frames_manual(video):
16 | # initialize the total number of frames read
17 | total = 0
18 | # loop over the frames of the video
19 | while True:
20 | # grab the current frame
21 | (grabbed, frame) = video.read()
22 |
23 | # check to see if we have reached the end of the
24 | # video
25 | if not grabbed:
26 | break
27 | # increment the total number of frames read
28 | total += 1
29 | # return the total number of frames in the video file
30 | return total
31 |
32 | def create_video_writer(video: cv2.VideoCapture, video_path: str, name: str):
33 | """
34 | Creates a video writer object to write processed frames to file.
35 |
36 | Args:
37 | video: Video capture object, contains information about data source.
38 | video_path: User-specified video file path.
39 |         name: Filename (without extension) to use for the output video file.
40 |
41 | Returns:
42 | Video writer object.
43 | """
44 | _, ext = os.path.splitext(video_path)
45 |
46 | i, filename = 0, os.path.join(str(), f'{name}{ext}')
47 |
48 | while os.path.exists(filename):
49 | i += 1
50 | filename = os.path.join(str(), f'{name}({i}){ext}')
51 | print(filename)
52 | video_writer = cv2.VideoWriter(filename=filename,
53 | fourcc=get_source_encoding_int(video),
54 | fps=int(video.get(cv2.CAP_PROP_FPS)),
55 | frameSize=(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),
56 | int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))))
57 | return video_writer
58 |
59 |
60 | def init_video_file_capture(video_path: str, name: str):
61 | """
62 | Creates a video capture object from a video file.
63 |
64 | Args:
65 | video_path: User-specified video file path.
66 |         name: Filename (without extension) to use for the output video file.
67 |
68 | Returns:
69 | Video capture object to capture frames, video writer object to write processed
70 | frames to file, plus total frame count of video source to iterate through.
71 | """
72 | if not os.path.exists(video_path):
73 | raise FileNotFoundError(f'Video file not found for: {video_path}')
74 |
75 | video = cv2.VideoCapture(video_path)
76 |     if not video.isOpened():
77 | raise RuntimeError(f'Failed to open video capture from file: {video_path}')
78 |
79 | video_writer = create_video_writer(video, video_path, name)
80 |
81 | iter_frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
82 |
83 | return video, video_writer, range(iter_frame_count)
84 |
85 |
86 | def init_video_stream_capture(video_source: int):
87 | """
88 | Creates a video capture object from a device.
89 |
90 | Args:
91 | video_source: Device index used to read video stream.
92 |
93 | Returns:
94 | Video capture object used to capture frames from a video stream.
95 | """
96 | video = cv2.VideoCapture(video_source)
97 |     if not video.isOpened():
98 | raise RuntimeError(f'Failed to open video capture for device with index: {video_source}')
99 | print('Processing video stream. Press \'Esc\' key to exit the demo.')
100 | return video
101 |
102 | def get_source_encoding_int(video_capture):
103 | return int(video_capture.get(cv2.CAP_PROP_FOURCC))
104 |
--------------------------------------------------------------------------------
/examples/mediapipe/face_detection/run_video_file.py:
--------------------------------------------------------------------------------
1 | # Based on MediaPipe Example Scripts. All rights reserved.
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov
3 | # SPDX-License-Identifier: MIT
4 |
5 | import os
6 | import sys
7 | import time
8 | script_dir = os.path.dirname(__file__)
9 | sys.path.insert(1, os.path.join(script_dir, '..', 'common'))
10 |
11 | import cv2
12 | import mediapipe as mp
13 | from tqdm import tqdm
14 | from argparse import ArgumentParser
15 | from cv_utils import init_video_file_capture
16 |
17 | mp_drawing = mp.solutions.drawing_utils
18 | mp_face_detection = mp.solutions.face_detection
19 |
20 | def main(args):
21 | video, video_writer, frame_count = init_video_file_capture(args.video_file_path, 'face_detection_demo')
22 | frame_num = len(frame_count)
23 |
24 | times = []
25 |
26 | with mp_face_detection.FaceDetection(model_selection=args.model_selection,
27 | min_detection_confidence=args.min_detection_confidence) as face_detection:
28 |
29 | for _ in tqdm(frame_count, desc='Processing frames'):
30 | frame_present, frame = video.read()
31 | if not frame_present:
32 | continue
33 |
34 | # Flip the image horizontally for a later selfie-view display, and convert
35 | # the BGR image to RGB.
36 | image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB)
37 | # To improve performance, optionally mark the image as not writeable to
38 | # pass by reference.
39 | image.flags.writeable = False
40 |
41 | start_time = time.time()
42 | results = face_detection.process(image)
43 | end_time = (time.time() - start_time)*1000
44 |
45 | # Draw the face mesh annotations on the image.
46 | image.flags.writeable = True
47 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
48 | if results.detections:
49 | for detection in results.detections:
50 | mp_drawing.draw_detection(image, detection)
51 |
52 | times.append(end_time)
53 | video_writer.write(image)
54 |
55 | print('Finished processing frames')
56 | video.release(), video_writer.release()
57 |
58 | print("Average time(ms): ", sum(times)//frame_num)
59 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1000.0 / average of inference times for all the frames
60 |
61 | if __name__ == '__main__':
62 | parser = ArgumentParser()
63 | parser.add_argument('--video_file_path', required=True, type=str,
64 | help='Path to the video file to run object detection on')
65 |
66 | parser.add_argument('--min_detection_confidence', default=0.5, type=float,
67 |                         help='Minimum confidence value ([0.0, 1.0]) from the face detection model for the detection to be considered successful. Defaults to 0.5')
68 |
69 | parser.add_argument('--model_selection', default=1, type=int,
70 | help='Use 0 to select a short-range model that works best for faces within 2 meters from the camera, and 1 for a full-range model best for faces within 5 meters.')
71 |
72 | args = parser.parse_args()
73 | main(args)
74 |
--------------------------------------------------------------------------------
/examples/mediapipe/face_detection/run_video_stream.py:
--------------------------------------------------------------------------------
1 | # Based on MediaPipe Example Scripts. All rights reserved.
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov
3 | # SPDX-License-Identifier: MIT
4 |
5 | import os
6 | import sys
7 | import time
8 | script_dir = os.path.dirname(__file__)
9 | sys.path.insert(1, os.path.join(script_dir, '..', 'common'))
10 |
11 | import cv2
12 | import mediapipe as mp
13 | from argparse import ArgumentParser
14 | from cv_utils import init_video_stream_capture
15 |
16 | mp_drawing = mp.solutions.drawing_utils
17 | mp_face_detection = mp.solutions.face_detection
18 |
19 | def main(args):
20 | video = init_video_stream_capture(args.video_source)
21 |
22 | with mp_face_detection.FaceDetection(model_selection=args.model_selection,
23 | min_detection_confidence=args.min_detection_confidence) as face_detection:
24 |
25 | while True:
26 |
27 | frame_present, frame = video.read()
28 | if not frame_present:
29 | raise RuntimeError('Error reading frame from video stream')
30 |
31 | # Flip the image horizontally for a later selfie-view display, and convert
32 | # the BGR image to RGB.
33 | image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB)
34 | # To improve performance, optionally mark the image as not writeable to
35 | # pass by reference.
36 | image.flags.writeable = False
37 |
38 | start_time = time.time()
39 | results = face_detection.process(image)
40 | end_time = (time.time() - start_time)*1000
41 |
42 | print("FPS: ", 1.0 / (time.time() - start_time)) # FPS = 1 / time to process loop
43 | print("Time(ms): ", (time.time() - start_time)*1000)
44 |
45 |             # Draw the face detection annotations on the image.
46 | image.flags.writeable = True
47 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
48 | if results.detections:
49 | for detection in results.detections:
50 | mp_drawing.draw_detection(image, detection)
51 |
52 | cv2.imshow('MediaPipe Face Detection Demo', image)
53 |
54 | if cv2.waitKey(1) == 27:
55 | print('\nExit key activated. Closing video...')
56 | break
57 |
58 | video.release(), cv2.destroyAllWindows()
59 |
60 | if __name__ == '__main__':
61 | parser = ArgumentParser()
62 | parser.add_argument('--video_source', type=int, default=0,
63 | help='Device index to access video stream. Defaults to primary device camera at index 0')
64 |
65 | parser.add_argument('--min_detection_confidence', default=0.5, type=float,
66 |                         help='Minimum confidence value ([0.0, 1.0]) from the face detection model for the detection to be considered successful. Defaults to 0.5')
67 |
68 | parser.add_argument('--model_selection', default=1, type=int,
69 | help='Use 0 to select a short-range model that works best for faces within 2 meters from the camera, and 1 for a full-range model best for faces within 5 meters.')
70 |
71 | args = parser.parse_args()
72 | main(args)
--------------------------------------------------------------------------------
/examples/mediapipe/face_mesh/run_video_file.py:
--------------------------------------------------------------------------------
1 | # Based on MediaPipe Example Scripts. All rights reserved.
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov
3 | # SPDX-License-Identifier: MIT
4 |
5 | import os
6 | import sys
7 | import time
8 | script_dir = os.path.dirname(__file__)
9 | sys.path.insert(1, os.path.join(script_dir, '..', 'common'))
10 |
11 | import cv2
12 | import mediapipe as mp
13 | import numpy as np
14 | from tqdm import tqdm
15 | from argparse import ArgumentParser
16 | from cv_utils import init_video_file_capture
17 |
18 | mp_drawing = mp.solutions.drawing_utils
19 | mp_face_mesh = mp.solutions.face_mesh
20 |
21 | def main(args):
22 | video, video_writer, frame_count = init_video_file_capture(args.video_file_path, 'face_mesh_demo')
23 | frame_num = len(frame_count)
24 | 
25 | drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
26 |
27 | times = []
28 |
29 | with mp_face_mesh.FaceMesh(min_detection_confidence=args.min_detection_confidence,
30 | min_tracking_confidence=args.min_tracking_confidence) as face_mesh:
31 |
32 | for _ in tqdm(frame_count, desc='Processing frames'):
33 | frame_present, frame = video.read()
34 | if not frame_present:
35 | continue
36 |
37 | # Flip the image horizontally for a later selfie-view display, and convert
38 | # the BGR image to RGB.
39 | image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB)
40 | # To improve performance, optionally mark the image as not writeable to
41 | # pass by reference.
42 | image.flags.writeable = False
43 |
44 | start_time = time.time()
45 | results = face_mesh.process(image)
46 | end_time = (time.time() - start_time)*1000
47 |
48 | # Draw the face mesh annotations on the image.
49 | image.flags.writeable = True
50 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
51 | if results.multi_face_landmarks:
52 | for face_landmarks in results.multi_face_landmarks:
53 | mp_drawing.draw_landmarks(
54 | image=image,
55 | landmark_list=face_landmarks,
56 | connections=mp_face_mesh.FACE_CONNECTIONS,
57 | landmark_drawing_spec=drawing_spec,
58 | connection_drawing_spec=drawing_spec)
59 |
60 | times.append(end_time)
61 | video_writer.write(image)
62 |
63 | print('Finished processing frames')
64 | video.release(), video_writer.release()
65 |
66 | print("Average time(ms): ", sum(times)//frame_num)
67 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1000.0 / average of inference times for all the frames
68 |
69 | if __name__ == '__main__':
70 | parser = ArgumentParser()
71 |     parser.add_argument('--video_file_path', required=True, type=str,
72 |                         help='Path to the video file to run face mesh estimation on')
73 | 
74 |     parser.add_argument('--min_detection_confidence', default=0.5, type=float,
75 |                         help='Minimum confidence value ([0.0, 1.0]) from the face detection model for the detection to be considered successful. Defaults to 0.5')
76 |     parser.add_argument('--min_tracking_confidence', default=0.5, type=float,
77 |                         help='Minimum confidence value ([0.0, 1.0]) from the landmark-tracking model for the face landmarks to be considered tracked successfully, or otherwise face detection will be invoked automatically on the next input image. Defaults to 0.5')
78 |
79 | args = parser.parse_args()
80 | main(args)
81 |
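
The mesh result holds 468 normalized landmarks per detected face; converting them to pixel coordinates is usually the first step for any downstream processing. A minimal sketch, assuming `results` and `image` come from the loop above:

```
def mesh_to_pixels(results, image):
    """Return (x, y) pixel coordinates for the first detected face, or an empty list."""
    if not results.multi_face_landmarks:
        return []
    h, w = image.shape[:2]
    return [(int(lm.x * w), int(lm.y * h))
            for lm in results.multi_face_landmarks[0].landmark]
```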
--------------------------------------------------------------------------------
/examples/mediapipe/face_mesh/run_video_stream.py:
--------------------------------------------------------------------------------
1 | # Based on MediaPipe Example Scripts. All rights reserved.
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov
3 | # SPDX-License-Identifier: MIT
4 |
5 | import os
6 | import sys
7 | import time
8 | script_dir = os.path.dirname(__file__)
9 | sys.path.insert(1, os.path.join(script_dir, '..', 'common'))
10 |
11 | import cv2
12 | import mediapipe as mp
13 | from argparse import ArgumentParser
14 | from cv_utils import init_video_stream_capture
15 |
16 | mp_drawing = mp.solutions.drawing_utils
17 | mp_face_mesh = mp.solutions.face_mesh
18 |
19 | def main(args):
20 | video = init_video_stream_capture(args.video_source)
21 | drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
22 |
23 | with mp_face_mesh.FaceMesh(min_detection_confidence=args.min_detection_confidence,
24 | min_tracking_confidence=args.min_tracking_confidence,
25 | static_image_mode = False) as face_mesh:
26 |
27 | while True:
28 |
29 | frame_present, frame = video.read()
30 | if not frame_present:
31 | raise RuntimeError('Error reading frame from video stream')
32 |
33 | # Flip the image horizontally for a later selfie-view display, and convert
34 | # the BGR image to RGB.
35 | image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB)
36 | # To improve performance, optionally mark the image as not writeable to
37 | # pass by reference.
38 | image.flags.writeable = False
39 |
40 | start_time = time.time()
41 | results = face_mesh.process(image)
42 | end_time = (time.time() - start_time)*1000
43 |
44 | print("FPS: ", 1.0 / (time.time() - start_time)) # FPS = 1 / time to process loop
45 | print("Time(ms): ", (time.time() - start_time)*1000)
46 |
47 | # Draw the face mesh annotations on the image.
48 | image.flags.writeable = True
49 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
50 | if results.multi_face_landmarks:
51 | for face_landmarks in results.multi_face_landmarks:
52 | mp_drawing.draw_landmarks(
53 | image=image,
54 | landmark_list=face_landmarks,
55 | connections=mp_face_mesh.FACE_CONNECTIONS,
56 | landmark_drawing_spec=drawing_spec,
57 | connection_drawing_spec=drawing_spec)
58 |
59 | cv2.imshow('MediaPipe Face Mesh Demo', image)
60 |
61 | if cv2.waitKey(1) == 27:
62 | print('\nExit key activated. Closing video...')
63 | break
64 |
65 | video.release(), cv2.destroyAllWindows()
66 |
67 | if __name__ == '__main__':
68 | parser = ArgumentParser()
69 | parser.add_argument('--video_source', type=int, default=0,
70 | help='Device index to access video stream. Defaults to primary device camera at index 0')
71 |
72 | parser.add_argument('--min_detection_confidence', default=0.5, type=float,
73 |                         help='Minimum confidence value ([0.0, 1.0]) from the face detection model for the detection to be considered successful. Defaults to 0.5')
74 | parser.add_argument('--min_tracking_confidence', default=0.5, type=float,
75 | help='Minimum confidence value ([0.0, 1.0]) from the landmark-tracking model for the face landmarks to be considered tracked successfully, or otherwise face detection will be invoked automatically on the next input image.')
76 |
77 | args = parser.parse_args()
78 | main(args)
--------------------------------------------------------------------------------
/examples/mediapipe/hand_landmarks/run_video_file.py:
--------------------------------------------------------------------------------
1 | # Based on MediaPipe Example Scripts. All rights reserved.
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov
3 | # SPDX-License-Identifier: MIT
4 |
5 | import os
6 | import sys
7 | import time
8 | script_dir = os.path.dirname(__file__)
9 | sys.path.insert(1, os.path.join(script_dir, '..', 'common'))
10 |
11 | import cv2
12 | import mediapipe as mp
13 | from tqdm import tqdm
14 | from argparse import ArgumentParser
15 | from cv_utils import init_video_file_capture
16 |
17 | mp_drawing = mp.solutions.drawing_utils
18 | mp_drawing_styles = mp.solutions.drawing_styles
19 | mp_hands = mp.solutions.hands
20 |
21 | def main(args):
22 | video, video_writer, frame_count = init_video_file_capture(args.video_file_path, 'hand_landmarks_demo')
23 | frame_num = len(frame_count)
24 |
25 | times = []
26 |
27 | with mp_hands.Hands(model_complexity=args.model_selection,
28 | min_detection_confidence=args.min_detection_confidence,
29 | min_tracking_confidence=0.5) as hands:
30 |
31 | for _ in tqdm(frame_count, desc='Processing frames'):
32 | frame_present, frame = video.read()
33 | if not frame_present:
34 | continue
35 |
36 | # Flip the image horizontally for a later selfie-view display, and convert
37 | # the BGR image to RGB.
38 | image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB)
39 | # To improve performance, optionally mark the image as not writeable to
40 | # pass by reference.
41 | image.flags.writeable = False
42 |
43 | start_time = time.time()
44 | results = hands.process(image)
45 | end_time = (time.time() - start_time)*1000
46 |
47 | # Draw the hand landmarks annotations on the image.
48 | image.flags.writeable = True
49 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
50 | if results.multi_hand_landmarks:
51 | for hand_landmarks in results.multi_hand_landmarks:
52 | mp_drawing.draw_landmarks(
53 | image,
54 | hand_landmarks,
55 | mp_hands.HAND_CONNECTIONS,
56 | mp_drawing_styles.get_default_hand_landmarks_style(),
57 | mp_drawing_styles.get_default_hand_connections_style())
58 |
59 | times.append(end_time)
60 | video_writer.write(image)
61 |
62 | print('Finished processing frames')
63 | video.release(), video_writer.release()
64 |
65 | print("Average time(ms): ", sum(times)//frame_num)
66 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1000.0 / average of inference times for all the frames
67 |
68 | if __name__ == '__main__':
69 | parser = ArgumentParser()
70 |     parser.add_argument('--video_file_path', required=True, type=str,
71 |                         help='Path to the video file to run hand landmark detection on')
72 | 
73 |     parser.add_argument('--min_detection_confidence', default=0.5, type=float,
74 |                         help='Minimum confidence value ([0.0, 1.0]) from the hand detection model for the detection to be considered successful. Defaults to 0.5')
75 | 
76 |     parser.add_argument('--model_selection', default=0, type=int,
77 |                         help='Complexity of the hand landmark model (0 or 1). Landmark accuracy as well as inference latency generally go up with the model complexity. Defaults to 0.')
78 |
79 | args = parser.parse_args()
80 | main(args)
81 |
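
Beyond drawing, individual landmarks can be addressed through the `mp_hands.HandLandmark` enum; the index fingertip is a common choice for gesture-style interaction. A minimal sketch, assuming `results`, `image` and `mp_hands` from the script above:

```
def index_fingertips(results, image):
    """Return the index fingertip of each detected hand in pixel coordinates."""
    tips = []
    if results.multi_hand_landmarks:
        h, w = image.shape[:2]
        for hand_landmarks in results.multi_hand_landmarks:
            tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
            tips.append((int(tip.x * w), int(tip.y * h)))
    return tips
```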
--------------------------------------------------------------------------------
/examples/mediapipe/hand_landmarks/run_video_stream.py:
--------------------------------------------------------------------------------
1 | # Based on MediaPipe Example Scripts. All rights reserved.
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov
3 | # SPDX-License-Identifier: MIT
4 |
5 | import os
6 | import sys
7 | import time
8 | script_dir = os.path.dirname(__file__)
9 | sys.path.insert(1, os.path.join(script_dir, '..', 'common'))
10 |
11 | import cv2
12 | import mediapipe as mp
13 | from argparse import ArgumentParser
14 | from cv_utils import init_video_stream_capture
15 |
16 | mp_drawing = mp.solutions.drawing_utils
17 | mp_drawing_styles = mp.solutions.drawing_styles
18 | mp_hands = mp.solutions.hands
19 |
20 | def main(args):
21 | video = init_video_stream_capture(args.video_source)
22 |
23 | with mp_hands.Hands(model_complexity=args.model_selection,
24 | min_detection_confidence=args.min_detection_confidence,
25 | min_tracking_confidence=0.5) as hands:
26 |
27 | while True:
28 |
29 | frame_present, frame = video.read()
30 | if not frame_present:
31 | raise RuntimeError('Error reading frame from video stream')
32 |
33 | # Flip the image horizontally for a later selfie-view display, and convert
34 | # the BGR image to RGB.
35 | image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB)
36 | # To improve performance, optionally mark the image as not writeable to
37 | # pass by reference.
38 | image.flags.writeable = False
39 |
40 | start_time = time.time()
41 | results = hands.process(image)
42 | end_time = (time.time() - start_time)*1000
43 |
44 | print("FPS: ", 1.0 / (time.time() - start_time)) # FPS = 1 / time to process loop
45 | print("Time(ms): ", (time.time() - start_time)*1000)
46 |
47 | # Draw the hand landmarks annotations on the image.
48 | image.flags.writeable = True
49 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
50 | if results.multi_hand_landmarks:
51 | for hand_landmarks in results.multi_hand_landmarks:
52 | mp_drawing.draw_landmarks(
53 | image,
54 | hand_landmarks,
55 | mp_hands.HAND_CONNECTIONS,
56 | mp_drawing_styles.get_default_hand_landmarks_style(),
57 | mp_drawing_styles.get_default_hand_connections_style())
58 |
59 | cv2.imshow('MediaPipe Hands', image)
60 |
61 | if cv2.waitKey(1) == 27:
62 | print('\nExit key activated. Closing video...')
63 | break
64 |
65 | video.release(), cv2.destroyAllWindows()
66 |
67 | if __name__ == '__main__':
68 | parser = ArgumentParser()
69 | parser.add_argument('--video_source', type=int, default=0,
70 | help='Device index to access video stream. Defaults to primary device camera at index 0')
71 |
72 |     parser.add_argument('--min_detection_confidence', default=0.5, type=float,
73 |                         help='Minimum confidence value ([0.0, 1.0]) from the hand detection model for the detection to be considered successful. Defaults to 0.5')
74 | 
75 |     parser.add_argument('--model_selection', default=0, type=int,
76 |                         help='Complexity of the hand landmark model (0 or 1). Landmark accuracy as well as inference latency generally go up with the model complexity. Defaults to 0.')
77 |
78 | args = parser.parse_args()
79 | main(args)
--------------------------------------------------------------------------------
/examples/mediapipe/pose_estimation/run_video_file.py:
--------------------------------------------------------------------------------
1 | # Based on MediaPipe Example Scripts. All rights reserved.
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov
3 | # SPDX-License-Identifier: MIT
4 |
5 | import os
6 | import sys
7 | import time
8 | script_dir = os.path.dirname(__file__)
9 | sys.path.insert(1, os.path.join(script_dir, '..', 'common'))
10 |
11 | import cv2
12 | import mediapipe as mp
13 | from tqdm import tqdm
14 | from argparse import ArgumentParser
15 | from cv_utils import init_video_file_capture
16 |
17 | mp_drawing = mp.solutions.drawing_utils
18 | mp_pose = mp.solutions.pose
19 |
20 | def main(args):
21 | video, video_writer, frame_count = init_video_file_capture(args.video_file_path, 'pose_estimation_demo')
22 | frame_num = len(frame_count)
23 |
24 | times = []
25 |
26 | with mp_pose.Pose(min_detection_confidence=args.min_detection_confidence,
27 | model_complexity=args.model_complexity,
28 | static_image_mode = False) as pose:
29 |
30 | for _ in tqdm(frame_count, desc='Processing frames'):
31 | frame_present, frame = video.read()
32 | if not frame_present:
33 | continue
34 |
35 | # Flip the image horizontally for a later selfie-view display, and convert
36 | # the BGR image to RGB.
37 | image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB)
38 | # To improve performance, optionally mark the image as not writeable to
39 | # pass by reference.
40 | image.flags.writeable = False
41 |
42 | start_time = time.time()
43 | results = pose.process(image)
44 | end_time = (time.time() - start_time)*1000
45 |
46 |             # Draw the pose landmark annotations on the image.
47 | image.flags.writeable = True
48 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
49 | mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
50 |
51 | times.append(end_time)
52 | video_writer.write(image)
53 |
54 | print('Finished processing frames')
55 | video.release(), video_writer.release()
56 |
57 | print("Average time(ms): ", sum(times)//frame_num)
58 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1000.0 / average of inference times for all the frames
59 |
60 | if __name__ == '__main__':
61 | parser = ArgumentParser()
62 |     parser.add_argument('--video_file_path', required=True, type=str,
63 |                         help='Path to the video file to run pose estimation on')
64 | 
65 |     parser.add_argument('--min_detection_confidence', default=0.5, type=float,
66 |                         help='Minimum confidence value ([0.0, 1.0]) from the person detection model for the detection to be considered successful. Defaults to 0.5')
67 | 
68 |     parser.add_argument('--model_complexity', default=0, type=int,
69 |                         help='Landmark accuracy as well as inference latency generally go up with the model complexity (0, 1 or 2). Defaults to 0.')
70 |
71 | args = parser.parse_args()
72 | main(args)
73 |
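
Individual body keypoints can be read from `results.pose_landmarks` through the `mp_pose.PoseLandmark` enum, each with normalized coordinates and a visibility score. A minimal sketch, assuming `results`, `image` and `mp_pose` from the script above:

```
def nose_position(results, image):
    """Return the nose position in pixels plus its visibility, or None if no pose was found."""
    if results.pose_landmarks is None:
        return None
    h, w = image.shape[:2]
    nose = results.pose_landmarks.landmark[mp_pose.PoseLandmark.NOSE]
    return int(nose.x * w), int(nose.y * h), nose.visibility
```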
--------------------------------------------------------------------------------
/examples/mediapipe/pose_estimation/run_video_stream.py:
--------------------------------------------------------------------------------
1 | # Based on MediaPipe Example Scripts. All rights reserved.
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov
3 | # SPDX-License-Identifier: MIT
4 |
5 | import os
6 | import sys
7 | import time
8 | script_dir = os.path.dirname(__file__)
9 | sys.path.insert(1, os.path.join(script_dir, '..', 'common'))
10 |
11 | import cv2
12 | import mediapipe as mp
13 | from argparse import ArgumentParser
14 | from cv_utils import init_video_stream_capture
15 |
16 | mp_drawing = mp.solutions.drawing_utils
17 | mp_pose = mp.solutions.pose
18 |
19 | def main(args):
20 | video = init_video_stream_capture(args.video_source)
21 |
22 | with mp_pose.Pose(min_detection_confidence=args.min_detection_confidence,
23 | model_complexity=args.model_complexity,
24 | static_image_mode = False) as pose:
25 |
26 | while True:
27 |
28 | frame_present, frame = video.read()
29 | if not frame_present:
30 | raise RuntimeError('Error reading frame from video stream')
31 |
32 | # Flip the image horizontally for a later selfie-view display, and convert
33 | # the BGR image to RGB.
34 | image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB)
35 | # To improve performance, optionally mark the image as not writeable to
36 | # pass by reference.
37 | image.flags.writeable = False
38 |
39 | start_time = time.time()
40 | results = pose.process(image)
41 | end_time = (time.time() - start_time)*1000
42 |
43 | print("FPS: ", 1.0 / (time.time() - start_time)) # FPS = 1 / time to process loop
44 | print("Time(ms): ", (time.time() - start_time)*1000)
45 |
46 |             # Draw the pose landmark annotations on the image.
47 | image.flags.writeable = True
48 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
49 | mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
50 |
51 | cv2.imshow('MediaPipe Pose Estimation Demo', image)
52 |
53 | if cv2.waitKey(1) == 27:
54 | print('\nExit key activated. Closing video...')
55 | break
56 |
57 | video.release(), cv2.destroyAllWindows()
58 |
59 | if __name__ == '__main__':
60 | parser = ArgumentParser()
61 | parser.add_argument('--video_source', type=int, default=0,
62 | help='Device index to access video stream. Defaults to primary device camera at index 0')
63 |
64 |     parser.add_argument('--min_detection_confidence', default=0.5, type=float,
65 |                         help='Minimum confidence value ([0.0, 1.0]) from the person detection model for the detection to be considered successful. Defaults to 0.5')
66 | 
67 |     parser.add_argument('--model_complexity', default=0, type=int,
68 |                         help='Landmark accuracy as well as inference latency generally go up with the model complexity (0, 1 or 2). Defaults to 0.')
69 |
70 | args = parser.parse_args()
71 | main(args)
--------------------------------------------------------------------------------
/examples/sample_files/cars.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Seeed-Studio/Seeed_Python_MachineLearning/1fc0bf6d24d778c4fe501541966857b6fd50c146/examples/sample_files/cars.mp4
--------------------------------------------------------------------------------
/examples/sample_files/test_dance.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Seeed-Studio/Seeed_Python_MachineLearning/1fc0bf6d24d778c4fe501541966857b6fd50c146/examples/sample_files/test_dance.mp4
--------------------------------------------------------------------------------
/examples/sample_files/test_s.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Seeed-Studio/Seeed_Python_MachineLearning/1fc0bf6d24d778c4fe501541966857b6fd50c146/examples/sample_files/test_s.mp4
--------------------------------------------------------------------------------
/examples/tensorflow_lite/face_recognition/README.md:
--------------------------------------------------------------------------------
1 | # TensorFlow Lite Face Recognition Multi-stage Demo
2 |
3 | ## Introduction
4 |
5 | This demo performs face recognition on either a video stream or a video file. The face embeddings must first be calculated and saved to a database with calculate_features.py before either of the two examples can be run.
6 |
7 | ## Prerequisites
8 |
9 | Install the dependencies with
10 | ```
11 | pip3 install -r requirements.txt
12 | ```
13 | Make sure you have the necessary system packages for OpenCV to work properly.
14 | ```
15 | sudo apt-get install libatlas-base-dev libjasper-dev libqtgui4 python3-pyqt5 libqt4-test libilmbase-dev libopenexr-dev libgstreamer1.0-dev libavcodec58 libavformat58 libswscale5
16 | ```
17 |
18 | ## Usage
19 |
20 | ### Database population
21 |
22 | Before we can run face recognition, we need to extract features from the faces we want to recognize and save the embedding vectors, in base64-encoded form, to a .json file that serves as a small database. You can do that with calculate_features.py.
23 |
24 | ```
25 | python calculate_features.py --help
26 | OpenCV version: 4.5.3
27 | usage: calculate_features.py [-h] --first_stage FIRST_STAGE --second_stage
28 | SECOND_STAGE --third_stage THIRD_STAGE
29 | [--db_file DB_FILE] --img_file IMG_FILE [--id ID]
30 | [--name NAME]
31 |
32 | optional arguments:
33 | -h, --help show this help message and exit
34 | --first_stage FIRST_STAGE
35 | File path of .tflite file. (default: None)
36 | --second_stage SECOND_STAGE
37 | File path of .tflite file. (default: None)
38 | --third_stage THIRD_STAGE
39 | File path of .tflite file. (default: None)
40 | --db_file DB_FILE File path to database (default: database.db)
41 | --img_file IMG_FILE File path to picture (default: None)
42 |   --id ID               Unique ID for the face (default: 0)
43 |   --name NAME           Name for the face feature vector (can be
44 |                         duplicate) (default: John Doe)
45 | 
46 | ```
47 | For example, to extract a single face embedding vector for Barack Hussein Obama's face, you can run:
48 | ```
49 | python calculate_features.py --first_stage ../face_rec_models/YOLOv3_best_recall_quant.tflite --second_stage ../face_rec_models/MobileFaceNet_kpts_quant.tflite --third_stage ../face_rec_models/MobileFaceNet_features_quant.tflite --img_file obama.jpg --name Obama --id 0
50 | ```
51 |
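The resulting database is a plain JSON object keyed by id, with each entry holding a name and a base64-encoded float32 embedding vector. A minimal sketch for inspecting it, mirroring the decoding done in multi_stage_file.py:

```
import json, base64
import numpy as np

with open('database.db', 'r') as f:
    db = json.load(f)

for face_id, entry in db.items():
    vector = np.frombuffer(base64.b64decode(entry['vector']), np.float32)
    print(face_id, entry['name'], vector.shape)
```
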
52 | ### Face Recognition from Video File
53 |
54 | Once you have a database with at least one face embedding recorded, you can try it on a video file that contains people's faces. This is mainly useful for testing and benchmarking.
55 |
56 | Example:
57 | ```
58 | python multi_stage_file.py --first_stage ../face_rec_models/YOLOv3_best_recall_quant.tflite --second_stage ../face_rec_models/MobileFaceNet_kpts_quant.tflite --third_stage ../face_rec_models/MobileFaceNet_features_quant.tflite --file ../../sample_files/test_s.mp4
59 | ```
60 |
61 | ### Face Recognition from Video Stream
62 |
63 | Finally, for actual applications you can use the multi_stage_stream.py script. It can get the video stream either from OpenCV or from picamera, if executed on a Raspberry Pi with a Pi camera connected.
64 |
65 | Example:
66 | ```
67 | python multi_stage_stream.py --first_stage ../face_rec_models/YOLOv3_best_recall_quant.tflite --second_stage ../face_rec_models/MobileFaceNet_kpts_quant.tflite --third_stage ../face_rec_models/MobileFaceNet_features_quant.tflite
68 | ```
69 | The output is served by a Flask web server on port 5000 (open http://<device-ip>:5000 in a browser). This is done to simplify testing and running the application on headless systems.
--------------------------------------------------------------------------------
/examples/tensorflow_lite/face_recognition/base_camera.py:
--------------------------------------------------------------------------------
1 | import time
2 | import threading
3 | try:
4 | from greenlet import getcurrent as get_ident
5 | except ImportError:
6 | try:
7 | from thread import get_ident
8 | except ImportError:
9 | from _thread import get_ident
10 |
11 |
12 | class CameraEvent(object):
13 | """An Event-like class that signals all active clients when a new frame is
14 | available.
15 | """
16 | def __init__(self):
17 | self.events = {}
18 |
19 | def wait(self):
20 | """Invoked from each client's thread to wait for the next frame."""
21 | ident = get_ident()
22 | if ident not in self.events:
23 | # this is a new client
24 | # add an entry for it in the self.events dict
25 | # each entry has two elements, a threading.Event() and a timestamp
26 | self.events[ident] = [threading.Event(), time.time()]
27 | return self.events[ident][0].wait()
28 |
29 | def set(self):
30 | """Invoked by the camera thread when a new frame is available."""
31 | now = time.time()
32 | remove = None
33 | for ident, event in self.events.items():
34 | if not event[0].isSet():
35 | # if this client's event is not set, then set it
36 | # also update the last set timestamp to now
37 | event[0].set()
38 | event[1] = now
39 | else:
40 | # if the client's event is already set, it means the client
41 | # did not process a previous frame
42 | # if the event stays set for more than 5 seconds, then assume
43 | # the client is gone and remove it
44 | if now - event[1] > 5:
45 | remove = ident
46 | if remove:
47 | del self.events[remove]
48 |
49 | def clear(self):
50 | """Invoked from each client's thread after a frame was processed."""
51 | self.events[get_ident()][0].clear()
52 |
53 |
54 | class BaseCamera(object):
55 | thread = None # background thread that reads frames from camera
56 | frame = None # current frame is stored here by background thread
57 | last_access = 0 # time of last client access to the camera
58 | event = CameraEvent()
59 |
60 | def __init__(self):
61 | """Start the background camera thread if it isn't running yet."""
62 | if BaseCamera.thread is None:
63 | BaseCamera.last_access = time.time()
64 |
65 | # start background frame thread
66 | BaseCamera.thread = threading.Thread(target=self._thread)
67 | BaseCamera.thread.start()
68 |
69 | # wait until frames are available
70 | while self.get_frame() is None:
71 | time.sleep(0)
72 |
73 | def get_frame(self):
74 | """Return the current camera frame."""
75 | BaseCamera.last_access = time.time()
76 |
77 | # wait for a signal from the camera thread
78 | BaseCamera.event.wait()
79 | BaseCamera.event.clear()
80 |
81 | return BaseCamera.frame
82 |
83 | @staticmethod
84 | def frames():
85 |         """Generator that returns frames from the camera."""
86 | raise RuntimeError('Must be implemented by subclasses.')
87 |
88 | @classmethod
89 | def _thread(cls):
90 | """Camera background thread."""
91 | print('Starting camera thread.')
92 | frames_iterator = cls.frames()
93 | for frame in frames_iterator:
94 | BaseCamera.frame = frame
95 | BaseCamera.event.set() # send signal to clients
96 | time.sleep(0)
97 |
98 | # if there hasn't been any clients asking for frames in
99 | # the last 10 seconds then stop the thread
100 | if time.time() - BaseCamera.last_access > 10:
101 | frames_iterator.close()
102 | print('Stopping camera thread due to inactivity.')
103 | break
104 | BaseCamera.thread = None
105 |
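
BaseCamera runs a single background capture thread shared by all clients; each call to get_frame() blocks until the camera thread signals a new frame, and the thread shuts itself down after 10 seconds without clients. A minimal usage sketch, assuming the OpenCV-backed subclass from camera_opencv.py in this folder:

```
from camera_opencv import Camera

camera = Camera()            # first instantiation starts the background thread
frame = camera.get_frame()   # blocks until the next frame event is set
print(frame.shape)
```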
--------------------------------------------------------------------------------
/examples/tensorflow_lite/face_recognition/calculate_features.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import cv2
3 | import numpy as np
4 | import skimage
5 | import skimage.transform
6 | import json, base64
7 |
8 | from cv_utils import decode_yolov3, preprocess
9 | from tflite_runtime.interpreter import Interpreter
10 |
11 | FACE_ANCHORS = [[[0.51424575, 0.54116074], [0.29523918, 0.45838044], [0.21371929, 0.21518053]],
12 | [[0.10255913, 0.42572159], [0.05785894, 0.17925645], [0.01839256, 0.07238193]]]
13 |
14 | IMG_SHAPE = (128, 128) # in HW form
15 | offset_x = 0
16 | offset_y = -15
17 | src = np.array([(44+offset_x, 59+offset_y),
18 | (84+offset_x, 59+offset_y),
19 | (64+offset_x, 82+offset_y),
20 | (47+offset_x, 105),
21 | (81+offset_x, 105)], dtype=np.float32)
22 |
23 | def write_db(db, id, name, vector):
24 |
25 | vector = base64.b64encode(vector).decode('utf-8')
26 |
27 | entry = {"name": name, "vector": vector}
28 | db[id] = entry
29 | print(db)
30 | f = open('database.db','w')
31 | entry = json.dumps(db)
32 | f.write(entry)
33 | f.close()
34 |
35 | return db
36 |
37 | def read_db(db_path = 'database.db'):
38 | try:
39 | f = open(db_path, 'r')
40 | except FileNotFoundError:
41 | clear_db(db_path)
42 | f = open(db_path, 'r')
43 |
44 | content = f.read()
45 | #print(content)
46 |     db = json.loads(content) if content else {}
47 |     f.close()
48 |     return db
49 | 
50 |
51 | def clear_db(db_path = 'database.db'):
52 |
53 | f = open(db_path,'w')
54 | db = {}
55 | content = json.dumps(db)
56 | f.write(content)
57 | f.close()
58 |
59 | def draw_bounding_boxes(frame, detections, labels=['face'], kpts = None):
60 |
61 | def _to_original_scale(boxes, frame_height, frame_width):
62 |         minmax_boxes = np.empty(shape=(4, ), dtype=int)  # plain int: np.int is deprecated/removed in newer NumPy
63 |
64 | cx = boxes[0] * frame_width
65 | cy = boxes[1] * frame_height
66 | w = boxes[2] * frame_width
67 | h = boxes[3] * frame_height
68 |
69 | minmax_boxes[0] = cx - w/2
70 | minmax_boxes[1] = cy - h/2
71 | minmax_boxes[2] = cx + w/2
72 | minmax_boxes[3] = cy + h/2
73 |
74 | return minmax_boxes
75 |
76 | color = (0, 255, 0)
77 | label_color = (125, 125, 125)
78 |
79 | for i in range(len(detections)):
80 | class_idx, box, confidence = [d for d in detections[i]]
81 |
82 | # Obtain frame size and resized bounding box positions
83 | frame_height, frame_width = frame.shape[:2]
84 |
85 | x_min, y_min, x_max, y_max = _to_original_scale(box, frame_height, frame_width)
86 | # Ensure box stays within the frame
87 | x_min, y_min = max(0, x_min), max(0, y_min)
88 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max)
89 |
90 | # Draw bounding box around detected object
91 | cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)
92 |
93 | # Create label for detected object class
94 | label = labels[class_idx].capitalize()
95 | label = f'{label} {confidence * 100:.1f}%'
96 |
97 | # Make sure label always stays on-screen
98 | x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2]
99 |
100 | lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text)
101 | lbl_box_xy_max = (x_min + int(0.55 * x_text), y_min + y_text if y_min<25 else y_min)
102 | lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5)
103 |
104 | # Add label and confidence value
105 | cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1)
106 | cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.50,
107 | label_color, 1, cv2.LINE_AA)
108 |
109 | for kpt_set in kpts:
110 | for kpt in kpt_set:
111 | cv2.circle(frame, (int(kpt[0]), int(kpt[1])), 5, (255, 0, 0), 2)
112 |
113 | def process_faces(frame, detections, db, id, name):
114 | kpts_list = []
115 |
116 | def _to_original_scale(boxes, frame_height, frame_width):
117 |         minmax_boxes = np.empty(shape=(4, ), dtype=int)  # plain int: np.int is deprecated/removed in newer NumPy
118 |
119 | cx = boxes[0] * frame_width
120 | cy = boxes[1] * frame_height
121 | w = boxes[2] * frame_width
122 | h = boxes[3] * frame_height
123 |
124 | minmax_boxes[0] = cx - w/2
125 | minmax_boxes[1] = cy - h/2
126 | minmax_boxes[2] = cx + w/2
127 | minmax_boxes[3] = cy + h/2
128 |
129 | return minmax_boxes
130 |
131 | for i in range(len(detections)):
132 | _, box, _ = [d for d in detections[i]]
133 |
134 | # Obtain frame size and resized bounding box positions
135 | frame_height, frame_width = frame.shape[:2]
136 |
137 | x_min, y_min, x_max, y_max = _to_original_scale(box, frame_height, frame_width)
138 | # Ensure box stays within the frame
139 | x_min, y_min = max(0, x_min), max(0, y_min)
140 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max)
141 |
142 | x, y, w, h = x_min, y_min, x_max - x_min, y_max - y_min
143 |
144 | face_img = frame[y_min:y_max, x_min:x_max]
145 |
146 | plist = second_stage_network.run(face_img)[0]
147 |
148 | le = (x + int(plist[0] * w+5), y + int(plist[1] * h+5))
149 | re = (x + int(plist[2] * w), y + int(plist[3] * h+5))
150 | n = (x + int(plist[4] * w), y + int(plist[5] * h))
151 | lm = (x + int(plist[6] * w), y + int(plist[7] * h))
152 | rm = (x + int(plist[8] * w), y + int(plist[9] * h))
153 | kpts = [le, re, n, lm, rm]
154 | kpts_list.append(kpts)
155 | kpts = np.array(kpts, dtype = np.float32)
156 |
157 | transformer = skimage.transform.SimilarityTransform()
158 | transformer.estimate(kpts, src)
159 | M = transformer.params[0: 2, : ]
160 | warped_img = cv2.warpAffine(frame, M, (IMG_SHAPE[1], IMG_SHAPE[0]), borderValue = 0.0)
161 |
162 | features = third_stage_network.run(warped_img)[0]
163 |
164 | write_db(db, id, name, features)
165 |
166 | return kpts_list
167 |
168 | class NetworkExecutor(object):
169 |
170 | def __init__(self, model_file):
171 |
172 | self.interpreter = Interpreter(model_file, num_threads=3)
173 | self.interpreter.allocate_tensors()
174 | _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape']
175 | self.tensor_index = self.interpreter.get_input_details()[0]['index']
176 |
177 | def get_output_tensors(self):
178 |
179 | output_details = self.interpreter.get_output_details()
180 | tensor_list = []
181 |
182 | for output in output_details:
183 | tensor = np.squeeze(self.interpreter.get_tensor(output['index']))
184 | tensor_list.append(tensor)
185 |
186 | return tensor_list
187 |
188 | def run(self, image):
189 |         if image.shape[:2] != (self.input_height, self.input_width):
190 |             image = cv2.resize(image, (self.input_width, self.input_height))
191 |         img = preprocess(image)
192 | self.interpreter.set_tensor(self.tensor_index, img)
193 | self.interpreter.invoke()
194 | return self.get_output_tensors()
195 |
196 | def main(args):
197 | #clear_db()
198 | db = read_db(args.db_file)
199 |
200 | frame = cv2.imread(args.img_file)
201 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
202 |
203 | results = first_stage_network.run(frame)
204 | detections = decode_yolov3(netout = results, nms_threshold = 0.1,
205 | threshold = 0.7, anchors = FACE_ANCHORS)
206 |
207 | kpts = process_faces(frame, detections, db, args.id, args.name)
208 |
209 | draw_bounding_boxes(frame, detections, ['face'], kpts)
210 | frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
211 | cv2.imwrite(args.img_file.split('.')[0]+'_result.jpg', frame)
212 |
213 |
214 | if __name__ == "__main__" :
215 |
216 | print("OpenCV version: {}".format(cv2. __version__))
217 |
218 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
219 | parser.add_argument('--first_stage', help='Path to the YOLOv3 face detection model to use.', required=True)
220 | parser.add_argument('--second_stage', help='Path to the keypoints detection model to use.', required=True)
221 | parser.add_argument('--third_stage', help='Path to the feature vector embedding extractor model to use.', required=True)
222 |
223 | parser.add_argument('--db_file', help='File path to database', default="database.db")
224 | parser.add_argument('--img_file', help='File path to picture', required=True)
225 | parser.add_argument('--id', default = '0', type=str,
226 | help='Unique ID for the face')
227 | parser.add_argument('--name', default = 'John Doe', type=str,
228 |                         help='Name for the face feature vector (can be duplicate)')
229 |
230 | args = parser.parse_args()
231 |
232 | first_stage_network = NetworkExecutor(args.first_stage)
233 | second_stage_network = NetworkExecutor(args.second_stage)
234 | third_stage_network = NetworkExecutor(args.third_stage)
235 |
236 | main(args)
--------------------------------------------------------------------------------
/examples/tensorflow_lite/face_recognition/camera_opencv.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | from base_camera import BaseCamera
3 |
4 |
5 | class Camera(BaseCamera):
6 | video_source = 0
7 |
8 | @staticmethod
9 | def set_video_source(source):
10 | Camera.video_source = source
11 |
12 | @staticmethod
13 | def frames():
14 | camera = cv2.VideoCapture(Camera.video_source)
15 | if not camera.isOpened():
16 | raise RuntimeError('Could not start camera.')
17 |
18 | while True:
19 | # read current frame
20 | _, img = camera.read()
21 | #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
22 |
23 | # return img
24 | yield img
25 |
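
The capture device defaults to index 0; a different camera can be selected before the first frame is requested, since the index is handed straight to cv2.VideoCapture. A small sketch, assuming a hypothetical second capture device at index 1:

```
from camera_opencv import Camera

Camera.set_video_source(1)   # hypothetical second capture device
camera = Camera()
```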
--------------------------------------------------------------------------------
/examples/tensorflow_lite/face_recognition/camera_pi.py:
--------------------------------------------------------------------------------
1 | import io
2 | import time
3 | import picamera
4 | import picamera.array
5 | import cv2
6 | from base_camera import BaseCamera
7 |
8 |
9 | class Camera(BaseCamera):
10 | video_source = 0
11 |
12 | @staticmethod
13 | def set_video_source(source):
14 | pass
15 |
16 | @staticmethod
17 | def frames():
18 | with picamera.PiCamera(resolution = (1280,720)) as camera:
19 | # let camera warm up
20 | time.sleep(2)
21 |
22 | with picamera.array.PiRGBArray(camera, size=(1280,720)) as stream:
23 | while True:
24 |
25 | camera.capture(stream, format='bgr', use_video_port=True)
26 | # At this point the image is available as stream.array
27 | image = stream.array
28 | stream.truncate(0)
29 | yield image
30 |
31 |
--------------------------------------------------------------------------------
/examples/tensorflow_lite/face_recognition/multi_stage_file.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import cv2
3 | import numpy as np
4 | import skimage
5 | import skimage.transform
6 | import json, base64
7 | import time
8 | from tqdm import tqdm
9 |
10 | from cv_utils import decode_yolov3, preprocess, init_video_file_capture
11 | from tflite_runtime.interpreter import Interpreter
12 |
13 | FACE_ANCHORS = [[[0.51424575, 0.54116074], [0.29523918, 0.45838044], [0.21371929, 0.21518053]],
14 | [[0.10255913, 0.42572159], [0.05785894, 0.17925645], [0.01839256, 0.07238193]]]
15 |
16 | IMG_SHAPE = (128, 128) # in HW form
17 | offset_x = 0
18 | offset_y = -15
19 | src = np.array([(44+offset_x, 59+offset_y),
20 | (84+offset_x, 59+offset_y),
21 | (64+offset_x, 82+offset_y),
22 | (47+offset_x, 105),
23 | (81+offset_x, 105)], dtype=np.float32)
24 |
25 | def read_db(db_path = 'database.db'):
26 | try:
27 | f = open(db_path, 'r')
28 | except FileNotFoundError:
29 | clear_db(db_path)
30 | f = open(db_path, 'r')
31 |
32 | content = f.read()
33 | #print(content)
34 |     db = json.loads(content) if content else {}
35 |     f.close()
36 |     return db
37 | 
38 |
39 | def clear_db(db_path = 'database.db'):
40 |
41 | f = open(db_path,'w')
42 | db = {}
43 | content = json.dumps(db)
44 | f.write(content)
45 | f.close()
46 |
47 | def draw_bounding_boxes(frame, detections, kpts, ids):
48 |
49 | def _to_original_scale(boxes, frame_height, frame_width):
50 |         minmax_boxes = np.empty(shape=(4, ), dtype=int)  # plain int: np.int is deprecated/removed in newer NumPy
51 |
52 | cx = boxes[0] * frame_width
53 | cy = boxes[1] * frame_height
54 | w = boxes[2] * frame_width
55 | h = boxes[3] * frame_height
56 |
57 | minmax_boxes[0] = cx - w/2
58 | minmax_boxes[1] = cy - h/2
59 | minmax_boxes[2] = cx + w/2
60 | minmax_boxes[3] = cy + h/2
61 |
62 | return minmax_boxes
63 |
64 | color = (0, 255, 0)
65 | label_color = (125, 125, 125)
66 |
67 | for i in range(len(detections)):
68 | _, box, _ = [d for d in detections[i]]
69 |
70 | # Obtain frame size and resized bounding box positions
71 | frame_height, frame_width = frame.shape[:2]
72 |
73 | x_min, y_min, x_max, y_max = _to_original_scale(box, frame_height, frame_width)
74 | # Ensure box stays within the frame
75 | x_min, y_min = max(0, x_min), max(0, y_min)
76 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max)
77 |
78 | # Draw bounding box around detected object
79 | cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)
80 |
81 | # Create label for detected object class
82 | label = 'ID: {} Name: {} {}%'.format(*ids[i])
83 | label_color = (255, 255, 255)
84 |
85 | # Make sure label always stays on-screen
86 | x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2]
87 |
88 | lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text)
89 | lbl_box_xy_max = (x_min + int(0.75 * x_text), y_min + y_text if y_min<25 else y_min)
90 | lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5)
91 |
92 | # Add label and confidence value
93 | cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1)
94 | cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.70, label_color, 1, cv2.LINE_AA)
95 |
96 | for kpt_set in kpts:
97 | for kpt in kpt_set:
98 | cv2.circle(frame, (int(kpt[0]), int(kpt[1])), 5, (255, 0, 0), 2)
99 |
100 | def process_faces(frame, detections, db):
101 | kpts_list = []
102 | id_list = []
103 |
104 | def _to_original_scale(boxes, frame_height, frame_width):
105 |         minmax_boxes = np.empty(shape=(4, ), dtype=int)  # plain int: np.int is deprecated/removed in newer NumPy
106 |
107 | cx = boxes[0] * frame_width
108 | cy = boxes[1] * frame_height
109 | w = boxes[2] * frame_width
110 | h = boxes[3] * frame_height
111 |
112 | minmax_boxes[0] = cx - w/2
113 | minmax_boxes[1] = cy - h/2
114 | minmax_boxes[2] = cx + w/2
115 | minmax_boxes[3] = cy + h/2
116 |
117 | return minmax_boxes
118 |
119 | for i in range(len(detections)):
120 | _, box, _ = [d for d in detections[i]]
121 |
122 | # Obtain frame size and resized bounding box positions
123 | frame_height, frame_width = frame.shape[:2]
124 |
125 | x_min, y_min, x_max, y_max = _to_original_scale(box, frame_height, frame_width)
126 | # Ensure box stays within the frame
127 | x_min, y_min = max(0, x_min), max(0, y_min)
128 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max)
129 |
130 | x, y, w, h = x_min, y_min, x_max - x_min, y_max - y_min
131 |
132 | face_img = frame[y_min:y_max, x_min:x_max]
133 |
134 | plist = second_stage_network.run(face_img)[0]
135 |
136 | le = (x + int(plist[0] * w+5), y + int(plist[1] * h+5))
137 | re = (x + int(plist[2] * w), y + int(plist[3] * h+5))
138 | n = (x + int(plist[4] * w), y + int(plist[5] * h))
139 | lm = (x + int(plist[6] * w), y + int(plist[7] * h))
140 | rm = (x + int(plist[8] * w), y + int(plist[9] * h))
141 | kpts = [le, re, n, lm, rm]
142 | kpts_list.append(kpts)
143 | kpts = np.array(kpts, dtype = np.float32)
144 |
145 | transformer = skimage.transform.SimilarityTransform()
146 | transformer.estimate(kpts, src)
147 | M = transformer.params[0: 2, : ]
148 | warped_img = cv2.warpAffine(frame, M, (IMG_SHAPE[1], IMG_SHAPE[0]), borderValue = 0.0)
149 |
150 | features = third_stage_network.run(warped_img)[0]
151 |
152 | highest_score = 0
153 |
154 | for id in db.keys():
155 | cos_sim = np.dot(features, db[id]['vector'])/(np.linalg.norm(features)*np.linalg.norm(db[id]['vector']))
156 | cos_sim /= 2
157 | cos_sim += 0.5
158 | cos_sim *= 100
159 | if highest_score < cos_sim:
160 | highest_score = cos_sim
161 | recognized_id = id
162 |
163 | if highest_score > 70.0:
164 | print(recognized_id, db[recognized_id]['name'], highest_score)
165 | id_list.append([recognized_id, db[recognized_id]['name'], highest_score])
166 | else:
167 | id_list.append(['X', '', 0.0])
168 | return kpts_list, id_list
169 |
170 | class NetworkExecutor(object):
171 |
172 | def __init__(self, model_file):
173 |
174 | self.interpreter = Interpreter(model_file, num_threads=3)
175 | self.interpreter.allocate_tensors()
176 | _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape']
177 | self.tensor_index = self.interpreter.get_input_details()[0]['index']
178 |
179 | def get_output_tensors(self):
180 |
181 | output_details = self.interpreter.get_output_details()
182 | tensor_indices = []
183 | tensor_list = []
184 |
185 | for output in output_details:
186 | tensor = np.squeeze(self.interpreter.get_tensor(output['index']))
187 | tensor_list.append(tensor)
188 |
189 | return tensor_list
190 |
191 | def run(self, image):
192 |         if image.shape[:2] != (self.input_height, self.input_width):
193 |             image = cv2.resize(image, (self.input_width, self.input_height))
194 |         img = preprocess(image)
195 | self.interpreter.set_tensor(self.tensor_index, img)
196 | self.interpreter.invoke()
197 | return self.get_output_tensors()
198 |
199 | def main(args):
200 |     video, video_writer, frame_count = init_video_file_capture(args.file, 'face_recognition_demo')
201 |
202 | frame_num = len(frame_count)
203 | times = []
204 |
205 | for _ in tqdm(frame_count, desc='Processing frames'):
206 | frame_present, frame = video.read()
207 | if not frame_present:
208 | continue
209 |
210 | start_time = time.time()
211 |
212 | results = first_stage_network.run(frame)
213 | detections = decode_yolov3(netout = results, nms_threshold = 0.1,
214 | threshold = args.threshold, anchors = FACE_ANCHORS)
215 | kpts, ids = process_faces(frame, detections, db)
216 |
217 | elapsed_ms = (time.time() - start_time) * 1000
218 |
219 | draw_bounding_boxes(frame, detections, kpts, ids)
220 | times.append(elapsed_ms)
221 | video_writer.write(frame)
222 |
223 | print('Finished processing frames')
224 | video.release(), video_writer.release()
225 |
226 | print("Average time(ms): ", sum(times)//frame_num)
227 |     print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1000.0 / average of inference times for all the frames
228 |
229 | if __name__ == "__main__" :
230 |
231 | print("OpenCV version: {}".format(cv2. __version__))
232 |
233 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
234 | parser.add_argument('--first_stage', help='Path to the YOLOv3 face detection model to use.', required=True)
235 | parser.add_argument('--second_stage', help='Path to the keypoints detection model to use.', required=True)
236 | parser.add_argument('--third_stage', help='Path to the feature vector embedding extractor model to use.', required=True)
237 |
238 | parser.add_argument('--db_file', help='File path to database', default="database.db")
239 |
240 | parser.add_argument('--threshold', help='Confidence threshold.', default=0.7)
241 | parser.add_argument('--file', help='File path of video file', required=True)
242 | args = parser.parse_args()
243 |
244 | first_stage_network = NetworkExecutor(args.first_stage)
245 | second_stage_network = NetworkExecutor(args.second_stage)
246 | third_stage_network = NetworkExecutor(args.third_stage)
247 |
248 | db = read_db(args.db_file)
249 | for item in db:
250 | db[item]['vector'] = np.frombuffer(base64.b64decode(db[item]['vector']), np.float32)
251 |
252 | main(args)
253 |
254 |
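The matching step above rescales cosine similarity from [-1, 1] to a 0-100 score, so the 70.0 acceptance threshold corresponds to a raw cosine similarity of 0.4. A minimal sketch reproducing the arithmetic used in process_faces():

```
import numpy as np

def match_score(features, stored_vector):
    """Cosine similarity rescaled to [0, 100], as in process_faces()."""
    cos_sim = np.dot(features, stored_vector) / (
        np.linalg.norm(features) * np.linalg.norm(stored_vector))
    return (cos_sim / 2 + 0.5) * 100

# A raw cosine similarity of 0.4 maps exactly to the threshold:
# (0.4 / 2 + 0.5) * 100 == 70.0
```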
--------------------------------------------------------------------------------
/examples/tensorflow_lite/face_recognition/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.19.2
2 | tqdm>=4.47.0
3 | scikit-image>=0.18.3
4 | opencv-python>=4.5.3
5 |
--------------------------------------------------------------------------------
/examples/tensorflow_lite/face_recognition/templates/index.html:
--------------------------------------------------------------------------------
1 | <html>
2 |   <head>
3 |     <title>Video Streaming Demonstration</title>
4 |   </head>
5 |   <body>
6 |     <h1>Tflite Face Recognition Inference Demo</h1>
7 |     <!-- stream endpoint assumed to match the Flask route served by multi_stage_stream.py -->
8 |     <img src="{{ url_for('video_feed') }}">
9 |   </body>
10 | </html>
--------------------------------------------------------------------------------
/examples/tensorflow_lite/multi_stage_inference_age_gender/base_camera.py:
--------------------------------------------------------------------------------
1 | import time
2 | import threading
3 | try:
4 | from greenlet import getcurrent as get_ident
5 | except ImportError:
6 | try:
7 | from thread import get_ident
8 | except ImportError:
9 | from _thread import get_ident
10 |
11 |
12 | class CameraEvent(object):
13 | """An Event-like class that signals all active clients when a new frame is
14 | available.
15 | """
16 | def __init__(self):
17 | self.events = {}
18 |
19 | def wait(self):
20 | """Invoked from each client's thread to wait for the next frame."""
21 | ident = get_ident()
22 | if ident not in self.events:
23 | # this is a new client
24 | # add an entry for it in the self.events dict
25 | # each entry has two elements, a threading.Event() and a timestamp
26 | self.events[ident] = [threading.Event(), time.time()]
27 | return self.events[ident][0].wait()
28 |
29 | def set(self):
30 | """Invoked by the camera thread when a new frame is available."""
31 | now = time.time()
32 | remove = None
33 | for ident, event in self.events.items():
34 | if not event[0].isSet():
35 | # if this client's event is not set, then set it
36 | # also update the last set timestamp to now
37 | event[0].set()
38 | event[1] = now
39 | else:
40 | # if the client's event is already set, it means the client
41 | # did not process a previous frame
42 | # if the event stays set for more than 5 seconds, then assume
43 | # the client is gone and remove it
44 | if now - event[1] > 5:
45 | remove = ident
46 | if remove:
47 | del self.events[remove]
48 |
49 | def clear(self):
50 | """Invoked from each client's thread after a frame was processed."""
51 | self.events[get_ident()][0].clear()
52 |
53 |
54 | class BaseCamera(object):
55 | thread = None # background thread that reads frames from camera
56 | frame = None # current frame is stored here by background thread
57 | last_access = 0 # time of last client access to the camera
58 | event = CameraEvent()
59 |
60 | def __init__(self):
61 | """Start the background camera thread if it isn't running yet."""
62 | if BaseCamera.thread is None:
63 | BaseCamera.last_access = time.time()
64 |
65 | # start background frame thread
66 | BaseCamera.thread = threading.Thread(target=self._thread)
67 | BaseCamera.thread.start()
68 |
69 | # wait until frames are available
70 | while self.get_frame() is None:
71 | time.sleep(0)
72 |
73 | def get_frame(self):
74 | """Return the current camera frame."""
75 | BaseCamera.last_access = time.time()
76 |
77 | # wait for a signal from the camera thread
78 | BaseCamera.event.wait()
79 | BaseCamera.event.clear()
80 |
81 | return BaseCamera.frame
82 |
83 | @staticmethod
84 | def frames():
85 |         """Generator that returns frames from the camera."""
86 | raise RuntimeError('Must be implemented by subclasses.')
87 |
88 | @classmethod
89 | def _thread(cls):
90 | """Camera background thread."""
91 | print('Starting camera thread.')
92 | frames_iterator = cls.frames()
93 | for frame in frames_iterator:
94 | BaseCamera.frame = frame
95 | BaseCamera.event.set() # send signal to clients
96 | time.sleep(0)
97 |
98 | # if there hasn't been any clients asking for frames in
99 | # the last 10 seconds then stop the thread
100 | if time.time() - BaseCamera.last_access > 10:
101 | frames_iterator.close()
102 | print('Stopping camera thread due to inactivity.')
103 | break
104 | BaseCamera.thread = None
105 |
--------------------------------------------------------------------------------
/examples/tensorflow_lite/multi_stage_inference_age_gender/camera_opencv.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | from base_camera import BaseCamera
3 |
4 |
5 | class Camera(BaseCamera):
6 | video_source = 0
7 |
8 | @staticmethod
9 | def set_video_source(source):
10 | Camera.video_source = source
11 |
12 | @staticmethod
13 | def frames():
14 | camera = cv2.VideoCapture(Camera.video_source)
15 | if not camera.isOpened():
16 | raise RuntimeError('Could not start camera.')
17 |
18 | while True:
19 | # read current frame
20 | _, img = camera.read()
21 | #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
22 |
23 | # return img
24 | yield img
25 |
--------------------------------------------------------------------------------
/examples/tensorflow_lite/multi_stage_inference_age_gender/camera_pi.py:
--------------------------------------------------------------------------------
1 | import io
2 | import time
3 | import picamera
4 | import picamera.array
5 | import cv2
6 | from base_camera import BaseCamera
7 |
8 |
9 | class Camera(BaseCamera):
10 | video_source = 0
11 |
12 | @staticmethod
13 | def set_video_source(source):
14 | pass
15 |
16 | @staticmethod
17 | def frames():
18 | with picamera.PiCamera(resolution = (1280,720)) as camera:
19 | # let camera warm up
20 | time.sleep(2)
21 |
22 | with picamera.array.PiRGBArray(camera, size=(1280,720)) as stream:
23 | while True:
24 |
25 | camera.capture(stream, format='bgr', use_video_port=True)
26 | # At this point the image is available as stream.array
27 | image = stream.array
28 | stream.truncate(0)
29 | yield image
30 |
31 |
--------------------------------------------------------------------------------
/examples/tensorflow_lite/multi_stage_inference_age_gender/multi_stage_file.py:
--------------------------------------------------------------------------------
1 | import time
2 | import argparse
3 | import os
4 | import cv2
5 | import numpy as np
6 | from tqdm import tqdm
7 |
8 | from cv_utils import init_video_file_capture, decode_yolov3, decode_classifier, draw_classification, draw_bounding_boxes, preprocess
9 | from tflite_runtime.interpreter import Interpreter
10 |
11 | def process_age_gender(roi_img):
12 |
13 | ages = ['0-10', '11-20', '21-45', '46-60', '60-100']
14 | genders = ['M', 'F']
15 |
16 | results = second_stage_network.run(roi_img)
17 | age = np.argmax(results[0])
18 | gender = 0 if results[1] < 0.5 else 1
19 |
20 | label = f'{ages[age]} : {genders[gender]}'
21 |
22 | return label
23 |
24 | class NetworkExecutor(object):
25 |
26 | def __init__(self, model_file):
27 |
28 | self.interpreter = Interpreter(model_file, num_threads=3)
29 | self.interpreter.allocate_tensors()
30 | _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape']
31 | self.tensor_index = self.interpreter.get_input_details()[0]['index']
32 |
33 | def get_output_tensors(self):
34 |
35 | output_details = self.interpreter.get_output_details()
36 | tensor_indices = []
37 | tensor_list = []
38 |
39 | for output in output_details:
40 | tensor = np.squeeze(self.interpreter.get_tensor(output['index']))
41 | tensor_list.append(tensor)
42 |
43 | return tensor_list
44 |
45 | def run(self, image):
46 |         if image.shape[:2] != (self.input_height, self.input_width):
47 |             image = cv2.resize(image, (self.input_width, self.input_height))
48 |         img = preprocess(image)
49 | self.interpreter.set_tensor(self.tensor_index, img)
50 | self.interpreter.invoke()
51 | return self.get_output_tensors()
52 |
53 | def main(args):
54 | video, video_writer, frame_count = init_video_file_capture(args.file, 'age_gender_demo')
55 |
56 | frame_num = len(frame_count)
57 | times = []
58 |
59 | for _ in tqdm(frame_count, desc='Processing frames'):
60 | frame_present, frame = video.read()
61 | if not frame_present:
62 | continue
63 |
64 | start_time = time.time()
65 |
66 | results = first_stage_network.run(frame)
67 | detections = decode_yolov3(netout = results, nms_threshold = 0.1, threshold = args.threshold)
68 | draw_bounding_boxes(frame, detections, None, process_age_gender)
69 |
70 | elapsed_ms = (time.time() - start_time) * 1000
71 |
72 | times.append(elapsed_ms)
73 | video_writer.write(frame)
74 |
75 | print('Finished processing frames')
76 | video.release(), video_writer.release()
77 |
78 | print("Average time(ms): ", sum(times)//frame_num)
79 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1 / time to process loop
80 |
81 | if __name__ == "__main__" :
82 |
83 |     print("OpenCV version: {}".format(cv2.__version__))
84 |
85 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
86 | parser.add_argument('--first_stage', help='File path of .tflite file.', required=True)
87 | parser.add_argument('--second_stage', help='File path of .tflite file.', required=True)
88 |     parser.add_argument('--threshold', type=float, help='Confidence threshold.', default=0.7)
89 | parser.add_argument('--file', help='File path of video file', required=True)
90 | args = parser.parse_args()
91 |
92 | first_stage_network = NetworkExecutor(args.first_stage)
93 | second_stage_network = NetworkExecutor(args.second_stage)
94 |
95 | main(args)
96 |
97 |
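# Example invocation (a sketch, not shipped with the repository): the two .tflite names
# below are placeholders for your own converted face detector and age/gender classifier,
# while the sample clip comes from examples/sample_files in this repo.
#
#   python3 multi_stage_file.py \
#       --first_stage face_detector.tflite \
#       --second_stage age_gender_classifier.tflite \
#       --file ../../sample_files/test_s.mp4 \
#       --threshold 0.7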
--------------------------------------------------------------------------------
/examples/tensorflow_lite/multi_stage_inference_age_gender/multi_stage_stream.py:
--------------------------------------------------------------------------------
1 | import time
2 | import argparse
3 | import os
4 | import cv2
5 | import numpy as np
6 |
7 | from cv_utils import decode_yolov3, preprocess, draw_bounding_boxes
8 | from tflite_runtime.interpreter import Interpreter
9 | from flask import Flask, render_template, request, Response
10 |
11 | app = Flask (__name__, static_url_path = '')
12 |
13 | def process_age_gender(roi_img):
14 |
15 | ages = ['0-10', '11-20', '21-45', '46-60', '60-100']
16 | genders = ['M', 'F']
17 |
18 | results = second_stage_network.run(roi_img)
19 | age = np.argmax(results[0])
20 | gender = 0 if results[1] < 0.5 else 1
21 |
22 | label = f'{ages[age]} : {genders[gender]}'
23 |
24 | return label
25 |
26 | class NetworkExecutor(object):
27 |
28 | def __init__(self, model_file):
29 |
30 | self.interpreter = Interpreter(model_file, num_threads=3)
31 | self.interpreter.allocate_tensors()
32 | _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape']
33 | self.tensor_index = self.interpreter.get_input_details()[0]['index']
34 |
35 | def get_output_tensors(self):
36 |
37 | output_details = self.interpreter.get_output_details()
38 | tensor_indices = []
39 | tensor_list = []
40 |
41 | for output in output_details:
42 | tensor = np.squeeze(self.interpreter.get_tensor(output['index']))
43 | tensor_list.append(tensor)
44 |
45 | return tensor_list
46 |
47 | def run(self, image):
48 |         if image.shape[:2] != (self.input_height, self.input_width):
49 |             image = cv2.resize(image, (self.input_width, self.input_height))
50 |         img = preprocess(image)
51 | self.interpreter.set_tensor(self.tensor_index, img)
52 | self.interpreter.invoke()
53 | return self.get_output_tensors()
54 |
55 | class Detector(NetworkExecutor):
56 |
57 | def __init__(self, label_file, model_file, threshold):
58 | super().__init__(model_file)
59 | self.threshold = float(threshold)
60 |
61 | def detect(self, frame):
62 | start_time = time.time()
63 | results = self.run(frame)
64 | elapsed_ms = (time.time() - start_time) * 1000
65 |
66 | detections = decode_yolov3(netout = results, nms_threshold = 0.1, threshold = self.threshold)
67 | draw_bounding_boxes(frame, detections, None, process_age_gender)
68 |
69 |         fps = 1000 / elapsed_ms
70 | print("Estimated frames per second : {0:.2f} Inference time: {1:.2f}".format(fps, elapsed_ms))
71 |
72 | return cv2.imencode('.jpg', frame)[1].tobytes()
73 |
74 | @app.route("/")
75 | def index():
76 | return render_template('index.html', name = None)
77 |
78 | def gen(camera):
79 | while True:
80 | frame = camera.get_frame()
81 | image = detector.detect(frame)
82 | yield (b'--frame\r\n'+b'Content-Type: image/jpeg\r\n\r\n' + image + b'\r\n')
83 |
84 | @app.route('/video_feed')
85 | def video_feed():
86 | return Response(gen(Camera()), mimetype='multipart/x-mixed-replace; boundary=frame')
87 |
88 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
89 | parser.add_argument('--first_stage', help='File path of .tflite file.', required=True)
90 | parser.add_argument('--second_stage', help='File path of .tflite file.', required=True)
91 | parser.add_argument('--threshold', help='Confidence threshold.', default=0.5)
92 | parser.add_argument('--source', help='picamera or cv', default='cv')
93 | args = parser.parse_args()
94 |
95 | if args.source == "cv":
96 | from camera_opencv import Camera
97 | source = 0
98 | elif args.source == "picamera":
99 | from camera_pi import Camera
100 | source = 0
101 |
102 | Camera.set_video_source(source)
103 |
104 | detector = Detector(None, args.first_stage, args.threshold)
105 | second_stage_network = NetworkExecutor(args.second_stage)
106 |
107 | if __name__ == "__main__" :
108 | app.run(host = '0.0.0.0', port = 5000, debug = True)
109 |
110 |
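# Example invocation (a sketch, not shipped with the repository; the .tflite names are
# placeholders). The Flask app binds to 0.0.0.0:5000, so once the script is running the
# annotated stream can be viewed from a browser at http://<device-ip>:5000/.
#
#   python3 multi_stage_stream.py \
#       --first_stage face_detector.tflite \
#       --second_stage age_gender_classifier.tflite \
#       --source picamera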
--------------------------------------------------------------------------------
/examples/tensorflow_lite/multi_stage_inference_age_gender/templates/index.html:
--------------------------------------------------------------------------------
1 | <html>
2 | <head>
3 | <title>Video Streaming Demonstration</title>
4 | </head>
5 | <body>
6 | <h1>Tflite Multi-stage Inference Demo</h1>
7 | <img src="{{ url_for('video_feed') }}">
8 | </body>
9 | </html>
10 |
--------------------------------------------------------------------------------
/examples/tensorflow_lite/multi_stage_inference_emotion/base_camera.py:
--------------------------------------------------------------------------------
1 | import time
2 | import threading
3 | try:
4 | from greenlet import getcurrent as get_ident
5 | except ImportError:
6 | try:
7 | from thread import get_ident
8 | except ImportError:
9 | from _thread import get_ident
10 |
11 |
12 | class CameraEvent(object):
13 | """An Event-like class that signals all active clients when a new frame is
14 | available.
15 | """
16 | def __init__(self):
17 | self.events = {}
18 |
19 | def wait(self):
20 | """Invoked from each client's thread to wait for the next frame."""
21 | ident = get_ident()
22 | if ident not in self.events:
23 | # this is a new client
24 | # add an entry for it in the self.events dict
25 | # each entry has two elements, a threading.Event() and a timestamp
26 | self.events[ident] = [threading.Event(), time.time()]
27 | return self.events[ident][0].wait()
28 |
29 | def set(self):
30 | """Invoked by the camera thread when a new frame is available."""
31 | now = time.time()
32 | remove = None
33 | for ident, event in self.events.items():
34 | if not event[0].isSet():
35 | # if this client's event is not set, then set it
36 | # also update the last set timestamp to now
37 | event[0].set()
38 | event[1] = now
39 | else:
40 | # if the client's event is already set, it means the client
41 | # did not process a previous frame
42 | # if the event stays set for more than 5 seconds, then assume
43 | # the client is gone and remove it
44 | if now - event[1] > 5:
45 | remove = ident
46 | if remove:
47 | del self.events[remove]
48 |
49 | def clear(self):
50 | """Invoked from each client's thread after a frame was processed."""
51 | self.events[get_ident()][0].clear()
52 |
53 |
54 | class BaseCamera(object):
55 | thread = None # background thread that reads frames from camera
56 | frame = None # current frame is stored here by background thread
57 | last_access = 0 # time of last client access to the camera
58 | event = CameraEvent()
59 |
60 | def __init__(self):
61 | """Start the background camera thread if it isn't running yet."""
62 | if BaseCamera.thread is None:
63 | BaseCamera.last_access = time.time()
64 |
65 | # start background frame thread
66 | BaseCamera.thread = threading.Thread(target=self._thread)
67 | BaseCamera.thread.start()
68 |
69 | # wait until frames are available
70 | while self.get_frame() is None:
71 | time.sleep(0)
72 |
73 | def get_frame(self):
74 | """Return the current camera frame."""
75 | BaseCamera.last_access = time.time()
76 |
77 | # wait for a signal from the camera thread
78 | BaseCamera.event.wait()
79 | BaseCamera.event.clear()
80 |
81 | return BaseCamera.frame
82 |
83 | @staticmethod
84 | def frames():
85 |         """Generator that returns frames from the camera."""
86 | raise RuntimeError('Must be implemented by subclasses.')
87 |
88 | @classmethod
89 | def _thread(cls):
90 | """Camera background thread."""
91 | print('Starting camera thread.')
92 | frames_iterator = cls.frames()
93 | for frame in frames_iterator:
94 | BaseCamera.frame = frame
95 | BaseCamera.event.set() # send signal to clients
96 | time.sleep(0)
97 |
98 | # if there hasn't been any clients asking for frames in
99 | # the last 10 seconds then stop the thread
100 | if time.time() - BaseCamera.last_access > 10:
101 | frames_iterator.close()
102 | print('Stopping camera thread due to inactivity.')
103 | break
104 | BaseCamera.thread = None
105 |
--------------------------------------------------------------------------------
/examples/tensorflow_lite/multi_stage_inference_emotion/camera_opencv.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | from base_camera import BaseCamera
3 |
4 |
5 | class Camera(BaseCamera):
6 | video_source = 0
7 |
8 | @staticmethod
9 | def set_video_source(source):
10 | Camera.video_source = source
11 |
12 | @staticmethod
13 | def frames():
14 | camera = cv2.VideoCapture(Camera.video_source)
15 | if not camera.isOpened():
16 | raise RuntimeError('Could not start camera.')
17 |
18 | while True:
19 | # read current frame
20 | _, img = camera.read()
21 | #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
22 |
23 | # return img
24 | yield img
25 |
--------------------------------------------------------------------------------
/examples/tensorflow_lite/multi_stage_inference_emotion/camera_pi.py:
--------------------------------------------------------------------------------
1 | import io
2 | import time
3 | import picamera
4 | import picamera.array
5 | import cv2
6 | from base_camera import BaseCamera
7 |
8 |
9 | class Camera(BaseCamera):
10 | video_source = 0
11 |
12 | @staticmethod
13 | def set_video_source(source):
14 | pass
15 |
16 | @staticmethod
17 | def frames():
18 | with picamera.PiCamera(resolution = (1280,720)) as camera:
19 | # let camera warm up
20 | time.sleep(2)
21 |
22 | with picamera.array.PiRGBArray(camera, size=(1280,720)) as stream:
23 | while True:
24 |
25 | camera.capture(stream, format='bgr', use_video_port=True)
26 | # At this point the image is available as stream.array
27 | image = stream.array
28 | stream.truncate(0)
29 | yield image
30 |
31 |
--------------------------------------------------------------------------------
/examples/tensorflow_lite/multi_stage_inference_emotion/multi_stage_file.py:
--------------------------------------------------------------------------------
1 | import time
2 | import argparse
3 | import os
4 | import cv2
5 | import numpy as np
6 | from tqdm import tqdm
7 |
8 | from cv_utils import init_video_file_capture, decode_yolov3, decode_classifier, draw_classification, draw_bounding_boxes, preprocess
9 | from tflite_runtime.interpreter import Interpreter
10 |
11 | def process_face_expression(roi_img):
12 |
13 | emotion_list = ['neutral', 'happiness', 'surprise', 'sadness', 'anger', 'disgust', 'fear', 'contempt', 'unknown']
14 |
15 | results = np.squeeze(second_stage_network.run(roi_img))
16 | emotion_idx = np.argmax(results)
17 |     emotion_confidence = np.max(results)
18 |
19 |     label = f'{emotion_list[emotion_idx]} {emotion_confidence:.2%}'
20 |
21 | return label
22 |
23 | class NetworkExecutor(object):
24 |
25 | def __init__(self, model_file):
26 |
27 | self.interpreter = Interpreter(model_file, num_threads=3)
28 | self.interpreter.allocate_tensors()
29 | _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape']
30 | self.tensor_index = self.interpreter.get_input_details()[0]['index']
31 |
32 | def get_output_tensors(self):
33 |
34 | output_details = self.interpreter.get_output_details()
35 | tensor_indices = []
36 | tensor_list = []
37 |
38 | for output in output_details:
39 | tensor = np.squeeze(self.interpreter.get_tensor(output['index']))
40 | tensor_list.append(tensor)
41 |
42 | return tensor_list
43 |
44 | def run(self, image):
45 |         if image.shape[:2] != (self.input_height, self.input_width):
46 |             image = cv2.resize(image, (self.input_width, self.input_height))
47 |         img = preprocess(image)
48 | self.interpreter.set_tensor(self.tensor_index, img)
49 | self.interpreter.invoke()
50 | return self.get_output_tensors()
51 |
52 | def main(args):
53 | video, video_writer, frame_count = init_video_file_capture(args.file, 'emotion_demo')
54 |
55 | frame_num = len(frame_count)
56 | times = []
57 |
58 | for _ in tqdm(frame_count, desc='Processing frames'):
59 | frame_present, frame = video.read()
60 | if not frame_present:
61 | continue
62 |
63 | start_time = time.time()
64 |
65 | results = first_stage_network.run(frame)
66 | detections = decode_yolov3(netout = results, nms_threshold = 0.1, threshold = args.threshold, anchors = [[[0.51424575, 0.54116074], [0.29523918, 0.45838044], [0.21371929, 0.21518053]],
67 | [[0.10255913, 0.42572159], [0.05785894, 0.17925645], [0.01839256, 0.07238193]]])
68 | draw_bounding_boxes(frame, detections, None, process_face_expression)
69 |
70 | elapsed_ms = (time.time() - start_time) * 1000
71 |
72 | times.append(elapsed_ms)
73 | video_writer.write(frame)
74 |
75 | print('Finished processing frames')
76 | video.release(), video_writer.release()
77 |
78 | print("Average time(ms): ", sum(times)//frame_num)
79 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1 / time to process loop
80 |
81 | if __name__ == "__main__" :
82 |
83 |     print("OpenCV version: {}".format(cv2.__version__))
84 |
85 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
86 | parser.add_argument('--first_stage', help='File path of .tflite file.', required=True)
87 | parser.add_argument('--second_stage', help='File path of .tflite file.', required=True)
88 |     parser.add_argument('--threshold', type=float, help='Confidence threshold.', default=0.7)
89 | parser.add_argument('--file', help='File path of video file', required=True)
90 | args = parser.parse_args()
91 |
92 | first_stage_network = NetworkExecutor(args.first_stage)
93 | second_stage_network = NetworkExecutor(args.second_stage)
94 |
95 | main(args)
96 |
97 |
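# Example invocation (a sketch, not shipped with the repository; the .tflite names are
# placeholders). Unlike the age/gender demo, this script passes an explicit anchor set to
# decode_yolov3, so the first-stage detector must have been trained with matching anchors.
#
#   python3 multi_stage_file.py \
#       --first_stage face_detector.tflite \
#       --second_stage emotion_classifier.tflite \
#       --file ../../sample_files/test_s.mp4 \
#       --threshold 0.7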
--------------------------------------------------------------------------------
/examples/tensorflow_lite/multi_stage_inference_emotion/multi_stage_stream.py:
--------------------------------------------------------------------------------
1 | import time
2 | import argparse
3 | import os
4 | import cv2
5 | import numpy as np
6 |
7 | from cv_utils import decode_yolov3, preprocess, draw_bounding_boxes
8 | from tflite_runtime.interpreter import Interpreter
9 | from flask import Flask, render_template, request, Response
10 |
11 | app = Flask (__name__, static_url_path = '')
12 |
13 | def process_face_expression(roi_img):
14 |
15 | emotion_list = ['neutral', 'happiness', 'surprise', 'sadness', 'anger', 'disgust', 'fear', 'contempt', 'unknown']
16 |
17 | results = np.squeeze(second_stage_network.run(roi_img))
18 | emotion_idx = np.argmax(results)
19 |     emotion_confidence = np.max(results)
20 |
21 |     label = f'{emotion_list[emotion_idx]} {emotion_confidence:.2%}'
22 |
23 | return label
24 |
25 | class NetworkExecutor(object):
26 |
27 | def __init__(self, model_file):
28 |
29 | self.interpreter = Interpreter(model_file, num_threads=3)
30 | self.interpreter.allocate_tensors()
31 | _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape']
32 | self.tensor_index = self.interpreter.get_input_details()[0]['index']
33 |
34 | def get_output_tensors(self):
35 |
36 | output_details = self.interpreter.get_output_details()
37 | tensor_indices = []
38 | tensor_list = []
39 |
40 | for output in output_details:
41 | tensor = np.squeeze(self.interpreter.get_tensor(output['index']))
42 | tensor_list.append(tensor)
43 |
44 | return tensor_list
45 |
46 | def run(self, image):
47 |         if image.shape[:2] != (self.input_height, self.input_width):
48 |             image = cv2.resize(image, (self.input_width, self.input_height))
49 |         img = preprocess(image)
50 | self.interpreter.set_tensor(self.tensor_index, img)
51 | self.interpreter.invoke()
52 | return self.get_output_tensors()
53 |
54 | class Detector(NetworkExecutor):
55 |
56 | def __init__(self, label_file, model_file, threshold):
57 | super().__init__(model_file)
58 | self.threshold = float(threshold)
59 |
60 | def detect(self, frame):
61 | start_time = time.time()
62 | results = self.run(frame)
63 | elapsed_ms = (time.time() - start_time) * 1000
64 |
65 |         detections = decode_yolov3(netout = results, nms_threshold = 0.1, threshold = self.threshold, anchors = [[[0.51424575, 0.54116074], [0.29523918, 0.45838044], [0.21371929, 0.21518053]],
66 | [[0.10255913, 0.42572159], [0.05785894, 0.17925645], [0.01839256, 0.07238193]]])
67 | draw_bounding_boxes(frame, detections, None, process_face_expression)
68 |
69 |         fps = 1000 / elapsed_ms
70 | print("Estimated frames per second : {0:.2f} Inference time: {1:.2f}".format(fps, elapsed_ms))
71 |
72 | return cv2.imencode('.jpg', frame)[1].tobytes()
73 |
74 | @app.route("/")
75 | def index():
76 | return render_template('index.html', name = None)
77 |
78 | def gen(camera):
79 | while True:
80 | frame = camera.get_frame()
81 | image = detector.detect(frame)
82 | yield (b'--frame\r\n'+b'Content-Type: image/jpeg\r\n\r\n' + image + b'\r\n')
83 |
84 | @app.route('/video_feed')
85 | def video_feed():
86 | return Response(gen(Camera()), mimetype='multipart/x-mixed-replace; boundary=frame')
87 |
88 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
89 | parser.add_argument('--first_stage', help='File path of .tflite file.', required=True)
90 | parser.add_argument('--second_stage', help='File path of .tflite file.', required=True)
91 | parser.add_argument('--threshold', help='Confidence threshold.', default=0.8)
92 | parser.add_argument('--source', help='picamera or cv', default='cv')
93 | args = parser.parse_args()
94 |
95 | if args.source == "cv":
96 | from camera_opencv import Camera
97 | source = 0
98 | elif args.source == "picamera":
99 | from camera_pi import Camera
100 | source = 0
101 |
102 | Camera.set_video_source(source)
103 |
104 | detector = Detector(None, args.first_stage, args.threshold)
105 | second_stage_network = NetworkExecutor(args.second_stage)
106 |
107 | if __name__ == "__main__" :
108 | app.run(host = '0.0.0.0', port = 5000, debug = True)
109 |
110 |
--------------------------------------------------------------------------------
/examples/tensorflow_lite/multi_stage_inference_emotion/templates/index.html:
--------------------------------------------------------------------------------
1 | <html>
2 | <head>
3 | <title>Video Streaming Demonstration</title>
4 | </head>
5 | <body>
6 | <h1>Tflite Multi-stage Inference Demo</h1>
7 | <img src="{{ url_for('video_feed') }}">
8 | </body>
9 | </html>
10 |
--------------------------------------------------------------------------------
/examples/tensorflow_lite/multi_stage_inference_vehicle_type/base_camera.py:
--------------------------------------------------------------------------------
1 | import time
2 | import threading
3 | try:
4 | from greenlet import getcurrent as get_ident
5 | except ImportError:
6 | try:
7 | from thread import get_ident
8 | except ImportError:
9 | from _thread import get_ident
10 |
11 |
12 | class CameraEvent(object):
13 | """An Event-like class that signals all active clients when a new frame is
14 | available.
15 | """
16 | def __init__(self):
17 | self.events = {}
18 |
19 | def wait(self):
20 | """Invoked from each client's thread to wait for the next frame."""
21 | ident = get_ident()
22 | if ident not in self.events:
23 | # this is a new client
24 | # add an entry for it in the self.events dict
25 | # each entry has two elements, a threading.Event() and a timestamp
26 | self.events[ident] = [threading.Event(), time.time()]
27 | return self.events[ident][0].wait()
28 |
29 | def set(self):
30 | """Invoked by the camera thread when a new frame is available."""
31 | now = time.time()
32 | remove = None
33 | for ident, event in self.events.items():
34 | if not event[0].isSet():
35 | # if this client's event is not set, then set it
36 | # also update the last set timestamp to now
37 | event[0].set()
38 | event[1] = now
39 | else:
40 | # if the client's event is already set, it means the client
41 | # did not process a previous frame
42 | # if the event stays set for more than 5 seconds, then assume
43 | # the client is gone and remove it
44 | if now - event[1] > 5:
45 | remove = ident
46 | if remove:
47 | del self.events[remove]
48 |
49 | def clear(self):
50 | """Invoked from each client's thread after a frame was processed."""
51 | self.events[get_ident()][0].clear()
52 |
53 |
54 | class BaseCamera(object):
55 | thread = None # background thread that reads frames from camera
56 | frame = None # current frame is stored here by background thread
57 | last_access = 0 # time of last client access to the camera
58 | event = CameraEvent()
59 |
60 | def __init__(self):
61 | """Start the background camera thread if it isn't running yet."""
62 | if BaseCamera.thread is None:
63 | BaseCamera.last_access = time.time()
64 |
65 | # start background frame thread
66 | BaseCamera.thread = threading.Thread(target=self._thread)
67 | BaseCamera.thread.start()
68 |
69 | # wait until frames are available
70 | while self.get_frame() is None:
71 | time.sleep(0)
72 |
73 | def get_frame(self):
74 | """Return the current camera frame."""
75 | BaseCamera.last_access = time.time()
76 |
77 | # wait for a signal from the camera thread
78 | BaseCamera.event.wait()
79 | BaseCamera.event.clear()
80 |
81 | return BaseCamera.frame
82 |
83 | @staticmethod
84 | def frames():
85 |         """Generator that returns frames from the camera."""
86 | raise RuntimeError('Must be implemented by subclasses.')
87 |
88 | @classmethod
89 | def _thread(cls):
90 | """Camera background thread."""
91 | print('Starting camera thread.')
92 | frames_iterator = cls.frames()
93 | for frame in frames_iterator:
94 | BaseCamera.frame = frame
95 | BaseCamera.event.set() # send signal to clients
96 | time.sleep(0)
97 |
98 | # if there hasn't been any clients asking for frames in
99 | # the last 10 seconds then stop the thread
100 | if time.time() - BaseCamera.last_access > 10:
101 | frames_iterator.close()
102 | print('Stopping camera thread due to inactivity.')
103 | break
104 | BaseCamera.thread = None
105 |
--------------------------------------------------------------------------------
/examples/tensorflow_lite/multi_stage_inference_vehicle_type/camera_opencv.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | from base_camera import BaseCamera
3 |
4 |
5 | class Camera(BaseCamera):
6 | video_source = 0
7 |
8 | @staticmethod
9 | def set_video_source(source):
10 | Camera.video_source = source
11 |
12 | @staticmethod
13 | def frames():
14 | camera = cv2.VideoCapture(Camera.video_source)
15 | if not camera.isOpened():
16 | raise RuntimeError('Could not start camera.')
17 |
18 | while True:
19 | # read current frame
20 | _, img = camera.read()
21 | #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
22 |
23 | # return img
24 | yield img
25 |
--------------------------------------------------------------------------------
/examples/tensorflow_lite/multi_stage_inference_vehicle_type/camera_pi.py:
--------------------------------------------------------------------------------
1 | import io
2 | import time
3 | import picamera
4 | import picamera.array
5 | import cv2
6 | from base_camera import BaseCamera
7 |
8 |
9 | class Camera(BaseCamera):
10 | video_source = 0
11 |
12 | @staticmethod
13 | def set_video_source(source):
14 | pass
15 |
16 | @staticmethod
17 | def frames():
18 | with picamera.PiCamera(resolution = (1280,720)) as camera:
19 | # let camera warm up
20 | time.sleep(2)
21 |
22 | with picamera.array.PiRGBArray(camera, size=(1280,720)) as stream:
23 | while True:
24 |
25 | camera.capture(stream, format='bgr', use_video_port=True)
26 | # At this point the image is available as stream.array
27 | image = stream.array
28 | stream.truncate(0)
29 | yield image
30 |
31 |
--------------------------------------------------------------------------------
/examples/tensorflow_lite/multi_stage_inference_vehicle_type/labels.txt:
--------------------------------------------------------------------------------
1 | AM-General-Hummer-SUV-2000
2 | Acura-Integra-Type-R-2001
3 | Acura-RL-Sedan-2012
4 | Acura-TL-Sedan-2012
5 | Acura-TL-Type-S-2008
6 | Acura-TSX-Sedan-2012
7 | Acura-ZDX-Hatchback-2012
8 | Aston-Martin-V8-Vantage-Convertible-2012
9 | Aston-Martin-V8-Vantage-Coupe-2012
10 | Aston-Martin-Virage-Convertible-2012
11 | Aston-Martin-Virage-Coupe-2012
12 | Audi-100-Sedan-1994
13 | Audi-100-Wagon-1994
14 | Audi-A5-Coupe-2012
15 | Audi-R8-Coupe-2012
16 | Audi-RS-4-Convertible-2008
17 | Audi-S4-Sedan-2007
18 | Audi-S4-Sedan-2012
19 | Audi-S5-Convertible-2012
20 | Audi-S5-Coupe-2012
21 | Audi-S6-Sedan-2011
22 | Audi-TT-Hatchback-2011
23 | Audi-TT-RS-Coupe-2012
24 | Audi-TTS-Coupe-2012
25 | Audi-V8-Sedan-1994
26 | BMW-1-Series-Convertible-2012
27 | BMW-1-Series-Coupe-2012
28 | BMW-3-Series-Sedan-2012
29 | BMW-3-Series-Wagon-2012
30 | BMW-6-Series-Convertible-2007
31 | BMW-ActiveHybrid-5-Sedan-2012
32 | BMW-M3-Coupe-2012
33 | BMW-M5-Sedan-2010
34 | BMW-M6-Convertible-2010
35 | BMW-X3-SUV-2012
36 | BMW-X5-SUV-2007
37 | BMW-X6-SUV-2012
38 | BMW-Z4-Convertible-2012
39 | Bentley-Arnage-Sedan-2009
40 | Bentley-Continental-Flying-Spur-Sedan-2007
41 | Bentley-Continental-GT-Coupe-2007
42 | Bentley-Continental-GT-Coupe-2012
43 | Bentley-Continental-Supersports-Conv.-Convertible-2012
44 | Bentley-Mulsanne-Sedan-2011
45 | Bugatti-Veyron-16.4-Convertible-2009
46 | Bugatti-Veyron-16.4-Coupe-2009
47 | Buick-Enclave-SUV-2012
48 | Buick-Rainier-SUV-2007
49 | Buick-Regal-GS-2012
50 | Buick-Verano-Sedan-2012
51 | Cadillac-CTS-V-Sedan-2012
52 | Cadillac-Escalade-EXT-Crew-Cab-2007
53 | Cadillac-SRX-SUV-2012
54 | Chevrolet-Avalanche-Crew-Cab-2012
55 | Chevrolet-Camaro-Convertible-2012
56 | Chevrolet-Cobalt-SS-2010
57 | Chevrolet-Corvette-Convertible-2012
58 | Chevrolet-Corvette-Ron-Fellows-Edition-Z06-2007
59 | Chevrolet-Corvette-ZR1-2012
60 | Chevrolet-Express-Cargo-Van-2007
61 | Chevrolet-Express-Van-2007
62 | Chevrolet-HHR-SS-2010
63 | Chevrolet-Impala-Sedan-2007
64 | Chevrolet-Malibu-Hybrid-Sedan-2010
65 | Chevrolet-Malibu-Sedan-2007
66 | Chevrolet-Monte-Carlo-Coupe-2007
67 | Chevrolet-Silverado-1500-Classic-Extended-Cab-2007
68 | Chevrolet-Silverado-1500-Extended-Cab-2012
69 | Chevrolet-Silverado-1500-Hybrid-Crew-Cab-2012
70 | Chevrolet-Silverado-1500-Regular-Cab-2012
71 | Chevrolet-Silverado-2500HD-Regular-Cab-2012
72 | Chevrolet-Sonic-Sedan-2012
73 | Chevrolet-Tahoe-Hybrid-SUV-2012
74 | Chevrolet-TrailBlazer-SS-2009
75 | Chevrolet-Traverse-SUV-2012
76 | Chrysler-300-SRT-8-2010
77 | Chrysler-Aspen-SUV-2009
78 | Chrysler-Crossfire-Convertible-2008
79 | Chrysler-PT-Cruiser-Convertible-2008
80 | Chrysler-Sebring-Convertible-2010
81 | Chrysler-Town-and-Country-Minivan-2012
82 | Daewoo-Nubira-Wagon-2002
83 | Dodge-Caliber-Wagon-2007
84 | Dodge-Caliber-Wagon-2012
85 | Dodge-Caravan-Minivan-1997
86 | Dodge-Challenger-SRT8-2011
87 | Dodge-Charger-SRT-8-2009
88 | Dodge-Charger-Sedan-2012
89 | Dodge-Dakota-Club-Cab-2007
90 | Dodge-Dakota-Crew-Cab-2010
91 | Dodge-Durango-SUV-2007
92 | Dodge-Durango-SUV-2012
93 | Dodge-Journey-SUV-2012
94 | Dodge-Magnum-Wagon-2008
95 | Dodge-Ram-Pickup-3500-Crew-Cab-2010
96 | Dodge-Ram-Pickup-3500-Quad-Cab-2009
97 | Dodge-Sprinter-Cargo-Van-2009
98 | Eagle-Talon-Hatchback-1998
99 | FIAT-500-Abarth-2012
100 | FIAT-500-Convertible-2012
101 | Ferrari-458-Italia-Convertible-2012
102 | Ferrari-458-Italia-Coupe-2012
103 | Ferrari-California-Convertible-2012
104 | Ferrari-FF-Coupe-2012
105 | Fisker-Karma-Sedan-2012
106 | Ford-E-Series-Wagon-Van-2012
107 | Ford-Edge-SUV-2012
108 | Ford-Expedition-EL-SUV-2009
109 | Ford-F-150-Regular-Cab-2007
110 | Ford-F-150-Regular-Cab-2012
111 | Ford-F-450-Super-Duty-Crew-Cab-2012
112 | Ford-Fiesta-Sedan-2012
113 | Ford-Focus-Sedan-2007
114 | Ford-Freestar-Minivan-2007
115 | Ford-GT-Coupe-2006
116 | Ford-Mustang-Convertible-2007
117 | Ford-Ranger-SuperCab-2011
118 | GMC-Acadia-SUV-2012
119 | GMC-Canyon-Extended-Cab-2012
120 | GMC-Savana-Van-2012
121 | GMC-Terrain-SUV-2012
122 | GMC-Yukon-Hybrid-SUV-2012
123 | Geo-Metro-Convertible-1993
124 | HUMMER-H2-SUT-Crew-Cab-2009
125 | HUMMER-H3T-Crew-Cab-2010
126 | Honda-Accord-Coupe-2012
127 | Honda-Accord-Sedan-2012
128 | Honda-Odyssey-Minivan-2007
129 | Honda-Odyssey-Minivan-2012
130 | Hyundai-Accent-Sedan-2012
131 | Hyundai-Azera-Sedan-2012
132 | Hyundai-Elantra-Sedan-2007
133 | Hyundai-Elantra-Touring-Hatchback-2012
134 | Hyundai-Genesis-Sedan-2012
135 | Hyundai-Santa-Fe-SUV-2012
136 | Hyundai-Sonata-Hybrid-Sedan-2012
137 | Hyundai-Sonata-Sedan-2012
138 | Hyundai-Tucson-SUV-2012
139 | Hyundai-Veloster-Hatchback-2012
140 | Hyundai-Veracruz-SUV-2012
141 | Infiniti-G-Coupe-IPL-2012
142 | Infiniti-QX56-SUV-2011
143 | Isuzu-Ascender-SUV-2008
144 | Jaguar-XK-XKR-2012
145 | Jeep-Compass-SUV-2012
146 | Jeep-Grand-Cherokee-SUV-2012
147 | Jeep-Liberty-SUV-2012
148 | Jeep-Patriot-SUV-2012
149 | Jeep-Wrangler-SUV-2012
150 | Lamborghini-Aventador-Coupe-2012
151 | Lamborghini-Diablo-Coupe-2001
152 | Lamborghini-Gallardo-LP-570-4-Superleggera-2012
153 | Lamborghini-Reventon-Coupe-2008
154 | Land-Rover-LR2-SUV-2012
155 | Land-Rover-Range-Rover-SUV-2012
156 | Lincoln-Town-Car-Sedan-2011
157 | MINI-Cooper-Roadster-Convertible-2012
158 | Maybach-Landaulet-Convertible-2012
159 | Mazda-Tribute-SUV-2011
160 | McLaren-MP4-12C-Coupe-2012
161 | Mercedes-Benz-300-Class-Convertible-1993
162 | Mercedes-Benz-C-Class-Sedan-2012
163 | Mercedes-Benz-E-Class-Sedan-2012
164 | Mercedes-Benz-S-Class-Sedan-2012
165 | Mercedes-Benz-SL-Class-Coupe-2009
166 | Mercedes-Benz-Sprinter-Van-2012
167 | Mitsubishi-Lancer-Sedan-2012
168 | Nissan-240SX-Coupe-1998
169 | Nissan-Juke-Hatchback-2012
170 | Nissan-Leaf-Hatchback-2012
171 | Nissan-NV-Passenger-Van-2012
172 | Plymouth-Neon-Coupe-1999
173 | Porsche-Panamera-Sedan-2012
174 | Ram-C-V-Cargo-Van-Minivan-2012
175 | Rolls-Royce-Ghost-Sedan-2012
176 | Rolls-Royce-Phantom-Drophead-Coupe-Convertible-2012
177 | Rolls-Royce-Phantom-Sedan-2012
178 | Scion-xD-Hatchback-2012
179 | Spyker-C8-Convertible-2009
180 | Spyker-C8-Coupe-2009
181 | Suzuki-Aerio-Sedan-2007
182 | Suzuki-Kizashi-Sedan-2012
183 | Suzuki-SX4-Hatchback-2012
184 | Suzuki-SX4-Sedan-2012
185 | Tesla-Model-S-Sedan-2012
186 | Toyota-4Runner-SUV-2012
187 | Toyota-Camry-Sedan-2012
188 | Toyota-Corolla-Sedan-2012
189 | Toyota-Sequoia-SUV-2012
190 | Volkswagen-Beetle-Hatchback-2012
191 | Volkswagen-Golf-Hatchback-1991
192 | Volkswagen-Golf-Hatchback-2012
193 | Volvo-240-Sedan-1993
194 | Volvo-C30-Hatchback-2012
195 | Volvo-XC90-SUV-2007
196 | smart-fortwo-Convertible-2012
--------------------------------------------------------------------------------
/examples/tensorflow_lite/multi_stage_inference_vehicle_type/multi_stage_file.py:
--------------------------------------------------------------------------------
1 | import time
2 | import argparse
3 | import os
4 | import cv2
5 | import numpy as np
6 | from tqdm import tqdm
7 |
8 | from cv_utils import init_video_file_capture, decode_yolov3, decode_classifier, draw_classification, draw_bounding_boxes, preprocess
9 | from tflite_runtime.interpreter import Interpreter
10 |
11 | def load_labels(path):
12 | with open(path, 'r') as f:
13 | return {i: line.strip() for i, line in enumerate(f.readlines())}
14 |
15 | def process_vehicle_type(roi_img):
16 |
17 | results = second_stage_network.run(roi_img)
18 | vehicle_type = np.argmax(results[0])
19 | confidence = np.max(results[0])
20 |     label = f'{labels[vehicle_type]} : {confidence:.2f}'
21 |
22 | return label
23 |
24 | class NetworkExecutor(object):
25 |
26 | def __init__(self, model_file, num_threads=3):
27 |
28 | self.interpreter = Interpreter(model_file, num_threads=num_threads)
29 | self.interpreter.allocate_tensors()
30 | _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape']
31 | print(self.input_height, self.input_width)
32 | self.tensor_index = self.interpreter.get_input_details()[0]['index']
33 |
34 | def get_output_tensors(self):
35 |
36 | output_details = self.interpreter.get_output_details()
37 | tensor_indices = []
38 | tensor_list = []
39 |
40 | for output in output_details:
41 | tensor = np.squeeze(self.interpreter.get_tensor(output['index']))
42 | tensor_list.append(tensor)
43 |
44 | return tensor_list
45 |
46 | def run(self, image):
47 |         if image.shape[:2] != (self.input_height, self.input_width):
48 |             image = cv2.resize(image, (self.input_width, self.input_height))
49 |         img = preprocess(image)
50 | self.interpreter.set_tensor(self.tensor_index, img)
51 | self.interpreter.invoke()
52 | return self.get_output_tensors()
53 |
54 | def main(args):
55 | video, video_writer, frame_count = init_video_file_capture(args.file, 'vehicle_type_demo')
56 |
57 | frame_num = len(frame_count)
58 | times = []
59 |
60 | for _ in tqdm(frame_count, desc='Processing frames'):
61 | frame_present, frame = video.read()
62 | if not frame_present:
63 | continue
64 |
65 | start_time = time.time()
66 |
67 | results = first_stage_network.run(frame)
68 | detections = decode_yolov3(netout = results, nms_threshold = 0.1, threshold = args.threshold)
69 | draw_bounding_boxes(frame, detections, None, process_vehicle_type)
70 |
71 | elapsed_ms = (time.time() - start_time) * 1000
72 |
73 | times.append(elapsed_ms)
74 | video_writer.write(frame)
75 |
76 | print('Finished processing frames')
77 | video.release(), video_writer.release()
78 |
79 | print("Average time(ms): ", sum(times)//frame_num)
80 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1 / time to process loop
81 |
82 | if __name__ == "__main__" :
83 |
84 |     print("OpenCV version: {}".format(cv2.__version__))
85 |
86 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
87 | parser.add_argument('--first_stage', help='File path of .tflite file.', required=True)
88 | parser.add_argument('--second_stage', help='File path of .tflite file.', required=True)
89 | parser.add_argument('--labels', nargs="+", help='File path of labels file.', required=True)
90 |     parser.add_argument('--threshold', type=float, help='Confidence threshold.', default=0.7)
91 | parser.add_argument('--file', help='File path of video file', required=True)
92 | args = parser.parse_args()
93 |
94 | first_stage_network = NetworkExecutor(args.first_stage, num_threads=2)
95 | second_stage_network = NetworkExecutor(args.second_stage, num_threads=2)
96 |
97 | if not os.path.exists(args.labels[0]):
98 | labels = args.labels
99 | else:
100 | labels = load_labels(args.labels[0])
101 |
102 | main(args)
103 |
104 |
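# Example invocation (a sketch, not shipped with the repository; the .tflite names are
# placeholders). --labels accepts either a path to the labels.txt shipped in this folder
# or a space-separated list of label strings; cars.mp4 comes from examples/sample_files.
#
#   python3 multi_stage_file.py \
#       --first_stage vehicle_detector.tflite \
#       --second_stage vehicle_classifier.tflite \
#       --labels labels.txt \
#       --file ../../sample_files/cars.mp4 \
#       --threshold 0.7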
--------------------------------------------------------------------------------
/examples/tensorflow_lite/multi_stage_inference_vehicle_type/multi_stage_stream.py:
--------------------------------------------------------------------------------
1 | import time
2 | import argparse
3 | import os
4 | import cv2
5 | import numpy as np
6 |
7 | from cv_utils import decode_yolov3, preprocess, draw_bounding_boxes
8 | from tflite_runtime.interpreter import Interpreter
9 | from flask import Flask, render_template, request, Response
10 |
11 | app = Flask (__name__, static_url_path = '')
12 |
13 | def load_labels(path):
14 | with open(path, 'r') as f:
15 | return {i: line.strip() for i, line in enumerate(f.readlines())}
16 |
17 | def process_vehicle_type(roi_img):
18 |
19 | results = second_stage_network.run(roi_img)
20 | vehicle_type = np.argmax(results[0])
21 | confidence = np.max(results[0])
22 |     label = f'{labels[vehicle_type]} : {confidence:.2f}'
23 |
24 | return label
25 |
26 | class NetworkExecutor(object):
27 |
28 | def __init__(self, model_file, num_threads=3):
29 |
30 | self.interpreter = Interpreter(model_file, num_threads=num_threads)
31 | self.interpreter.allocate_tensors()
32 | _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape']
33 | self.tensor_index = self.interpreter.get_input_details()[0]['index']
34 |
35 | def get_output_tensors(self):
36 |
37 | output_details = self.interpreter.get_output_details()
38 | tensor_indices = []
39 | tensor_list = []
40 |
41 | for output in output_details:
42 | tensor = np.squeeze(self.interpreter.get_tensor(output['index']))
43 | tensor_list.append(tensor)
44 |
45 | return tensor_list
46 |
47 | def run(self, image):
48 |         if image.shape[:2] != (self.input_height, self.input_width):
49 |             image = cv2.resize(image, (self.input_width, self.input_height))
50 |         img = preprocess(image)
51 | self.interpreter.set_tensor(self.tensor_index, img)
52 | self.interpreter.invoke()
53 | return self.get_output_tensors()
54 |
55 | class Detector(NetworkExecutor):
56 |
57 | def __init__(self, label_file, model_file, threshold):
58 | super().__init__(model_file)
59 | self.threshold = float(threshold)
60 |
61 | def detect(self, frame):
62 | start_time = time.time()
63 | results = self.run(frame)
64 | elapsed_ms = (time.time() - start_time) * 1000
65 |
66 | detections = decode_yolov3(netout = results, nms_threshold = 0.1, threshold = self.threshold)
67 | draw_bounding_boxes(frame, detections, None, process_vehicle_type)
68 |
69 |         fps = 1000 / elapsed_ms
70 | print("Estimated frames per second : {0:.2f} Inference time: {1:.2f}".format(fps, elapsed_ms))
71 |
72 | return cv2.imencode('.jpg', frame)[1].tobytes()
73 |
74 | @app.route("/")
75 | def index():
76 | return render_template('index.html', name = None)
77 |
78 | def gen(camera):
79 | while True:
80 | frame = camera.get_frame()
81 | image = detector.detect(frame)
82 | yield (b'--frame\r\n'+b'Content-Type: image/jpeg\r\n\r\n' + image + b'\r\n')
83 |
84 | @app.route('/video_feed')
85 | def video_feed():
86 | return Response(gen(Camera()), mimetype='multipart/x-mixed-replace; boundary=frame')
87 |
88 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
89 | parser.add_argument('--first_stage', help='File path of .tflite file.', required=True)
90 | parser.add_argument('--second_stage', help='File path of .tflite file.', required=True)
91 | parser.add_argument('--labels', nargs="+", help='File path of labels file.', required=True)
92 | parser.add_argument('--threshold', help='Confidence threshold.', default=0.9)
93 | parser.add_argument('--source', help='picamera or cv', default='cv')
94 | args = parser.parse_args()
95 |
96 | if args.source == "cv":
97 | from camera_opencv import Camera
98 | source = 0
99 | elif args.source == "picamera":
100 | from camera_pi import Camera
101 | source = 0
102 |
103 | Camera.set_video_source(source)
104 |
105 | detector = Detector(None, args.first_stage, args.threshold)
106 | second_stage_network = NetworkExecutor(args.second_stage)
107 |
108 | if not os.path.exists(args.labels[0]):
109 | labels = args.labels
110 | else:
111 | labels = load_labels(args.labels[0])
112 |
113 | if __name__ == "__main__" :
114 | app.run(host = '0.0.0.0', port = 5000, debug = True)
115 |
116 |
--------------------------------------------------------------------------------
/examples/tensorflow_lite/multi_stage_inference_vehicle_type/templates/index.html:
--------------------------------------------------------------------------------
1 | <html>
2 | <head>
3 | <title>Video Streaming Demonstration</title>
4 | </head>
5 | <body>
6 | <h1>Tflite Multi-stage Inference Demo</h1>
7 | <img src="{{ url_for('video_feed') }}">
8 | </body>
9 | </html>
10 |
--------------------------------------------------------------------------------