├── .github └── workflows │ ├── stale.yml │ └── sync_issues.yml ├── .gitignore ├── .gitmodules ├── README.md ├── examples ├── armnn │ ├── README.md │ ├── common │ │ ├── cv_utils.py │ │ ├── network_executor.py │ │ ├── tests │ │ │ ├── conftest.py │ │ │ ├── context.py │ │ │ ├── test_network_executor.py │ │ │ └── test_utils.py │ │ └── utils.py │ ├── face_age-gender │ │ ├── README.md │ │ ├── box.py │ │ ├── requirements.txt │ │ ├── run_video_file.py │ │ ├── run_video_stream.py │ │ └── yolov2.py │ ├── face_keypoints │ │ ├── README.md │ │ ├── box.py │ │ ├── requirements.txt │ │ ├── run_video_file.py │ │ ├── run_video_stream.py │ │ └── yolov2.py │ └── face_recognition │ │ ├── README.md │ │ ├── box.py │ │ ├── calculate_features.py │ │ ├── requirements.txt │ │ ├── run_video_file.py │ │ ├── run_video_stream.py │ │ └── yolov2.py ├── edge_impulse │ └── multi_stage_inference_vehicle_type │ │ └── multi_stage.py ├── mediapipe │ ├── README.md │ ├── common │ │ └── cv_utils.py │ ├── face_detection │ │ ├── run_video_file.py │ │ └── run_video_stream.py │ ├── face_mesh │ │ ├── run_video_file.py │ │ └── run_video_stream.py │ ├── hand_landmarks │ │ ├── run_video_file.py │ │ └── run_video_stream.py │ └── pose_estimation │ │ ├── run_video_file.py │ │ └── run_video_stream.py ├── sample_files │ ├── cars.mp4 │ ├── test_dance.mp4 │ └── test_s.mp4 └── tensorflow_lite │ ├── face_recognition │ ├── README.md │ ├── base_camera.py │ ├── calculate_features.py │ ├── camera_opencv.py │ ├── camera_pi.py │ ├── cv_utils.py │ ├── multi_stage_file.py │ ├── multi_stage_stream.py │ ├── requirements.txt │ └── templates │ │ └── index.html │ ├── multi_stage_inference_age_gender │ ├── base_camera.py │ ├── camera_opencv.py │ ├── camera_pi.py │ ├── cv_utils.py │ ├── multi_stage_file.py │ ├── multi_stage_stream.py │ └── templates │ │ └── index.html │ ├── multi_stage_inference_emotion │ ├── base_camera.py │ ├── camera_opencv.py │ ├── camera_pi.py │ ├── cv_utils.py │ ├── multi_stage_file.py │ ├── multi_stage_stream.py │ └── templates │ │ └── index.html │ └── multi_stage_inference_vehicle_type │ ├── base_camera.py │ ├── camera_opencv.py │ ├── camera_pi.py │ ├── cv_utils.py │ ├── labels.txt │ ├── multi_stage_file.py │ ├── multi_stage_stream.py │ └── templates │ └── index.html └── jupyter_notebooks ├── aXeleRate_conveyor_belt_rip_recognition.ipynb ├── aXeleRate_face_anti_spoofing.ipynb ├── aXeleRate_lung_segmentation.ipynb └── aXeleRate_multi_stage.ipynb /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: 'Close stale issues and PRs' 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: '0 4 * * *' 7 | 8 | jobs: 9 | stale: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Checkout repository 14 | uses: actions/checkout@v4 15 | 16 | - name: Checkout script repository 17 | uses: actions/checkout@v4 18 | with: 19 | repository: Seeed-Studio/sync-github-all-issues 20 | path: ci 21 | 22 | - name: Run script 23 | run: ./ci/tools/stale.sh 24 | env: 25 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 26 | -------------------------------------------------------------------------------- /.github/workflows/sync_issues.yml: -------------------------------------------------------------------------------- 1 | name: Automate Issue Management 2 | 3 | on: 4 | issues: 5 | types: 6 | - opened 7 | - edited 8 | - assigned 9 | - unassigned 10 | - labeled 11 | - unlabeled 12 | - reopened 13 | 14 | jobs: 15 | add_issue_to_project: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - name: Add issue to 
GitHub Project 19 | uses: actions/add-to-project@v1.0.2 20 | with: 21 | project-url: https://github.com/orgs/Seeed-Studio/projects/17 22 | github-token: ${{ secrets.ISSUE_ASSEMBLE }} 23 | labeled: bug 24 | label-operator: NOT -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | *.tflite 132 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "training_code/edge_ml_age_gender_recognition"] 2 | path = training_code/edge_ml_age_gender_recognition 3 | url = https://github.com/AIWintermuteAI/edge_ml_age_gender_recognition.git 4 | [submodule "training_code/edge_ml_emotion_recognition"] 5 | path = training_code/edge_ml_emotion_recognition 6 | url = https://github.com/AIWintermuteAI/edge_ml_emotion_recognition.git 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Seeed_Python_MachineLearning 2 | Examples and training code for Machine Learning samples that can be run on various Edge devices 3 | -------------------------------------------------------------------------------- /examples/armnn/README.md: -------------------------------------------------------------------------------- 1 | # PyArmNN Sample Applications 2 | 3 | ## Introduction 4 | This sample application guides the user and shows how to perform inference using PyArmNN API. We assume the user has already built PyArmNN by following the instructions of the README in the main PyArmNN directory. 5 | 6 | We provide example scripts for performing inference from video file and video stream with `run_video_file.py` and `run_video_stream.py`. For detailed instructions and download links to reference models, check README file inside each application folder. 7 | 8 | ## Prerequisites 9 | 10 | ##### PyArmNN 11 | 12 | Before proceeding to the next steps, make sure that you have successfully installed the newest version of PyArmNN on your system by following the instructions in the README of the PyArmNN root directory. 13 | 14 | You can verify that PyArmNN library is installed and check PyArmNN version using: 15 | ```bash 16 | $ pip show pyarmnn 17 | ``` 18 | 19 | You can also verify it by running the following and getting output similar to below: 20 | ```bash 21 | $ python -c "import pyarmnn as ann;print(ann.GetVersion())" 22 | '24.0.0' 23 | ``` 24 | 25 | ##### Dependencies 26 | 27 | Install the following libraries on your system: 28 | ```bash 29 | $ sudo apt-get install python3-opencv libqtgui4 libqt4-test 30 | ``` 31 | 32 | Create a virtual environment: 33 | ```bash 34 | $ python3.7 -m venv devenv --system-site-packages 35 | $ source devenv/bin/activate 36 | ``` 37 | 38 | Install the dependencies: 39 | ```bash 40 | $ pip install -r requirements.txt 41 | ``` 42 | 43 | --- 44 | 45 | ## Implementing Your Own Network 46 | The examples provide support for `yolo_v2` detection layer models. However, the user is able to add their own network to the object detection scripts by following the steps: 47 | 48 | 1. 
Create a new file for your network, for example `network.py`, to contain functions to process the output of the model 49 | 2. In that file, the user will need to write a function that decodes the output vectors obtained from running inference on their network and return the bounding box positions of detected objects plus their class index and confidence. Additionally, include a function that returns a resize factor that will scale the obtained bounding boxes to their correct positions in the original frame 50 | 3. Import the functions into the main file and, such as with the provided networks, add a conditional statement to the `get_model_processing()` function with the new model name and functions 51 | 4. The labels associated with the model can then be passed in with `--label_path` argument 52 | 53 | --- 54 | 55 | # Application Overview 56 | 57 | This section provides a walkthrough of the application, explaining in detail the steps: 58 | 59 | 1. Initialisation 60 | 2. Creating a Network 61 | 3. Preparing the Workload Tensors 62 | 4. Executing Inference 63 | 5. Postprocessing 64 | 65 | 66 | ### Initialisation 67 | 68 | ##### Reading from Video Source 69 | After parsing user arguments, the chosen video file or stream is loaded into an OpenCV `cv2.VideoCapture()` object. We use this object to capture frames from the source using the `read()` function. 70 | 71 | The `VideoCapture` object also tells us information about the source, such as the framerate and resolution of the input video. Using this information, we create a `cv2.VideoWriter()` object which will be used at the end of every loop to write the processed frame to an output video file of the same format as the input. 72 | 73 | ##### Preparing Labels and Model Specific Functions 74 | In order to interpret the result of running inference on the loaded network, it is required to load the labels associated with the model. In the provided example code, the `dict_labels()` function creates a dictionary that is keyed on the classification index at the output node of the model, with values of the dictionary corresponding to a label and a randomly generated RGB color. This ensures that each class has a unique color which will prove helpful when plotting the bounding boxes of various detected objects in a frame. 75 | 76 | Depending on the model being used, the user-specified model name accesses and returns functions to decode and process the inference output, along with a resize factor used when plotting bounding boxes to ensure they are scaled to their correct position in the original frame. 77 | 78 | 79 | ### Creating a Network 80 | 81 | ##### Creating Parser and Importing Graph 82 | The first step with PyArmNN is to import a graph from file by using the appropriate parser. 83 | 84 | The Arm NN SDK provides parsers for reading graphs from a variety of model formats. In our application we specifically focus on `.tflite, .pb, .onnx` models. 85 | 86 | Based on the extension of the provided model file, the corresponding parser is created and the network file loaded with `CreateNetworkFromBinaryFile()` function. The parser will handle the creation of the underlying Arm NN graph. 87 | 88 | ##### Optimizing Graph for Compute Device 89 | Arm NN supports optimized execution on multiple CPU and GPU devices. Prior to executing a graph, we must select the appropriate device context. We do this by creating a runtime context with default options with `IRuntime()`. 
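For reference, the snippet below condenses the graph-creation steps described in this section into one place. It is a minimal sketch modelled on the `create_network()` helper in `examples/armnn/common/network_executor.py`, assuming a `.tflite` model and default creation options; the model filename and backend list are only illustrative.

```python
import pyarmnn as ann

# Parse the .tflite model and build the underlying Arm NN graph
parser = ann.ITfLiteParser()
network = parser.CreateNetworkFromBinaryFile('YOLO_best_mAP.tflite')

# Create a runtime context with default creation options
runtime = ann.IRuntime(ann.CreationOptions())

# Optimize for the preferred backends and load the network onto the compute device
preferred_backends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]
opt_network, messages = ann.Optimize(network, preferred_backends,
                                     runtime.GetDeviceSpec(), ann.OptimizerOptions())
net_id, _ = runtime.LoadNetwork(opt_network)

# Extract input/output binding information from the parser
graph_id = parser.GetSubgraphCount() - 1
input_name = parser.GetSubgraphInputTensorNames(graph_id)[0]
input_binding_info = parser.GetNetworkInputBindingInfo(graph_id, input_name)
output_binding_info = [parser.GetNetworkOutputBindingInfo(graph_id, name)
                       for name in parser.GetSubgraphOutputTensorNames(graph_id)]
```

With `net_id`, `runtime` and the binding information in hand, the remaining steps only deal with preparing tensors and decoding results, as described below.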
90 | 91 | We can optimize the imported graph by specifying a list of backends in order of preference and implementing backend-specific optimizations. The backends are identified by a string unique to the backend, for example `CpuAcc, GpuAcc, CpuRef`. 92 | 93 | Internally and transparently, Arm NN splits the graph into subgraphs based on the backends, calls an optimize-subgraphs function on each of them and, if possible, substitutes the corresponding subgraph in the original graph with its optimized version. 94 | 95 | Using the `Optimize()` function, we optimize the graph for inference and load the optimized network onto the compute device with `LoadNetwork()`. This function creates the backend-specific workloads for the layers and a backend-specific workload factory which is called to create the workloads. 96 | 97 | ##### Creating Input and Output Binding Information 98 | Parsers can also be used to extract the input information for the network. By calling `GetSubgraphInputTensorNames`, we extract all the input names and, with `GetNetworkInputBindingInfo`, bind the input points of the graph. 99 | 100 | The input binding information contains all the essential information about the input. It is a tuple consisting of integer identifiers for bindable layers (inputs, outputs) and the tensor info (data type, quantization information, number of dimensions, total number of elements). 101 | 102 | Similarly, we can get the output binding information for an output layer by using the parser to retrieve output tensor names and calling `GetNetworkOutputBindingInfo()`. 103 | 104 | 105 | ### Preparing the Workload Tensors 106 | 107 | ##### Preprocessing the Captured Frame 108 | Each frame captured from the source is read as an `ndarray` in BGR format and therefore has to be preprocessed before being passed into the network. 109 | 110 | This preprocessing step consists of swapping channels (BGR to RGB in this example), resizing the frame to the required resolution, expanding the dimensions of the array and converting the data type to match the model input layer. This information about the input tensor can be readily obtained from reading the `input_binding_info`. For example, SSD MobileNet V1 takes as input a tensor with shape `[1, 300, 300, 3]` and data type `uint8`. 111 | 112 | ##### Making Input and Output Tensors 113 | To produce the workload tensors, calling the functions `make_input_tensors()` and `make_output_tensors()` will return the input and output tensors respectively. 114 | 115 | 116 | ### Executing Inference 117 | After making the workload tensors, a compute device performs inference for the loaded network using the `EnqueueWorkload()` function of the runtime context. By calling the `workload_tensors_to_ndarray()` function, we obtain the results from inference as a list of `ndarrays`. 118 | 119 | 120 | ### Postprocessing 121 | 122 | ##### Decoding and Processing Inference Output 123 | The output from inference must be decoded to obtain information about detected objects in the frame. In the examples there are implementations for two networks, but you may also implement your own network decoding solution here. Please refer to the Implementing Your Own Network section of this document to learn how to do this. 124 | 125 | For SSD MobileNet V1 models, we decode the results to obtain the bounding box positions, classification index, confidence and number of detections in the input frame. 
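As a purely illustrative aid, a decoder for that kind of SSD output might look like the sketch below. It assumes the common post-processed TFLite SSD output layout (normalised boxes in `[y_min, x_min, y_max, x_max]` order, class indices, scores and a detection count); the `ssd_processing` name and threshold are placeholders, and the examples in this repository actually ship a YOLOv2 decoder in `yolov2.py`.

```python
def ssd_processing(output: list, confidence_threshold: float = 0.6) -> list:
    """Decode post-processed SSD output into [class index, [box positions], confidence] entries."""
    boxes, classes, scores = output[0][0], output[1][0], output[2][0]
    num_detections = int(output[3][0])
    detections = []
    for i in range(num_detections):
        if scores[i] < confidence_threshold:
            continue
        # Reorder the normalised [y_min, x_min, y_max, x_max] box to [x_min, y_min, x_max, y_max];
        # a resize factor (e.g. the frame size) scales these values to pixel coordinates later on.
        y_min, x_min, y_max, x_max = boxes[i]
        detections.append([int(classes[i]), [x_min, y_min, x_max, y_max], scores[i]])
    return detections
```

The returned list already matches the detection format described below, so a drawing routine such as `draw_bounding_boxes()` can consume it directly.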
126 | 127 | For YOLO V3 Tiny models, we decode the output and perform non-maximum suppression to filter out any weak detections below a confidence threshold and any redudant bounding boxes above an intersection-over-union threshold. 128 | 129 | It is encouraged to experiment with threshold values for confidence and intersection-over-union (IoU) to achieve the best visual results. 130 | 131 | The detection results are always returned as a list in the form `[class index, [box positions], confidence score]`, with the box positions list containing bounding box coordinates in the form `[x_min, y_min, x_max, y_max]`. 132 | 133 | ##### Drawing Bounding Boxes 134 | With the obtained results and using `draw_bounding_boxes()`, we are able to draw bounding boxes around detected objects and add the associated label and confidence score. The labels dictionary created earlier uses the class index of the detected object as a key to return the associated label and color for that class. The resize factor defined at the beginning scales the bounding box coordinates to their correct positions in the original frame. The processed frames are written to file or displayed in a separate window. 135 | -------------------------------------------------------------------------------- /examples/armnn/common/cv_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | """ 6 | This file contains helper functions for reading video/image data and 7 | pre/postprocessing of video/image data using OpenCV. 8 | """ 9 | 10 | import os 11 | 12 | import cv2 13 | import numpy as np 14 | 15 | import pyarmnn as ann 16 | 17 | 18 | def preprocess_array(x, **kwargs): 19 | x /= 127.5 20 | x -= 1. 21 | return x 22 | 23 | def preprocess(frame: np.ndarray, input_binding_info: tuple): 24 | """ 25 | Takes a frame, resizes, swaps channels and converts data type to match 26 | model input layer. The converted frame is wrapped in a const tensor 27 | and bound to the input tensor. 28 | 29 | Args: 30 | frame: Captured frame from video. 31 | input_binding_info: Contains shape and data type of model input layer. 32 | 33 | Returns: 34 | Input tensor. 35 | """ 36 | # Swap channels and resize frame to model resolution 37 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 38 | resized_frame = resize_with_aspect_ratio(frame, input_binding_info) 39 | 40 | # Expand dimensions and convert data type to match model input 41 | data_type = np.float32 if input_binding_info[1].GetDataType() == ann.DataType_Float32 else np.uint8 42 | resized_frame = np.expand_dims(np.asarray(resized_frame, dtype=data_type), axis=0) 43 | resized_frame = preprocess_array(resized_frame) 44 | assert resized_frame.shape == tuple(input_binding_info[1].GetShape()) 45 | 46 | input_tensors = ann.make_input_tensors([input_binding_info], [resized_frame]) 47 | return input_tensors 48 | 49 | 50 | def resize_with_aspect_ratio(frame: np.ndarray, input_binding_info: tuple): 51 | """ 52 | Resizes frame while maintaining aspect ratio, padding any empty space. 53 | 54 | Args: 55 | frame: Captured frame. 56 | input_binding_info: Contains shape of model input layer. 57 | 58 | Returns: 59 | Frame resized to the size of model input layer. 
60 | """ 61 | aspect_ratio = frame.shape[1] / frame.shape[0] 62 | model_height, model_width = list(input_binding_info[1].GetShape())[1:3] 63 | 64 | if aspect_ratio >= 1.0: 65 | new_height, new_width = int(model_width / aspect_ratio), model_width 66 | b_padding, r_padding = model_height - new_height, 0 67 | else: 68 | new_height, new_width = model_height, int(model_height * aspect_ratio) 69 | b_padding, r_padding = 0, model_width - new_width 70 | 71 | # Resize and pad any empty space 72 | frame = cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_LINEAR) 73 | frame = cv2.copyMakeBorder(frame, top=0, bottom=b_padding, left=0, right=r_padding, 74 | borderType=cv2.BORDER_CONSTANT, value=[0, 0, 0]) 75 | return frame 76 | 77 | 78 | def create_video_writer(video: cv2.VideoCapture, video_path: str, name: str): 79 | """ 80 | Creates a video writer object to write processed frames to file. 81 | 82 | Args: 83 | video: Video capture object, contains information about data source. 84 | video_path: User-specified video file path. 85 | output_path: Optional path to save the processed video. 86 | 87 | Returns: 88 | Video writer object. 89 | """ 90 | _, ext = os.path.splitext(video_path) 91 | 92 | i, filename = 0, os.path.join(str(), f'{name}{ext}') 93 | 94 | while os.path.exists(filename): 95 | i += 1 96 | filename = os.path.join(str(), f'{name}({i}){ext}') 97 | print(filename) 98 | video_writer = cv2.VideoWriter(filename=filename, 99 | fourcc=get_source_encoding_int(video), 100 | fps=int(video.get(cv2.CAP_PROP_FPS)), 101 | frameSize=(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)), 102 | int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)))) 103 | return video_writer 104 | 105 | 106 | def init_video_file_capture(video_path: str, name: str): 107 | """ 108 | Creates a video capture object from a video file. 109 | 110 | Args: 111 | video_path: User-specified video file path. 112 | output_path: Optional path to save the processed video. 113 | 114 | Returns: 115 | Video capture object to capture frames, video writer object to write processed 116 | frames to file, plus total frame count of video source to iterate through. 117 | """ 118 | if not os.path.exists(video_path): 119 | raise FileNotFoundError(f'Video file not found for: {video_path}') 120 | video = cv2.VideoCapture(video_path) 121 | if not video.isOpened: 122 | raise RuntimeError(f'Failed to open video capture from file: {video_path}') 123 | 124 | video_writer = create_video_writer(video, video_path, name) 125 | iter_frame_count = range(int(video.get(cv2.CAP_PROP_FRAME_COUNT))) 126 | return video, video_writer, iter_frame_count 127 | 128 | 129 | def init_video_stream_capture(video_source: int): 130 | """ 131 | Creates a video capture object from a device. 132 | 133 | Args: 134 | video_source: Device index used to read video stream. 135 | 136 | Returns: 137 | Video capture object used to capture frames from a video stream. 138 | """ 139 | video = cv2.VideoCapture(video_source) 140 | if not video.isOpened: 141 | raise RuntimeError(f'Failed to open video capture for device with index: {video_source}') 142 | print('Processing video stream. Press \'Esc\' key to exit the demo.') 143 | return video 144 | 145 | 146 | def draw_bounding_boxes(frame: np.ndarray, detections: list, resize_factor, labels: dict): 147 | """ 148 | Draws bounding boxes around detected objects and adds a label and confidence score. 149 | 150 | Args: 151 | frame: The original captured frame from video source. 
152 | detections: A list of detected objects in the form [class, [box positions], confidence]. 153 | resize_factor: Resizing factor to scale box coordinates to output frame size. 154 | labels: Dictionary of labels and colors keyed on the classification index. 155 | """ 156 | for detection in detections: 157 | class_idx, box, confidence = [d for d in detection] 158 | label, color = labels[class_idx][0].capitalize(), labels[class_idx][1] 159 | 160 | # Obtain frame size and resized bounding box positions 161 | frame_height, frame_width = frame.shape[:2] 162 | x_min, y_min, x_max, y_max = [int(position * resize_factor) for position in box] 163 | 164 | # Ensure box stays within the frame 165 | x_min, y_min = max(0, x_min), max(0, y_min) 166 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 167 | 168 | # Draw bounding box around detected object 169 | cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2) 170 | 171 | # Create label for detected object class 172 | label = f'{label} {confidence * 100:.1f}%' 173 | label_color = (0, 0, 0) if sum(color)>200 else (255, 255, 255) 174 | 175 | # Make sure label always stays on-screen 176 | x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2] 177 | 178 | lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text) 179 | lbl_box_xy_max = (x_min + int(0.55 * x_text), y_min + y_text if y_min<25 else y_min) 180 | lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5) 181 | 182 | # Add label and confidence value 183 | cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1) 184 | cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.50, 185 | label_color, 1, cv2.LINE_AA) 186 | 187 | 188 | def get_source_encoding_int(video_capture): 189 | return int(video_capture.get(cv2.CAP_PROP_FOURCC)) 190 | -------------------------------------------------------------------------------- /examples/armnn/common/network_executor.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 2 | # SPDX-License-Identifier: MIT 3 | 4 | import os 5 | from typing import List, Tuple 6 | 7 | import pyarmnn as ann 8 | import numpy as np 9 | 10 | 11 | def create_network(model_file: str, backends: list, input_names: Tuple[str] = (), output_names: Tuple[str] = ()): 12 | """ 13 | Creates a network based on the model file and a list of backends. 14 | 15 | Args: 16 | model_file: User-specified model file. 17 | backends: List of backends to optimize network. 18 | input_names: 19 | output_names: 20 | 21 | Returns: 22 | net_id: Unique ID of the network to run. 23 | runtime: Runtime context for executing inference. 24 | input_binding_info: Contains essential information about the model input. 25 | output_binding_info: Used to map output tensor and its memory. 26 | """ 27 | if not os.path.exists(model_file): 28 | raise FileNotFoundError(f'Model file not found for: {model_file}') 29 | 30 | _, ext = os.path.splitext(model_file) 31 | if ext == '.tflite': 32 | parser = ann.ITfLiteParser() 33 | else: 34 | raise ValueError("Supplied model file type is not supported. 
Supported types are [ tflite ]") 35 | 36 | network = parser.CreateNetworkFromBinaryFile(model_file) 37 | 38 | # Specify backends to optimize network 39 | preferred_backends = [] 40 | for b in backends: 41 | preferred_backends.append(ann.BackendId(b)) 42 | 43 | # Select appropriate device context and optimize the network for that device 44 | options = ann.CreationOptions() 45 | runtime = ann.IRuntime(options) 46 | opt_network, messages = ann.Optimize(network, preferred_backends, runtime.GetDeviceSpec(), 47 | ann.OptimizerOptions()) 48 | print(f'Preferred backends: {backends}\n{runtime.GetDeviceSpec()}\n' 49 | f'Optimization warnings: {messages}') 50 | 51 | # Load the optimized network onto the Runtime device 52 | net_id, _ = runtime.LoadNetwork(opt_network) 53 | 54 | # Get input and output binding information 55 | graph_id = parser.GetSubgraphCount() - 1 56 | input_names = parser.GetSubgraphInputTensorNames(graph_id) 57 | input_binding_info = parser.GetNetworkInputBindingInfo(graph_id, input_names[0]) 58 | output_names = parser.GetSubgraphOutputTensorNames(graph_id) 59 | output_binding_info = [] 60 | 61 | for output_name in output_names: 62 | out_bind_info = parser.GetNetworkOutputBindingInfo(graph_id, output_name) 63 | output_binding_info.append(out_bind_info) 64 | 65 | return net_id, runtime, input_binding_info, output_binding_info 66 | 67 | 68 | def execute_network(input_tensors: list, output_tensors: list, runtime, net_id: int) -> List[np.ndarray]: 69 | """ 70 | Executes inference for the loaded network. 71 | 72 | Args: 73 | input_tensors: The input frame tensor. 74 | output_tensors: The output tensor from output node. 75 | runtime: Runtime context for executing inference. 76 | net_id: Unique ID of the network to run. 77 | 78 | Returns: 79 | list: Inference results as a list of ndarrays. 80 | """ 81 | runtime.EnqueueWorkload(net_id, input_tensors, output_tensors) 82 | output = ann.workload_tensors_to_ndarray(output_tensors) 83 | return output 84 | 85 | 86 | class ArmnnNetworkExecutor: 87 | 88 | def __init__(self, model_file: str, backends: list): 89 | """ 90 | Creates an inference executor for a given network and a list of backends. 91 | 92 | Args: 93 | model_file: User-specified model file. 94 | backends: List of backends to optimize network. 95 | """ 96 | self.network_id, self.runtime, self.input_binding_info, self.output_binding_info = create_network(model_file, 97 | backends) 98 | self.output_tensors = ann.make_output_tensors(self.output_binding_info) 99 | 100 | def run(self, input_tensors: list) -> List[np.ndarray]: 101 | """ 102 | Executes inference for the loaded network. 103 | 104 | Args: 105 | input_tensors: The input frame tensor. 106 | 107 | Returns: 108 | list: Inference results as a list of ndarrays. 109 | """ 110 | return execute_network(input_tensors, self.output_tensors, self.runtime, self.network_id) 111 | -------------------------------------------------------------------------------- /examples/armnn/common/tests/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
2 | # SPDX-License-Identifier: MIT 3 | 4 | import os 5 | import ntpath 6 | 7 | import urllib.request 8 | import zipfile 9 | 10 | import pytest 11 | 12 | script_dir = os.path.dirname(__file__) 13 | @pytest.fixture(scope="session") 14 | def test_data_folder(request): 15 | """ 16 | This fixture returns path to folder with shared test resources among all tests 17 | """ 18 | 19 | data_dir = os.path.join(script_dir, "testdata") 20 | if not os.path.exists(data_dir): 21 | os.mkdir(data_dir) 22 | 23 | files_to_download = ["https://raw.githubusercontent.com/opencv/opencv/4.0.0/samples/data/messi5.jpg", 24 | "https://raw.githubusercontent.com/opencv/opencv/4.0.0/samples/data/basketball1.png", 25 | "https://raw.githubusercontent.com/opencv/opencv/4.0.0/samples/data/Megamind.avi", 26 | "https://storage.googleapis.com/download.tensorflow.org/models/tflite/coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip" 27 | ] 28 | 29 | for file in files_to_download: 30 | path, filename = ntpath.split(file) 31 | file_path = os.path.join(data_dir, filename) 32 | if not os.path.exists(file_path): 33 | print("\nDownloading test file: " + file_path + "\n") 34 | urllib.request.urlretrieve(file, file_path) 35 | 36 | # Any unzipping needed, and moving around of files 37 | with zipfile.ZipFile(os.path.join(data_dir, "coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip"), 'r') as zip_ref: 38 | zip_ref.extractall(data_dir) 39 | 40 | return data_dir 41 | -------------------------------------------------------------------------------- /examples/armnn/common/tests/context.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) 4 | 5 | import cv_utils 6 | import network_executor 7 | import utils 8 | -------------------------------------------------------------------------------- /examples/armnn/common/tests/test_network_executor.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 2 | # SPDX-License-Identifier: MIT 3 | 4 | import os 5 | 6 | import cv2 7 | 8 | from context import network_executor 9 | from context import cv_utils 10 | 11 | 12 | def test_execute_network(test_data_folder): 13 | model_path = os.path.join(test_data_folder, "detect.tflite") 14 | backends = ["CpuAcc", "CpuRef"] 15 | 16 | executor = network_executor.ArmnnNetworkExecutor(model_path, backends) 17 | img = cv2.imread(os.path.join(test_data_folder, "messi5.jpg")) 18 | input_tensors = cv_utils.preprocess(img, executor.input_binding_info) 19 | 20 | output_result = executor.run(input_tensors) 21 | 22 | # Ensure it detects a person 23 | classes = output_result[1] 24 | assert classes[0][0] == 0 25 | -------------------------------------------------------------------------------- /examples/armnn/common/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
2 | # SPDX-License-Identifier: MIT 3 | 4 | import os 5 | 6 | from context import cv_utils 7 | from context import utils 8 | 9 | 10 | def test_get_source_encoding(test_data_folder): 11 | video_file = os.path.join(test_data_folder, "Megamind.avi") 12 | video, video_writer, frame_count = cv_utils.init_video_file_capture(video_file, "/tmp") 13 | assert cv_utils.get_source_encoding_int(video) == 1145656920 14 | 15 | 16 | def test_read_existing_labels_file(test_data_folder): 17 | label_file = os.path.join(test_data_folder, "labelmap.txt") 18 | labels_map = utils.dict_labels(label_file) 19 | assert labels_map is not None 20 | -------------------------------------------------------------------------------- /examples/armnn/common/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 2 | # SPDX-License-Identifier: MIT 3 | 4 | """Contains helper functions that can be used across the example apps.""" 5 | 6 | import os 7 | import errno 8 | from pathlib import Path 9 | 10 | import numpy as np 11 | 12 | 13 | def dict_labels(labels_file_path: str, include_rgb=False) -> dict: 14 | """Creates a dictionary of labels from the input labels file. 15 | 16 | Args: 17 | labels_file: Path to file containing labels to map model outputs. 18 | include_rgb: Adds randomly generated RGB values to the values of the 19 | dictionary. Used for plotting bounding boxes of different colours. 20 | 21 | Returns: 22 | Dictionary with classification indices for keys and labels for values. 23 | 24 | Raises: 25 | FileNotFoundError: 26 | Provided `labels_file_path` does not exist. 27 | """ 28 | labels_file = Path(labels_file_path) 29 | if not labels_file.is_file(): 30 | raise FileNotFoundError( 31 | errno.ENOENT, os.strerror(errno.ENOENT), labels_file_path 32 | ) 33 | 34 | labels = {} 35 | with open(labels_file, "r") as f: 36 | for idx, line in enumerate(f, 0): 37 | if include_rgb: 38 | labels[idx] = line.strip("\n"), tuple(np.random.random(size=3) * 255) 39 | else: 40 | labels[idx] = line.strip("\n") 41 | return labels 42 | -------------------------------------------------------------------------------- /examples/armnn/face_age-gender/README.md: -------------------------------------------------------------------------------- 1 | # PyArmNN Human face age/gender recognition Sample Application 2 | 3 | ## Introduction 4 | This sample application guides the user and shows how to perform age/gender recognition using PyArmNN API. 5 | 6 | The application takes a model and video file or camera feed as input, runs inference on each frame, and draws bounding boxes around detected faces and age/gender labels overlaid. 7 | 8 | ## Human face age/gender recognition from Video File 9 | Human face age/gender recognition demo that takes a video file, runs inference on each frame producing 10 | bounding boxes and labels around detected faces, and saves the processed video. 11 | 12 | Example usage: 13 | 14 | ```bash 15 | python3 run_video_file.py --first_model_file_path YOLO_best_mAP.tflite --second_model_file MobileNet-v1-age-gender.tflite --video_file_path ../samples/test_s.mp4 16 | ``` 17 | 18 | ## Human face age/gender recognition from Video Stream 19 | 20 | Human face age/gender recognition demo that takes a video stream from a device, runs inference 21 | on each frame producing bounding boxes and labels around detected faces, 22 | and displays a window with the latest processed frame. 
23 | 24 | Example usage: 25 | 26 | ```bash 27 | DISPLAY=:0 python3 run_video_stream.py --first_model_file_path YOLO_best_mAP.tflite --second_model_file MobileNet-v1-age-gender.tflite 28 | ``` 29 | 30 | This application has been verified to work against the YOLOv2 detection layer MobileNet models and MobileFaceNet keypoints detector, which can be downloaded from: 31 | 32 | https://files.seeedstudio.com/ml/age_gender_recognition_models.zip 33 | -------------------------------------------------------------------------------- /examples/armnn/face_age-gender/box.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | # Todo : BoundBox & its related method extraction 6 | class BoundBox: 7 | def __init__(self, x, y, w, h, c = None, classes = None): 8 | self.x = x 9 | self.y = y 10 | self.w = w 11 | self.h = h 12 | 13 | self.c = c 14 | self.classes = classes 15 | 16 | def get_label(self): 17 | return np.argmax(self.classes) 18 | 19 | def get_score(self): 20 | return self.classes[self.get_label()] 21 | 22 | def iou(self, bound_box): 23 | b1 = self.as_centroid() 24 | b2 = bound_box.as_centroid() 25 | return centroid_box_iou(b1, b2) 26 | 27 | def as_centroid(self): 28 | return np.array([self.x, self.y, self.w, self.h]) 29 | 30 | 31 | def boxes_to_array(bound_boxes): 32 | """ 33 | # Args 34 | boxes : list of BoundBox instances 35 | 36 | # Returns 37 | centroid_boxes : (N, 4) 38 | probs : (N, nb_classes) 39 | """ 40 | centroid_boxes = [] 41 | probs = [] 42 | for box in bound_boxes: 43 | centroid_boxes.append([box.x, box.y, box.w, box.h]) 44 | probs.append(box.classes) 45 | return np.array(centroid_boxes), np.array(probs) 46 | 47 | 48 | def nms_boxes(boxes, n_classes, nms_threshold=0.3, obj_threshold=0.3): 49 | """ 50 | # Args 51 | boxes : list of BoundBox 52 | 53 | # Returns 54 | boxes : list of BoundBox 55 | non maximum supressed BoundBox instances 56 | """ 57 | # suppress non-maximal boxes 58 | for c in range(n_classes): 59 | sorted_indices = list(reversed(np.argsort([box.classes[c] for box in boxes]))) 60 | 61 | for i in range(len(sorted_indices)): 62 | index_i = sorted_indices[i] 63 | 64 | if boxes[index_i].classes[c] == 0: 65 | continue 66 | else: 67 | for j in range(i+1, len(sorted_indices)): 68 | index_j = sorted_indices[j] 69 | 70 | if boxes[index_i].iou(boxes[index_j]) >= nms_threshold: 71 | boxes[index_j].classes[c] = 0 72 | # remove the boxes which are less likely than a obj_threshold 73 | boxes = [box for box in boxes if box.get_score() > obj_threshold] 74 | return boxes 75 | 76 | 77 | def draw_scaled_boxes(image, boxes, probs, labels, desired_size=400): 78 | img_size = min(image.shape[:2]) 79 | if img_size < desired_size: 80 | scale_factor = float(desired_size) / img_size 81 | else: 82 | scale_factor = 1.0 83 | 84 | h, w = image.shape[:2] 85 | img_scaled = cv2.resize(image, (int(w*scale_factor), int(h*scale_factor))) 86 | if boxes != []: 87 | boxes_scaled = boxes*scale_factor 88 | boxes_scaled = boxes_scaled.astype(np.int) 89 | else: 90 | boxes_scaled = boxes 91 | return draw_boxes(img_scaled, boxes_scaled, probs, labels) 92 | 93 | 94 | def draw_boxes(image, boxes, probs, labels): 95 | for box, classes in zip(boxes, probs): 96 | x1, y1, x2, y2 = box 97 | cv2.rectangle(image, (x1,y1), (x2,y2), (0,255,0), 3) 98 | cv2.putText(image, 99 | '{}: {:.2f}'.format(labels[np.argmax(classes)], classes.max()), 100 | (x1, y1 - 13), 101 | cv2.FONT_HERSHEY_SIMPLEX, 102 | 1e-3 * image.shape[0], 103 | (0,255,0), 2) 104 | return 
image 105 | 106 | 107 | def centroid_box_iou(box1, box2): 108 | def _interval_overlap(interval_a, interval_b): 109 | x1, x2 = interval_a 110 | x3, x4 = interval_b 111 | 112 | if x3 < x1: 113 | if x4 < x1: 114 | return 0 115 | else: 116 | return min(x2,x4) - x1 117 | else: 118 | if x2 < x3: 119 | return 0 120 | else: 121 | return min(x2,x4) - x3 122 | 123 | _, _, w1, h1 = box1.reshape(-1,) 124 | _, _, w2, h2 = box2.reshape(-1,) 125 | x1_min, y1_min, x1_max, y1_max = to_minmax(box1.reshape(-1,4)).reshape(-1,) 126 | x2_min, y2_min, x2_max, y2_max = to_minmax(box2.reshape(-1,4)).reshape(-1,) 127 | 128 | intersect_w = _interval_overlap([x1_min, x1_max], [x2_min, x2_max]) 129 | intersect_h = _interval_overlap([y1_min, y1_max], [y2_min, y2_max]) 130 | intersect = intersect_w * intersect_h 131 | union = w1 * h1 + w2 * h2 - intersect 132 | 133 | return float(intersect) / union 134 | 135 | 136 | def to_centroid(minmax_boxes): 137 | """ 138 | minmax_boxes : (N, 4) 139 | """ 140 | minmax_boxes = minmax_boxes.astype(np.float) 141 | centroid_boxes = np.zeros_like(minmax_boxes) 142 | 143 | x1 = minmax_boxes[:,0] 144 | y1 = minmax_boxes[:,1] 145 | x2 = minmax_boxes[:,2] 146 | y2 = minmax_boxes[:,3] 147 | 148 | centroid_boxes[:,0] = (x1 + x2) / 2 149 | centroid_boxes[:,1] = (y1 + y2) / 2 150 | centroid_boxes[:,2] = x2 - x1 151 | centroid_boxes[:,3] = y2 - y1 152 | return centroid_boxes 153 | 154 | def to_minmax(centroid_boxes): 155 | centroid_boxes = centroid_boxes.astype(np.float) 156 | minmax_boxes = np.zeros_like(centroid_boxes) 157 | 158 | cx = centroid_boxes[:,0] 159 | cy = centroid_boxes[:,1] 160 | w = centroid_boxes[:,2] 161 | h = centroid_boxes[:,3] 162 | 163 | minmax_boxes[:,0] = cx - w/2 164 | minmax_boxes[:,1] = cy - h/2 165 | minmax_boxes[:,2] = cx + w/2 166 | minmax_boxes[:,3] = cy + h/2 167 | return minmax_boxes 168 | 169 | def create_anchor_boxes(anchors): 170 | """ 171 | # Args 172 | anchors : list of floats 173 | # Returns 174 | boxes : array, shape of (len(anchors)/2, 4) 175 | centroid-type 176 | """ 177 | boxes = [] 178 | n_boxes = int(len(anchors)/2) 179 | for i in range(n_boxes): 180 | boxes.append(np.array([0, 0, anchors[2*i], anchors[2*i+1]])) 181 | return np.array(boxes) 182 | 183 | def find_match_box(centroid_box, centroid_boxes): 184 | """Find the index of the boxes with the largest overlap among the N-boxes. 185 | # Args 186 | box : array, shape of (1, 4) 187 | boxes : array, shape of (N, 4) 188 | 189 | # Return 190 | match_index : int 191 | """ 192 | match_index = -1 193 | max_iou = -1 194 | 195 | for i, box in enumerate(centroid_boxes): 196 | iou = centroid_box_iou(centroid_box, box) 197 | 198 | if max_iou < iou: 199 | match_index = i 200 | max_iou = iou 201 | return match_index 202 | -------------------------------------------------------------------------------- /examples/armnn/face_age-gender/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.19.2 2 | tqdm>=4.47.0 3 | -------------------------------------------------------------------------------- /examples/armnn/face_age-gender/run_video_file.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | """ 6 | Human face age/gender recognition demo that takes a video file, runs inference on each frame producing 7 | bounding boxes and labels around detected faces, and saves the processed video. 8 | 9 | python3 run_video_file.py --first_model_file_path YOLO_best_mAP.tflite --second_model_file MobileNet-v1-age-gender.tflite --video_file_path ../samples/test_s.mp4 10 | 11 | """ 12 | 13 | import os 14 | import sys 15 | import time 16 | script_dir = os.path.dirname(__file__) 17 | sys.path.insert(1, os.path.join(script_dir, '..', 'common')) 18 | 19 | import cv2 20 | import numpy as np 21 | from tqdm import tqdm 22 | from argparse import ArgumentParser 23 | 24 | from yolov2 import yolo_processing, yolo_resize_factor 25 | from cv_utils import init_video_file_capture, resize_with_aspect_ratio, preprocess, preprocess_array 26 | from network_executor import ArmnnNetworkExecutor 27 | 28 | import pyarmnn as ann 29 | 30 | gender_list = ["female","male"] 31 | age_list = ["0-10","11-20","21-45","46-60","60-100"] 32 | 33 | def process_faces(frame, detections, executor_age_gender, resize_factor): 34 | global age_list, gender_list 35 | 36 | result_list = [] 37 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 38 | 39 | for detection in detections: 40 | box = detection[1].copy() 41 | for i in range(len(box)): 42 | box[i] = int(box[i] * resize_factor) 43 | 44 | frame_height, frame_width = frame.shape[:2] 45 | x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3] 46 | 47 | # Ensure box stays within the frame 48 | x_min, y_min = max(0, x_min), max(0, y_min) 49 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 50 | 51 | face_img = frame[y_min:y_max, x_min:x_max] 52 | face_img = cv2.resize(face_img, (128, 128)) 53 | 54 | face_img = face_img.astype(np.float32) 55 | face_img = preprocess_array(face_img) 56 | 57 | input_tensors = ann.make_input_tensors([executor_age_gender.input_binding_info], [face_img]) 58 | 59 | result = executor_age_gender.run(input_tensors) 60 | gender = gender_list[np.argmax(result[0][0])] 61 | age = age_list[np.argmax(result[1][0])] 62 | 63 | result_list.append([gender, age]) 64 | 65 | return result_list 66 | 67 | def draw_result(frame: np.ndarray, detections: list, resize_factor, face_data): 68 | """ 69 | Draws bounding boxes around detected objects and adds a label and confidence score. 70 | 71 | Args: 72 | frame: The original captured frame from video source. 73 | detections: A list of detected objects in the form [class, [box positions], confidence]. 74 | resize_factor: Resizing factor to scale box coordinates to output frame size. 
75 | face_data: List containing information about age and gender 76 | """ 77 | for i in range(len(detections)): 78 | class_idx, box, confidence = [d for d in detections[i]] 79 | color = (255, 0, 0) if face_data[i][0] == 'male' else (0, 0, 255) 80 | 81 | # Obtain frame size and resized bounding box positions 82 | frame_height, frame_width = frame.shape[:2] 83 | x_min, y_min, x_max, y_max = [int(position * resize_factor) for position in box] 84 | 85 | # Ensure box stays within the frame 86 | x_min, y_min = max(0, x_min), max(0, y_min) 87 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 88 | 89 | # Draw bounding box around detected object 90 | cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2) 91 | 92 | # Create label for detected object class 93 | label = "Gender: {}, Age: {}".format(face_data[i][0], face_data[i][1]) 94 | label_color = (255, 255, 255) 95 | 96 | # Make sure label always stays on-screen 97 | x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2] 98 | 99 | lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text) 100 | lbl_box_xy_max = (x_min + int(0.75 * x_text), y_min + y_text if y_min<25 else y_min) 101 | lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5) 102 | 103 | # Add label and confidence value 104 | cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1) 105 | cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.70, label_color, 1, cv2.LINE_AA) 106 | 107 | 108 | def main(args): 109 | video, video_writer, frame_count = init_video_file_capture(args.video_file_path, 'age_gender_demo') 110 | frame_num = len(frame_count) 111 | 112 | executor_fd = ArmnnNetworkExecutor(args.first_model_file_path, args.preferred_backends) 113 | executor_age_gender = ArmnnNetworkExecutor(args.second_model_file_path, args.preferred_backends) 114 | 115 | process_output, resize_factor = yolo_processing, yolo_resize_factor(video, executor_fd.input_binding_info) 116 | 117 | times = [] 118 | 119 | for _ in tqdm(frame_count, desc='Processing frames'): 120 | frame_present, frame = video.read() 121 | if not frame_present: 122 | continue 123 | 124 | input_tensors = preprocess(frame, executor_fd.input_binding_info) 125 | 126 | start_time = time.time() # measure only inference and intermediary processing times 127 | output_result = executor_fd.run(input_tensors) 128 | detections = process_output(output_result) 129 | face_data = process_faces(frame, detections, executor_age_gender, resize_factor) 130 | end_time = (time.time() - start_time)*1000 131 | 132 | draw_result(frame, detections, resize_factor, face_data) 133 | 134 | times.append(end_time) 135 | video_writer.write(frame) 136 | 137 | print('Finished processing frames') 138 | video.release(), video_writer.release() 139 | 140 | print("Average time(ms): ", sum(times)//frame_num) 141 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1000.0 / average of inference times for all the frames 142 | 143 | if __name__ == '__main__': 144 | parser = ArgumentParser() 145 | parser.add_argument('--video_file_path', required=True, type=str, 146 | help='Path to the video file to run object detection on') 147 | 148 | parser.add_argument('--first_model_file_path', required=True, type=str, 149 | help='Path to the first stage model to use') 150 | parser.add_argument('--second_model_file_path', required=True, type=str, 151 | help='Path to the second stage model to use') 152 | 153 | parser.add_argument('--preferred_backends', type=str, nargs='+', default=['CpuAcc', 
'CpuRef'], 154 | help='Takes the preferred backends in preference order, separated by whitespace, ' 155 | 'for example: CpuAcc GpuAcc CpuRef. Accepted options: [CpuAcc, CpuRef, GpuAcc]. ' 156 | 'Defaults to [CpuAcc, CpuRef]') 157 | args = parser.parse_args() 158 | main(args) 159 | -------------------------------------------------------------------------------- /examples/armnn/face_age-gender/run_video_stream.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | """ 6 | Human face age/gender recognition demo that takes a video stream from a device, runs inference 7 | on each frame producing bounding boxes and labels around detected faces, 8 | and displays a window with the latest processed frame. 9 | 10 | DISPLAY=:0 python3 run_video_stream.py --first_model_file_path YOLO_best_mAP.tflite --second_model_file MobileNet-v1-age-gender.tflite 11 | 12 | """ 13 | 14 | import os 15 | import sys 16 | import time 17 | script_dir = os.path.dirname(__file__) 18 | sys.path.insert(1, os.path.join(script_dir, '..', 'common')) 19 | 20 | import cv2 21 | import numpy as np 22 | from tqdm import tqdm 23 | from argparse import ArgumentParser 24 | 25 | from yolov2 import yolo_processing, yolo_resize_factor 26 | 27 | from cv_utils import init_video_stream_capture, resize_with_aspect_ratio, preprocess, preprocess_array 28 | from network_executor import ArmnnNetworkExecutor 29 | import pyarmnn as ann 30 | 31 | gender_list = ["female","male"] 32 | age_list = ["0-10","11-20","21-45","46-60","60-100"] 33 | 34 | def process_faces(frame, detections, executor_age_gender, resize_factor): 35 | global age_list, gender_list 36 | 37 | result_list = [] 38 | 39 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 40 | 41 | for detection in detections: 42 | box = detection[1].copy() 43 | for i in range(len(box)): 44 | box[i] = int(box[i] * resize_factor) 45 | 46 | frame_height, frame_width = frame.shape[:2] 47 | x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3] 48 | 49 | # Ensure box stays within the frame 50 | x_min, y_min = max(0, x_min), max(0, y_min) 51 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 52 | 53 | face_img = frame[y_min:y_max, x_min:x_max] 54 | face_img = cv2.resize(face_img, (128, 128)) 55 | 56 | #cv2.imshow('PyArmNN Object Detection Demo face', face_img) 57 | 58 | face_img = face_img.astype(np.float32) 59 | face_img = preprocess_array(face_img) 60 | 61 | input_tensors = ann.make_input_tensors([executor_age_gender.input_binding_info], [face_img]) 62 | 63 | result = executor_age_gender.run(input_tensors) 64 | gender = gender_list[np.argmax(result[0][0])] 65 | age = age_list[np.argmax(result[1][0])] 66 | 67 | result_list.append([gender, age]) 68 | 69 | return result_list 70 | 71 | def draw_result(frame: np.ndarray, detections: list, resize_factor, face_data): 72 | """ 73 | Draws bounding boxes around detected objects and adds a label and confidence score. 74 | 75 | Args: 76 | frame: The original captured frame from video source. 77 | detections: A list of detected objects in the form [class, [box positions], confidence]. 78 | resize_factor: Resizing factor to scale box coordinates to output frame size. 
79 | face_data: List containing information about age and gender 80 | """ 81 | 82 | for i in range(len(detections)): 83 | class_idx, box, confidence = [d for d in detections[i]] 84 | color = (255, 0, 0) if face_data[i][0] == 'male' else (0, 0, 255) 85 | 86 | # Obtain frame size and resized bounding box positions 87 | frame_height, frame_width = frame.shape[:2] 88 | x_min, y_min, x_max, y_max = [int(position * resize_factor) for position in box] 89 | 90 | # Ensure box stays within the frame 91 | x_min, y_min = max(0, x_min), max(0, y_min) 92 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 93 | 94 | # Draw bounding box around detected object 95 | cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2) 96 | 97 | # Create label for detected object class 98 | label = "Gender: {}, Age: {}".format(face_data[i][0], face_data[i][1]) 99 | label_color = (255, 255, 255) 100 | 101 | # Make sure label always stays on-screen 102 | x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2] 103 | 104 | lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text) 105 | lbl_box_xy_max = (x_min + int(0.75 * x_text), y_min + y_text if y_min<25 else y_min) 106 | lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5) 107 | 108 | # Add label and confidence value 109 | cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1) 110 | cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.70, label_color, 1, cv2.LINE_AA) 111 | 112 | 113 | def main(args): 114 | video = init_video_stream_capture(args.video_source) 115 | 116 | executor_fd = ArmnnNetworkExecutor(args.first_model_file_path, args.preferred_backends) 117 | executor_age_gender = ArmnnNetworkExecutor(args.second_model_file_path, args.preferred_backends) 118 | 119 | process_output, resize_factor = yolo_processing, yolo_resize_factor(video, executor_fd.input_binding_info) 120 | 121 | while True: 122 | 123 | frame_present, frame = video.read() 124 | frame = cv2.flip(frame, 1) # Horizontally flip the frame 125 | if not frame_present: 126 | raise RuntimeError('Error reading frame from video stream') 127 | input_tensors = preprocess(frame, executor_fd.input_binding_info) 128 | print("Running inference...") 129 | 130 | start_time = time.time() # start time of the inference 131 | output_result = executor_fd.run(input_tensors) 132 | detections = process_output(output_result) 133 | face_data = process_faces(frame, detections, executor_age_gender, resize_factor) 134 | 135 | print("FPS: ", 1.0 / (time.time() - start_time)) # FPS = 1 / time to process loop 136 | print("Time(ms): ", (time.time() - start_time)*1000) 137 | 138 | draw_result(frame, detections, resize_factor, face_data) 139 | cv2.imshow('PyArmNN Object Detection Demo', frame) 140 | 141 | if cv2.waitKey(1) == 27: 142 | print('\nExit key activated. Closing video...') 143 | break 144 | video.release(), cv2.destroyAllWindows() 145 | 146 | 147 | if __name__ == '__main__': 148 | parser = ArgumentParser() 149 | parser.add_argument('--video_source', type=int, default=0, 150 | help='Device index to access video stream. 
Defaults to primary device camera at index 0') 151 | 152 | parser.add_argument('--first_model_file_path', required=True, type=str, 153 | help='Path to the first stage model to use') 154 | parser.add_argument('--second_model_file_path', required=True, type=str, 155 | help='Path to the second stage model to use') 156 | 157 | parser.add_argument('--preferred_backends', type=str, nargs='+', default=['CpuAcc', 'CpuRef'], 158 | help='Takes the preferred backends in preference order, separated by whitespace, ' 159 | 'for example: CpuAcc GpuAcc CpuRef. Accepted options: [CpuAcc, CpuRef, GpuAcc]. ' 160 | 'Defaults to [CpuAcc, CpuRef]') 161 | args = parser.parse_args() 162 | main(args) 163 | -------------------------------------------------------------------------------- /examples/armnn/face_age-gender/yolov2.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 2 | # SPDX-License-Identifier: MIT 3 | 4 | """ 5 | Contains functions specific to decoding and processing inference results for YOLO V3 Tiny models. 6 | """ 7 | 8 | import cv2 9 | import numpy as np 10 | from box import BoundBox, nms_boxes, boxes_to_array, to_minmax, draw_boxes 11 | 12 | 13 | def yolo_processing(netout): 14 | anchors = [1.889, 2.5245, 2.9465, 3.94056, 3.99987, 5.3658, 5.155437, 6.92275, 6.718375, 9.01025] 15 | nms_threshold=0.2 16 | """Convert Yolo network output to bounding box 17 | 18 | # Args 19 | netout : 4d-array, shape of (grid_h, grid_w, num of boxes per grid, 5 + n_classes) 20 | YOLO neural network output array 21 | 22 | # Returns 23 | boxes : array, shape of (N, 4) 24 | coordinate scale is normalized [0, 1] 25 | probs : array, shape of (N, nb_classes) 26 | """ 27 | netout = netout[0].reshape(7,7,5,6) 28 | grid_h, grid_w, nb_box = netout.shape[:3] 29 | boxes = [] 30 | 31 | # decode the output by the network 32 | netout[..., 4] = _sigmoid(netout[..., 4]) 33 | netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:]) 34 | netout[..., 5:] *= netout[..., 5:] > 0.3 35 | 36 | for row in range(grid_h): 37 | for col in range(grid_w): 38 | for b in range(nb_box): 39 | # from 4th element onwards are confidence and class classes 40 | classes = netout[row,col,b,5:] 41 | 42 | if np.sum(classes) > 0: 43 | # first 4 elements are x, y, w, and h 44 | x, y, w, h = netout[row,col,b,:4] 45 | 46 | x = (col + _sigmoid(x)) / grid_w # center position, unit: image width 47 | y = (row + _sigmoid(y)) / grid_h # center position, unit: image height 48 | w = anchors[2 * b + 0] * np.exp(w) / grid_w # unit: image width 49 | h = anchors[2 * b + 1] * np.exp(h) / grid_h # unit: image height 50 | confidence = netout[row,col,b,4] 51 | box = BoundBox(x, y, w, h, confidence, classes) 52 | boxes.append(box) 53 | 54 | boxes = nms_boxes(boxes, len(classes), nms_threshold, 0.3) 55 | boxes, probs = boxes_to_array(boxes) 56 | #print(boxes) 57 | predictions = [] 58 | def _to_original_scale(boxes): 59 | minmax_boxes = to_minmax(boxes) 60 | minmax_boxes[:,0] *= 224 61 | minmax_boxes[:,2] *= 224 62 | minmax_boxes[:,1] *= 224 63 | minmax_boxes[:,3] *= 224 64 | return minmax_boxes.astype(np.int) 65 | 66 | if len(boxes) > 0: 67 | boxes = _to_original_scale(boxes) 68 | 69 | for i in range(len(boxes)): 70 | predictions.append([0, boxes[i], probs[i][0]]) 71 | 72 | return predictions 73 | 74 | def _sigmoid(x): 75 | return 1. / (1. 
+ np.exp(-x)) 76 | 77 | def _softmax(x, axis=-1, t=-100.): 78 | x = x - np.max(x) 79 | if np.min(x) < t: 80 | x = x/np.min(x)*t 81 | e_x = np.exp(x) 82 | return e_x / e_x.sum(axis, keepdims=True) 83 | 84 | def yolo_resize_factor(video: cv2.VideoCapture, input_binding_info: tuple): 85 | """ 86 | Gets a multiplier to scale the bounding box positions to 87 | their correct position in the frame. 88 | 89 | Args: 90 | video: Video capture object, contains information about data source. 91 | input_binding_info: Contains shape of model input layer. 92 | 93 | Returns: 94 | Resizing factor to scale box coordinates to output frame size. 95 | """ 96 | frame_height = video.get(cv2.CAP_PROP_FRAME_HEIGHT) 97 | frame_width = video.get(cv2.CAP_PROP_FRAME_WIDTH) 98 | model_height, model_width = list(input_binding_info[1].GetShape())[1:3] 99 | return max(frame_height, frame_width) / max(model_height, model_width) 100 | -------------------------------------------------------------------------------- /examples/armnn/face_keypoints/README.md: -------------------------------------------------------------------------------- 1 | # PyArmNN Face keypoint detection Sample Application 2 | 3 | ## Introduction 4 | This sample application shows how to perform face keypoint detection using the PyArmNN API. 5 | 6 | The application takes two models and a video file or camera feed as input, runs inference on each frame, and draws bounding boxes around detected faces and five keypoints (left eye, right eye, nose, left corner of the mouth, right corner of the mouth) with the corresponding labels and confidence scores overlaid. 7 | 8 | ## Face keypoint detection from Video File 9 | Face keypoint detection demo that takes a video file, runs inference on each frame producing 10 | bounding boxes and five keypoints on detected faces, and saves the processed video. 11 | 12 | Example usage: 13 | 14 | ```bash 15 | python3 run_video_file.py --first_model_file_path YOLO_best_mAP.tflite --second_model_file_path MobileFaceNet_kpts.tflite --video_file_path ../samples/test_s.mp4 16 | ``` 17 | 18 | ## Face keypoint detection from Video Stream 19 | 20 | Face keypoint detection demo that takes a video stream from a device, runs inference 21 | on each frame producing bounding boxes and five keypoints on detected faces, and displays a window with the latest processed frame.
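In both demos, the second-stage keypoint model returns ten values in the range [0, 1]: an (x, y) pair for each of the five keypoints, expressed relative to the cropped face box. The snippet below is a simplified sketch of how `process_faces()` maps those values back to frame coordinates; the face box and model output shown are made-up illustration values.

```python
# Minimal sketch: map normalised keypoint outputs back to frame coordinates.
# `box` is the detected face box in frame coordinates (x_min, y_min, x_max, y_max),
# `plist` is the raw model output [x0, y0, x1, y1, ...] for the five keypoints.
def keypoints_to_frame(plist, box):
    x_min, y_min, x_max, y_max = box
    w, h = x_max - x_min, y_max - y_min
    return [(x_min + int(plist[2 * i] * w), y_min + int(plist[2 * i + 1] * h))
            for i in range(5)]

# Illustrative values only: a 128x128 face box at (100, 120) and a made-up prediction.
print(keypoints_to_frame([0.3, 0.4, 0.7, 0.4, 0.5, 0.6, 0.35, 0.8, 0.65, 0.8],
                         (100, 120, 228, 248)))
```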
22 | 23 | Example usage: 24 | 25 | ```bash 26 | DISPLAY=:0 python3 run_video_stream.py --first_model_file_path YOLO_best_mAP.tflite --second_model_file MobileFaceNet_kpts.tflite 27 | ``` 28 | 29 | This application has been verified to work against the YOLOv2 detection layer MobileNet models and MobileFaceNet keypoints detector, which can be downloaded from: 30 | 31 | https://files.seeedstudio.com/ml/keypoint_detection_models.zip 32 | -------------------------------------------------------------------------------- /examples/armnn/face_keypoints/box.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | # Todo : BoundBox & its related method extraction 6 | class BoundBox: 7 | def __init__(self, x, y, w, h, c = None, classes = None): 8 | self.x = x 9 | self.y = y 10 | self.w = w 11 | self.h = h 12 | 13 | self.c = c 14 | self.classes = classes 15 | 16 | def get_label(self): 17 | return np.argmax(self.classes) 18 | 19 | def get_score(self): 20 | return self.classes[self.get_label()] 21 | 22 | def iou(self, bound_box): 23 | b1 = self.as_centroid() 24 | b2 = bound_box.as_centroid() 25 | return centroid_box_iou(b1, b2) 26 | 27 | def as_centroid(self): 28 | return np.array([self.x, self.y, self.w, self.h]) 29 | 30 | 31 | def boxes_to_array(bound_boxes): 32 | """ 33 | # Args 34 | boxes : list of BoundBox instances 35 | 36 | # Returns 37 | centroid_boxes : (N, 4) 38 | probs : (N, nb_classes) 39 | """ 40 | centroid_boxes = [] 41 | probs = [] 42 | for box in bound_boxes: 43 | centroid_boxes.append([box.x, box.y, box.w, box.h]) 44 | probs.append(box.classes) 45 | return np.array(centroid_boxes), np.array(probs) 46 | 47 | 48 | def nms_boxes(boxes, n_classes, nms_threshold=0.3, obj_threshold=0.3): 49 | """ 50 | # Args 51 | boxes : list of BoundBox 52 | 53 | # Returns 54 | boxes : list of BoundBox 55 | non maximum supressed BoundBox instances 56 | """ 57 | # suppress non-maximal boxes 58 | for c in range(n_classes): 59 | sorted_indices = list(reversed(np.argsort([box.classes[c] for box in boxes]))) 60 | 61 | for i in range(len(sorted_indices)): 62 | index_i = sorted_indices[i] 63 | 64 | if boxes[index_i].classes[c] == 0: 65 | continue 66 | else: 67 | for j in range(i+1, len(sorted_indices)): 68 | index_j = sorted_indices[j] 69 | 70 | if boxes[index_i].iou(boxes[index_j]) >= nms_threshold: 71 | boxes[index_j].classes[c] = 0 72 | # remove the boxes which are less likely than a obj_threshold 73 | boxes = [box for box in boxes if box.get_score() > obj_threshold] 74 | return boxes 75 | 76 | 77 | def draw_scaled_boxes(image, boxes, probs, labels, desired_size=400): 78 | img_size = min(image.shape[:2]) 79 | if img_size < desired_size: 80 | scale_factor = float(desired_size) / img_size 81 | else: 82 | scale_factor = 1.0 83 | 84 | h, w = image.shape[:2] 85 | img_scaled = cv2.resize(image, (int(w*scale_factor), int(h*scale_factor))) 86 | if boxes != []: 87 | boxes_scaled = boxes*scale_factor 88 | boxes_scaled = boxes_scaled.astype(np.int) 89 | else: 90 | boxes_scaled = boxes 91 | return draw_boxes(img_scaled, boxes_scaled, probs, labels) 92 | 93 | 94 | def draw_boxes(image, boxes, probs, labels): 95 | for box, classes in zip(boxes, probs): 96 | x1, y1, x2, y2 = box 97 | cv2.rectangle(image, (x1,y1), (x2,y2), (0,255,0), 3) 98 | cv2.putText(image, 99 | '{}: {:.2f}'.format(labels[np.argmax(classes)], classes.max()), 100 | (x1, y1 - 13), 101 | cv2.FONT_HERSHEY_SIMPLEX, 102 | 1e-3 * image.shape[0], 103 | (0,255,0), 2) 104 | return image 105 | 
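# The helpers below work with two box formats: "centroid" boxes [cx, cy, w, h] and
# "minmax" boxes [x_min, y_min, x_max, y_max]; to_centroid() and to_minmax() convert
# between them, and centroid_box_iou() returns the intersection-over-union of two
# centroid boxes. For example, np.array([0.5, 0.5, 0.4, 0.4]) and
# np.array([0.5, 0.5, 0.2, 0.2]) overlap on a 0.2 x 0.2 patch, so the IoU is
# 0.04 / (0.16 + 0.04 - 0.04) = 0.25.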
106 | 107 | def centroid_box_iou(box1, box2): 108 | def _interval_overlap(interval_a, interval_b): 109 | x1, x2 = interval_a 110 | x3, x4 = interval_b 111 | 112 | if x3 < x1: 113 | if x4 < x1: 114 | return 0 115 | else: 116 | return min(x2,x4) - x1 117 | else: 118 | if x2 < x3: 119 | return 0 120 | else: 121 | return min(x2,x4) - x3 122 | 123 | _, _, w1, h1 = box1.reshape(-1,) 124 | _, _, w2, h2 = box2.reshape(-1,) 125 | x1_min, y1_min, x1_max, y1_max = to_minmax(box1.reshape(-1,4)).reshape(-1,) 126 | x2_min, y2_min, x2_max, y2_max = to_minmax(box2.reshape(-1,4)).reshape(-1,) 127 | 128 | intersect_w = _interval_overlap([x1_min, x1_max], [x2_min, x2_max]) 129 | intersect_h = _interval_overlap([y1_min, y1_max], [y2_min, y2_max]) 130 | intersect = intersect_w * intersect_h 131 | union = w1 * h1 + w2 * h2 - intersect 132 | 133 | return float(intersect) / union 134 | 135 | 136 | def to_centroid(minmax_boxes): 137 | """ 138 | minmax_boxes : (N, 4) 139 | """ 140 | minmax_boxes = minmax_boxes.astype(np.float) 141 | centroid_boxes = np.zeros_like(minmax_boxes) 142 | 143 | x1 = minmax_boxes[:,0] 144 | y1 = minmax_boxes[:,1] 145 | x2 = minmax_boxes[:,2] 146 | y2 = minmax_boxes[:,3] 147 | 148 | centroid_boxes[:,0] = (x1 + x2) / 2 149 | centroid_boxes[:,1] = (y1 + y2) / 2 150 | centroid_boxes[:,2] = x2 - x1 151 | centroid_boxes[:,3] = y2 - y1 152 | return centroid_boxes 153 | 154 | def to_minmax(centroid_boxes): 155 | centroid_boxes = centroid_boxes.astype(np.float) 156 | minmax_boxes = np.zeros_like(centroid_boxes) 157 | 158 | cx = centroid_boxes[:,0] 159 | cy = centroid_boxes[:,1] 160 | w = centroid_boxes[:,2] 161 | h = centroid_boxes[:,3] 162 | 163 | minmax_boxes[:,0] = cx - w/2 164 | minmax_boxes[:,1] = cy - h/2 165 | minmax_boxes[:,2] = cx + w/2 166 | minmax_boxes[:,3] = cy + h/2 167 | return minmax_boxes 168 | 169 | def create_anchor_boxes(anchors): 170 | """ 171 | # Args 172 | anchors : list of floats 173 | # Returns 174 | boxes : array, shape of (len(anchors)/2, 4) 175 | centroid-type 176 | """ 177 | boxes = [] 178 | n_boxes = int(len(anchors)/2) 179 | for i in range(n_boxes): 180 | boxes.append(np.array([0, 0, anchors[2*i], anchors[2*i+1]])) 181 | return np.array(boxes) 182 | 183 | def find_match_box(centroid_box, centroid_boxes): 184 | """Find the index of the boxes with the largest overlap among the N-boxes. 185 | # Args 186 | box : array, shape of (1, 4) 187 | boxes : array, shape of (N, 4) 188 | 189 | # Return 190 | match_index : int 191 | """ 192 | match_index = -1 193 | max_iou = -1 194 | 195 | for i, box in enumerate(centroid_boxes): 196 | iou = centroid_box_iou(centroid_box, box) 197 | 198 | if max_iou < iou: 199 | match_index = i 200 | max_iou = iou 201 | return match_index 202 | -------------------------------------------------------------------------------- /examples/armnn/face_keypoints/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.19.2 2 | tqdm>=4.47.0 3 | -------------------------------------------------------------------------------- /examples/armnn/face_keypoints/run_video_file.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | """ 6 | Face keypoint detection demo that takes a video file, runs inference on each frame producing 7 | bounding boxes and five keypoints on detected faces, and saves the processed video. 
8 | 9 | python3 run_video_file.py --first_model_file_path YOLO_best_mAP.tflite --second_model_file MobileFaceNet_kpts.tflite --video_file_path ../samples/test_s.mp4 10 | 11 | """ 12 | 13 | import os 14 | import sys 15 | import time 16 | script_dir = os.path.dirname(__file__) 17 | sys.path.insert(1, os.path.join(script_dir, '..', 'common')) 18 | 19 | import cv2 20 | import numpy as np 21 | from tqdm import tqdm 22 | from argparse import ArgumentParser 23 | 24 | from yolov2 import yolo_processing, yolo_resize_factor 25 | from utils import dict_labels 26 | from cv_utils import init_video_file_capture, resize_with_aspect_ratio, preprocess, preprocess_array 27 | from network_executor import ArmnnNetworkExecutor 28 | import pyarmnn as ann 29 | 30 | def process_faces(frame, detections, executor_kp, resize_factor): 31 | kpts_list = [] 32 | 33 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 34 | 35 | for detection in detections: 36 | box = detection[1].copy() 37 | for i in range(len(box)): 38 | box[i] = int(box[i] * resize_factor) 39 | 40 | x, y, w, h = box[0], box[1], box[2] - box[0], box[3] - box[1] 41 | frame_height, frame_width = frame.shape[:2] 42 | x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3] 43 | # Ensure box stays within the frame 44 | x_min, y_min = max(0, x_min), max(0, y_min) 45 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 46 | 47 | face_img = frame[y_min:y_max, x_min:x_max] 48 | face_img = cv2.resize(face_img, (128, 128)) 49 | 50 | face_img = face_img.astype(np.float32) 51 | face_img = preprocess_array(face_img) 52 | 53 | input_tensors = ann.make_input_tensors([executor_kp.input_binding_info], [face_img]) 54 | 55 | plist = executor_kp.run(input_tensors)[0][0] 56 | 57 | le = (x + int(plist[0] * w+5), y + int(plist[1] * h+5)) 58 | re = (x + int(plist[2] * w), y + int(plist[3] * h)) 59 | n = (x + int(plist[4] * w), y + int(plist[5] * h)) 60 | lm = (x + int(plist[6] * w), y + int(plist[7] * h)) 61 | rm = (x + int(plist[8] * w), y + int(plist[9] * h)) 62 | kpts = [le, re, n, lm, rm] 63 | 64 | kpts_list.append(kpts) 65 | 66 | return kpts_list 67 | 68 | def draw_result(frame: np.ndarray, detections: list, resize_factor, kpts): 69 | """ 70 | Draws bounding boxes around detected objects and adds a label and confidence score. 71 | 72 | Args: 73 | frame: The original captured frame from video source. 74 | detections: A list of detected objects in the form [class, [box positions], confidence]. 75 | resize_factor: Resizing factor to scale box coordinates to output frame size. 76 | kpts: List containing information about face keypoints in format [[le, re, n, lm, rm], [le, re, n, lm, rm], ...] 
77 | """ 78 | for i in range(len(detections)): 79 | class_idx, box, confidence = [d for d in detections[i]] 80 | label, color = 'Person', (0, 255, 0) 81 | 82 | # Obtain frame size and resized bounding box positions 83 | frame_height, frame_width = frame.shape[:2] 84 | x_min, y_min, x_max, y_max = [int(position * resize_factor) for position in box] 85 | 86 | # Ensure box stays within the frame 87 | x_min, y_min = max(0, x_min), max(0, y_min) 88 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 89 | 90 | # Draw bounding box around detected object 91 | cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2) 92 | 93 | # Create label for detected object class 94 | label = f'{label} {confidence * 100:.1f}%' 95 | label_color = (0, 0, 0) if sum(color)>200 else (255, 255, 255) 96 | 97 | # Make sure label always stays on-screen 98 | x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2] 99 | 100 | lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text) 101 | lbl_box_xy_max = (x_min + int(0.55 * x_text), y_min + y_text if y_min<25 else y_min) 102 | lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5) 103 | 104 | # Add label and confidence value 105 | cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1) 106 | cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.50, 107 | label_color, 1, cv2.LINE_AA) 108 | 109 | for kpt in kpts[i]: 110 | cv2.circle(frame, (int(kpt[0]), int(kpt[1])), 5, (0, 0, 255), 5) 111 | 112 | 113 | def main(args): 114 | video, video_writer, frame_count = init_video_file_capture(args.video_file_path, 'face_keypoint_demo') 115 | frame_num = len(frame_count) 116 | 117 | executor_fd = ArmnnNetworkExecutor(args.first_model_file_path, args.preferred_backends) 118 | executor_kp = ArmnnNetworkExecutor(args.second_model_file_path, args.preferred_backends) 119 | 120 | process_output, resize_factor = yolo_processing, yolo_resize_factor(video, executor_fd.input_binding_info) 121 | 122 | times = [] 123 | 124 | for _ in tqdm(frame_count, desc='Processing frames'): 125 | frame_present, frame = video.read() 126 | if not frame_present: 127 | continue 128 | 129 | input_tensors = preprocess(frame, executor_fd.input_binding_info) 130 | 131 | start_time = time.time() # start time of the loop 132 | output_result = executor_fd.run(input_tensors) 133 | detections = process_output(output_result) 134 | kpts = process_faces(frame, detections, executor_kp, resize_factor) 135 | end_time = (time.time() - start_time)*1000 136 | 137 | draw_result(frame, detections, resize_factor, kpts) 138 | times.append(end_time) 139 | video_writer.write(frame) 140 | 141 | print('Finished processing frames') 142 | video.release(), video_writer.release() 143 | 144 | print("Average time(ms): ", sum(times)//frame_num) 145 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1 / time to process loop 146 | 147 | if __name__ == '__main__': 148 | parser = ArgumentParser() 149 | parser.add_argument('--video_file_path', required=True, type=str, 150 | help='Path to the video file to run object detection on') 151 | 152 | parser.add_argument('--first_model_file_path', required=True, type=str, 153 | help='Path to the first stage model to use') 154 | parser.add_argument('--second_model_file_path', required=True, type=str, 155 | help='Path to the second stage model to use') 156 | 157 | parser.add_argument('--preferred_backends', type=str, nargs='+', default=['CpuAcc', 'CpuRef'], 158 | help='Takes the preferred backends in preference order, 
separated by whitespace, ' 159 | 'for example: CpuAcc GpuAcc CpuRef. Accepted options: [CpuAcc, CpuRef, GpuAcc]. ' 160 | 'Defaults to [CpuAcc, CpuRef]') 161 | args = parser.parse_args() 162 | main(args) 163 | -------------------------------------------------------------------------------- /examples/armnn/face_keypoints/run_video_stream.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | """ 6 | Face keypoint detection demo that takes a video file, takes a video stream from a device, runs inference 7 | on each frame producing bounding boxes and five keypoints on detected faces, and displays a window with the latest processed frame. 8 | 9 | DISPLAY=:0 python3 run_video_stream.py --first_model_file_path YOLO_best_mAP.tflite --second_model_file MobileFaceNet_kpts.tflite 10 | 11 | """ 12 | 13 | import os 14 | import sys 15 | import time 16 | script_dir = os.path.dirname(__file__) 17 | sys.path.insert(1, os.path.join(script_dir, '..', 'common')) 18 | 19 | import cv2 20 | import numpy as np 21 | from tqdm import tqdm 22 | from argparse import ArgumentParser 23 | 24 | from yolov2 import yolo_processing, yolo_resize_factor 25 | 26 | from cv_utils import init_video_stream_capture, resize_with_aspect_ratio, preprocess, preprocess_array 27 | from network_executor import ArmnnNetworkExecutor 28 | import pyarmnn as ann 29 | 30 | def process_faces(frame, detections, executor_kp, resize_factor): 31 | kpts_list = [] 32 | 33 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 34 | 35 | for detection in detections: 36 | box = detection[1].copy() 37 | for i in range(len(box)): 38 | box[i] = int(box[i] * resize_factor) 39 | 40 | x, y, w, h = box[0], box[1], box[2] - box[0], box[3] - box[1] 41 | frame_height, frame_width = frame.shape[:2] 42 | x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3] 43 | # Ensure box stays within the frame 44 | x_min, y_min = max(0, x_min), max(0, y_min) 45 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 46 | 47 | face_img = frame[y_min:y_max, x_min:x_max] 48 | face_img = cv2.resize(face_img, (128, 128)) 49 | 50 | face_img = face_img.astype(np.float32) 51 | face_img = preprocess_array(face_img) 52 | 53 | input_tensors = ann.make_input_tensors([executor_kp.input_binding_info], [face_img]) 54 | 55 | plist = executor_kp.run(input_tensors)[0][0] 56 | 57 | le = (x + int(plist[0] * w+5), y + int(plist[1] * h+5)) 58 | re = (x + int(plist[2] * w), y + int(plist[3] * h)) 59 | n = (x + int(plist[4] * w), y + int(plist[5] * h)) 60 | lm = (x + int(plist[6] * w), y + int(plist[7] * h)) 61 | rm = (x + int(plist[8] * w), y + int(plist[9] * h)) 62 | kpts = [le, re, n, lm, rm] 63 | 64 | kpts_list.append(kpts) 65 | 66 | return kpts_list 67 | 68 | def draw_result(frame: np.ndarray, detections: list, resize_factor, kpts): 69 | """ 70 | Draws bounding boxes around detected objects and adds a label and confidence score. 71 | 72 | Args: 73 | frame: The original captured frame from video source. 74 | detections: A list of detected objects in the form [class, [box positions], confidence]. 75 | resize_factor: Resizing factor to scale box coordinates to output frame size. 76 | kpts: List containing information about face keypoints in format [[le, re, n, lm, rm], [le, re, n, lm, rm], ...] 
77 | """ 78 | 79 | for i in range(len(detections)): 80 | class_idx, box, confidence = [d for d in detections[i]] 81 | label, color = 'Person', (0, 255, 0) 82 | 83 | # Obtain frame size and resized bounding box positions 84 | frame_height, frame_width = frame.shape[:2] 85 | x_min, y_min, x_max, y_max = [int(position * resize_factor) for position in box] 86 | 87 | # Ensure box stays within the frame 88 | x_min, y_min = max(0, x_min), max(0, y_min) 89 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 90 | 91 | # Draw bounding box around detected object 92 | cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2) 93 | 94 | # Create label for detected object class 95 | label = f'{label} {confidence * 100:.1f}%' 96 | label_color = (0, 0, 0) if sum(color)>200 else (255, 255, 255) 97 | 98 | # Make sure label always stays on-screen 99 | x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2] 100 | 101 | lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text) 102 | lbl_box_xy_max = (x_min + int(0.55 * x_text), y_min + y_text if y_min<25 else y_min) 103 | lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5) 104 | 105 | # Add label and confidence value 106 | cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1) 107 | cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.50, 108 | label_color, 1, cv2.LINE_AA) 109 | 110 | for kpt in kpts[i]: 111 | cv2.circle(frame, (int(kpt[0]), int(kpt[1])), 5, (0, 0, 255), 5) 112 | 113 | def main(args): 114 | video = init_video_stream_capture(args.video_source) 115 | 116 | executor_fd = ArmnnNetworkExecutor(args.first_model_file_path, args.preferred_backends) 117 | executor_kp = ArmnnNetworkExecutor(args.second_model_file_path, args.preferred_backends) 118 | 119 | process_output, resize_factor = yolo_processing, yolo_resize_factor(video, executor_fd.input_binding_info) 120 | 121 | while True: 122 | 123 | frame_present, frame = video.read() 124 | frame = cv2.flip(frame, 1) # Horizontally flip the frame 125 | if not frame_present: 126 | raise RuntimeError('Error reading frame from video stream') 127 | input_tensors = preprocess(frame, executor_fd.input_binding_info) 128 | print("Running inference...") 129 | 130 | start_time = time.time() # start time of the loop 131 | output_result = executor_fd.run(input_tensors) 132 | detections = process_output(output_result) 133 | kpts = process_faces(frame, detections, executor_kp, resize_factor) 134 | 135 | print("FPS: ", 1.0 / (time.time() - start_time)) # FPS = 1 / time to process loop 136 | print("Time(ms): ", (time.time() - start_time)*1000) 137 | 138 | draw_result(frame, detections, resize_factor, kpts) 139 | cv2.imshow('PyArmNN Object Detection Demo', frame) 140 | 141 | if cv2.waitKey(1) == 27: 142 | print('\nExit key activated. Closing video...') 143 | break 144 | video.release(), cv2.destroyAllWindows() 145 | 146 | 147 | if __name__ == '__main__': 148 | parser = ArgumentParser() 149 | parser.add_argument('--video_source', type=int, default=0, 150 | help='Device index to access video stream. 
Defaults to primary device camera at index 0') 151 | 152 | parser.add_argument('--first_model_file_path', required=True, type=str, 153 | help='Path to the first stage model to use') 154 | parser.add_argument('--second_model_file_path', required=True, type=str, 155 | help='Path to the second stage model to use') 156 | 157 | parser.add_argument('--preferred_backends', type=str, nargs='+', default=['CpuAcc', 'CpuRef'], 158 | help='Takes the preferred backends in preference order, separated by whitespace, ' 159 | 'for example: CpuAcc GpuAcc CpuRef. Accepted options: [CpuAcc, CpuRef, GpuAcc]. ' 160 | 'Defaults to [CpuAcc, CpuRef]') 161 | args = parser.parse_args() 162 | main(args) 163 | -------------------------------------------------------------------------------- /examples/armnn/face_keypoints/yolov2.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 2 | # SPDX-License-Identifier: MIT 3 | 4 | """ 5 | Contains functions specific to decoding and processing inference results for YOLO V3 Tiny models. 6 | """ 7 | 8 | import cv2 9 | import numpy as np 10 | from box import BoundBox, nms_boxes, boxes_to_array, to_minmax, draw_boxes 11 | 12 | 13 | def yolo_processing(netout): 14 | anchors = [1.889, 2.5245, 2.9465, 3.94056, 3.99987, 5.3658, 5.155437, 6.92275, 6.718375, 9.01025] 15 | nms_threshold=0.2 16 | """Convert Yolo network output to bounding box 17 | 18 | # Args 19 | netout : 4d-array, shape of (grid_h, grid_w, num of boxes per grid, 5 + n_classes) 20 | YOLO neural network output array 21 | 22 | # Returns 23 | boxes : array, shape of (N, 4) 24 | coordinate scale is normalized [0, 1] 25 | probs : array, shape of (N, nb_classes) 26 | """ 27 | netout = netout[0].reshape(7,7,5,6) 28 | grid_h, grid_w, nb_box = netout.shape[:3] 29 | boxes = [] 30 | 31 | # decode the output by the network 32 | netout[..., 4] = _sigmoid(netout[..., 4]) 33 | netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:]) 34 | netout[..., 5:] *= netout[..., 5:] > 0.3 35 | 36 | for row in range(grid_h): 37 | for col in range(grid_w): 38 | for b in range(nb_box): 39 | # from 4th element onwards are confidence and class classes 40 | classes = netout[row,col,b,5:] 41 | 42 | if np.sum(classes) > 0: 43 | # first 4 elements are x, y, w, and h 44 | x, y, w, h = netout[row,col,b,:4] 45 | 46 | x = (col + _sigmoid(x)) / grid_w # center position, unit: image width 47 | y = (row + _sigmoid(y)) / grid_h # center position, unit: image height 48 | w = anchors[2 * b + 0] * np.exp(w) / grid_w # unit: image width 49 | h = anchors[2 * b + 1] * np.exp(h) / grid_h # unit: image height 50 | confidence = netout[row,col,b,4] 51 | box = BoundBox(x, y, w, h, confidence, classes) 52 | boxes.append(box) 53 | 54 | boxes = nms_boxes(boxes, len(classes), nms_threshold, 0.3) 55 | boxes, probs = boxes_to_array(boxes) 56 | #print(boxes) 57 | predictions = [] 58 | def _to_original_scale(boxes): 59 | minmax_boxes = to_minmax(boxes) 60 | minmax_boxes[:,0] *= 224 61 | minmax_boxes[:,2] *= 224 62 | minmax_boxes[:,1] *= 224 63 | minmax_boxes[:,3] *= 224 64 | return minmax_boxes.astype(np.int) 65 | 66 | if len(boxes) > 0: 67 | boxes = _to_original_scale(boxes) 68 | 69 | for i in range(len(boxes)): 70 | predictions.append([0, boxes[i], probs[i][0]]) 71 | 72 | return predictions 73 | 74 | def _sigmoid(x): 75 | return 1. / (1. 
+ np.exp(-x)) 76 | 77 | def _softmax(x, axis=-1, t=-100.): 78 | x = x - np.max(x) 79 | if np.min(x) < t: 80 | x = x/np.min(x)*t 81 | e_x = np.exp(x) 82 | return e_x / e_x.sum(axis, keepdims=True) 83 | 84 | def yolo_resize_factor(video: cv2.VideoCapture, input_binding_info: tuple): 85 | """ 86 | Gets a multiplier to scale the bounding box positions to 87 | their correct position in the frame. 88 | 89 | Args: 90 | video: Video capture object, contains information about data source. 91 | input_binding_info: Contains shape of model input layer. 92 | 93 | Returns: 94 | Resizing factor to scale box coordinates to output frame size. 95 | """ 96 | frame_height = video.get(cv2.CAP_PROP_FRAME_HEIGHT) 97 | frame_width = video.get(cv2.CAP_PROP_FRAME_WIDTH) 98 | model_height, model_width = list(input_binding_info[1].GetShape())[1:3] 99 | return max(frame_height, frame_width) / max(model_height, model_width) 100 | -------------------------------------------------------------------------------- /examples/armnn/face_recognition/README.md: -------------------------------------------------------------------------------- 1 | # PyArmNN Face recognition Sample Application 2 | 3 | ## Introduction 4 | This sample application shows how to perform face recognition using the PyArmNN API. 5 | 6 | The application takes three models and a video file or camera feed as input, runs inference on each frame, and produces bounding boxes and ID numbers corresponding to entries in the database. 7 | 8 | ## Database population 9 | 10 | Before we can run face recognition, we need to extract features from the faces we want to recognize and save the feature embedding vectors in encoded form in a .json file, which serves as a small database. You can do that with calculate_features.py. 11 | 12 | Example usage: 13 | 14 | ```bash 15 | python3 calculate_features.py --fd_model_file_path ../face_rec_models/YOLOv2_best_mAP.tflite --kp_model_file_path ../face_rec_models/MobileFaceNet_kpts.tflite --fe_model_file_path ../face_rec_models/MobileFaceNet_features.tflite --db_file_path database.db --id 0 --name Paul --picture_file_path paul.png 16 | ``` 17 | 18 | ## Face recognition from Video File 19 | Face recognition demo that takes a video file, runs inference on each frame producing 20 | bounding boxes and ID numbers corresponding to entries in the database, and saves the processed video. 21 | 22 | Example usage: 23 | 24 | ```bash 25 | python3 run_video_file.py --video_file_path test_s.mp4 --db_file_path database.db --fd_model_file_path ../face_rec_models/YOLOv2_best_mAP.tflite --kp_model_file_path ../face_rec_models/MobileFaceNet_kpts.tflite --fe_model_file_path ../face_rec_models/MobileFaceNet_features.tflite 26 | ``` 27 | 28 | ## Face recognition from Video Stream 29 | 30 | Face recognition demo that takes a video stream from a device, runs inference 31 | on each frame producing bounding boxes and ID numbers corresponding to entries in the database, 32 | and displays a window with the latest processed frame.
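At recognition time, the embedding extracted from each detected face is compared against the stored vectors, and the ID of the closest entry is reported. The matching logic itself lives in the run scripts and is not shown here, so the snippet below is only a rough sketch under the assumption of a Euclidean nearest-neighbour match with a rejection threshold; the function name and threshold value are illustrative.

```python
import numpy as np

# Rough sketch (assumed logic): pick the database entry whose stored embedding is
# closest to the query embedding, and reject the match if it is still too far away.
def match_face(embedding, database, threshold=1.0):
    best_id, best_dist = None, float('inf')
    for person_id, stored in database.items():
        dist = np.linalg.norm(np.asarray(embedding) - np.asarray(stored))
        if dist < best_dist:
            best_id, best_dist = person_id, dist
    return best_id if best_dist < threshold else None
```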
33 | 34 | Example usage: 35 | 36 | ```bash 37 | DISPLAY=:0 python3 run_video_stream.py --db_file_path database.db --fd_model_file_path ../face_rec_models/YOLOv2_best_mAP.tflite --kp_model_file_path ../face_rec_models/MobileFaceNet_kpts.tflite --fe_model_file_path ../face_rec_models/MobileFaceNet_features.tflite 38 | ``` 39 | 40 | This application has been verified to work against the YOLOv2 detection layer MobileNet models, MobileFaceNet keypoints detector and MobileFaceNet face feature embedding extractor which can be downloaded from: 41 | 42 | WIP 43 | -------------------------------------------------------------------------------- /examples/armnn/face_recognition/box.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | # Todo : BoundBox & its related method extraction 6 | class BoundBox: 7 | def __init__(self, x, y, w, h, c = None, classes = None): 8 | self.x = x 9 | self.y = y 10 | self.w = w 11 | self.h = h 12 | 13 | self.c = c 14 | self.classes = classes 15 | 16 | def get_label(self): 17 | return np.argmax(self.classes) 18 | 19 | def get_score(self): 20 | return self.classes[self.get_label()] 21 | 22 | def iou(self, bound_box): 23 | b1 = self.as_centroid() 24 | b2 = bound_box.as_centroid() 25 | return centroid_box_iou(b1, b2) 26 | 27 | def as_centroid(self): 28 | return np.array([self.x, self.y, self.w, self.h]) 29 | 30 | 31 | def boxes_to_array(bound_boxes): 32 | """ 33 | # Args 34 | boxes : list of BoundBox instances 35 | 36 | # Returns 37 | centroid_boxes : (N, 4) 38 | probs : (N, nb_classes) 39 | """ 40 | centroid_boxes = [] 41 | probs = [] 42 | for box in bound_boxes: 43 | centroid_boxes.append([box.x, box.y, box.w, box.h]) 44 | probs.append(box.classes) 45 | return np.array(centroid_boxes), np.array(probs) 46 | 47 | 48 | def nms_boxes(boxes, n_classes, nms_threshold=0.3, obj_threshold=0.3): 49 | """ 50 | # Args 51 | boxes : list of BoundBox 52 | 53 | # Returns 54 | boxes : list of BoundBox 55 | non maximum supressed BoundBox instances 56 | """ 57 | # suppress non-maximal boxes 58 | for c in range(n_classes): 59 | sorted_indices = list(reversed(np.argsort([box.classes[c] for box in boxes]))) 60 | 61 | for i in range(len(sorted_indices)): 62 | index_i = sorted_indices[i] 63 | 64 | if boxes[index_i].classes[c] == 0: 65 | continue 66 | else: 67 | for j in range(i+1, len(sorted_indices)): 68 | index_j = sorted_indices[j] 69 | 70 | if boxes[index_i].iou(boxes[index_j]) >= nms_threshold: 71 | boxes[index_j].classes[c] = 0 72 | # remove the boxes which are less likely than a obj_threshold 73 | boxes = [box for box in boxes if box.get_score() > obj_threshold] 74 | return boxes 75 | 76 | 77 | def draw_scaled_boxes(image, boxes, probs, labels, desired_size=400): 78 | img_size = min(image.shape[:2]) 79 | if img_size < desired_size: 80 | scale_factor = float(desired_size) / img_size 81 | else: 82 | scale_factor = 1.0 83 | 84 | h, w = image.shape[:2] 85 | img_scaled = cv2.resize(image, (int(w*scale_factor), int(h*scale_factor))) 86 | if boxes != []: 87 | boxes_scaled = boxes*scale_factor 88 | boxes_scaled = boxes_scaled.astype(np.int) 89 | else: 90 | boxes_scaled = boxes 91 | return draw_boxes(img_scaled, boxes_scaled, probs, labels) 92 | 93 | 94 | def draw_boxes(image, boxes, probs, labels): 95 | for box, classes in zip(boxes, probs): 96 | x1, y1, x2, y2 = box 97 | cv2.rectangle(image, (x1,y1), (x2,y2), (0,255,0), 3) 98 | cv2.putText(image, 99 | '{}: {:.2f}'.format(labels[np.argmax(classes)], classes.max()), 
100 | (x1, y1 - 13), 101 | cv2.FONT_HERSHEY_SIMPLEX, 102 | 1e-3 * image.shape[0], 103 | (0,255,0), 2) 104 | return image 105 | 106 | 107 | def centroid_box_iou(box1, box2): 108 | def _interval_overlap(interval_a, interval_b): 109 | x1, x2 = interval_a 110 | x3, x4 = interval_b 111 | 112 | if x3 < x1: 113 | if x4 < x1: 114 | return 0 115 | else: 116 | return min(x2,x4) - x1 117 | else: 118 | if x2 < x3: 119 | return 0 120 | else: 121 | return min(x2,x4) - x3 122 | 123 | _, _, w1, h1 = box1.reshape(-1,) 124 | _, _, w2, h2 = box2.reshape(-1,) 125 | x1_min, y1_min, x1_max, y1_max = to_minmax(box1.reshape(-1,4)).reshape(-1,) 126 | x2_min, y2_min, x2_max, y2_max = to_minmax(box2.reshape(-1,4)).reshape(-1,) 127 | 128 | intersect_w = _interval_overlap([x1_min, x1_max], [x2_min, x2_max]) 129 | intersect_h = _interval_overlap([y1_min, y1_max], [y2_min, y2_max]) 130 | intersect = intersect_w * intersect_h 131 | union = w1 * h1 + w2 * h2 - intersect 132 | 133 | return float(intersect) / union 134 | 135 | 136 | def to_centroid(minmax_boxes): 137 | """ 138 | minmax_boxes : (N, 4) 139 | """ 140 | minmax_boxes = minmax_boxes.astype(np.float) 141 | centroid_boxes = np.zeros_like(minmax_boxes) 142 | 143 | x1 = minmax_boxes[:,0] 144 | y1 = minmax_boxes[:,1] 145 | x2 = minmax_boxes[:,2] 146 | y2 = minmax_boxes[:,3] 147 | 148 | centroid_boxes[:,0] = (x1 + x2) / 2 149 | centroid_boxes[:,1] = (y1 + y2) / 2 150 | centroid_boxes[:,2] = x2 - x1 151 | centroid_boxes[:,3] = y2 - y1 152 | return centroid_boxes 153 | 154 | def to_minmax(centroid_boxes): 155 | centroid_boxes = centroid_boxes.astype(np.float) 156 | minmax_boxes = np.zeros_like(centroid_boxes) 157 | 158 | cx = centroid_boxes[:,0] 159 | cy = centroid_boxes[:,1] 160 | w = centroid_boxes[:,2] 161 | h = centroid_boxes[:,3] 162 | 163 | minmax_boxes[:,0] = cx - w/2 164 | minmax_boxes[:,1] = cy - h/2 165 | minmax_boxes[:,2] = cx + w/2 166 | minmax_boxes[:,3] = cy + h/2 167 | return minmax_boxes 168 | 169 | def create_anchor_boxes(anchors): 170 | """ 171 | # Args 172 | anchors : list of floats 173 | # Returns 174 | boxes : array, shape of (len(anchors)/2, 4) 175 | centroid-type 176 | """ 177 | boxes = [] 178 | n_boxes = int(len(anchors)/2) 179 | for i in range(n_boxes): 180 | boxes.append(np.array([0, 0, anchors[2*i], anchors[2*i+1]])) 181 | return np.array(boxes) 182 | 183 | def find_match_box(centroid_box, centroid_boxes): 184 | """Find the index of the boxes with the largest overlap among the N-boxes. 185 | # Args 186 | box : array, shape of (1, 4) 187 | boxes : array, shape of (N, 4) 188 | 189 | # Return 190 | match_index : int 191 | """ 192 | match_index = -1 193 | max_iou = -1 194 | 195 | for i, box in enumerate(centroid_boxes): 196 | iou = centroid_box_iou(centroid_box, box) 197 | 198 | if max_iou < iou: 199 | match_index = i 200 | max_iou = iou 201 | return match_index 202 | -------------------------------------------------------------------------------- /examples/armnn/face_recognition/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.19.2 2 | tqdm>=4.47.0 3 | scikit_image=>0.18.3 -------------------------------------------------------------------------------- /examples/armnn/face_recognition/yolov2.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
2 | # SPDX-License-Identifier: MIT 3 | 4 | """ 5 | Contains functions specific to decoding and processing inference results for YOLO V3 Tiny models. 6 | """ 7 | 8 | import cv2 9 | import numpy as np 10 | from box import BoundBox, nms_boxes, boxes_to_array, to_minmax, draw_boxes 11 | 12 | 13 | def yolo_processing(netout): 14 | anchors = [1.889, 2.5245, 2.9465, 3.94056, 3.99987, 5.3658, 5.155437, 6.92275, 6.718375, 9.01025] 15 | nms_threshold=0.2 16 | """Convert Yolo network output to bounding box 17 | 18 | # Args 19 | netout : 4d-array, shape of (grid_h, grid_w, num of boxes per grid, 5 + n_classes) 20 | YOLO neural network output array 21 | 22 | # Returns 23 | boxes : array, shape of (N, 4) 24 | coordinate scale is normalized [0, 1] 25 | probs : array, shape of (N, nb_classes) 26 | """ 27 | netout = netout[0].reshape(7,7,5,6) 28 | grid_h, grid_w, nb_box = netout.shape[:3] 29 | boxes = [] 30 | 31 | # decode the output by the network 32 | netout[..., 4] = _sigmoid(netout[..., 4]) 33 | netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:]) 34 | netout[..., 5:] *= netout[..., 5:] > 0.3 35 | 36 | for row in range(grid_h): 37 | for col in range(grid_w): 38 | for b in range(nb_box): 39 | # from 4th element onwards are confidence and class classes 40 | classes = netout[row,col,b,5:] 41 | 42 | if np.sum(classes) > 0: 43 | # first 4 elements are x, y, w, and h 44 | x, y, w, h = netout[row,col,b,:4] 45 | 46 | x = (col + _sigmoid(x)) / grid_w # center position, unit: image width 47 | y = (row + _sigmoid(y)) / grid_h # center position, unit: image height 48 | w = anchors[2 * b + 0] * np.exp(w) / grid_w # unit: image width 49 | h = anchors[2 * b + 1] * np.exp(h) / grid_h # unit: image height 50 | confidence = netout[row,col,b,4] 51 | box = BoundBox(x, y, w, h, confidence, classes) 52 | boxes.append(box) 53 | 54 | boxes = nms_boxes(boxes, len(classes), nms_threshold, 0.3) 55 | boxes, probs = boxes_to_array(boxes) 56 | #print(boxes) 57 | predictions = [] 58 | def _to_original_scale(boxes): 59 | minmax_boxes = to_minmax(boxes) 60 | minmax_boxes[:,0] *= 224 61 | minmax_boxes[:,2] *= 224 62 | minmax_boxes[:,1] *= 224 63 | minmax_boxes[:,3] *= 224 64 | return minmax_boxes.astype(np.int) 65 | 66 | if len(boxes) > 0: 67 | boxes = _to_original_scale(boxes) 68 | 69 | for i in range(len(boxes)): 70 | predictions.append([0, boxes[i], probs[i][0]]) 71 | 72 | return predictions 73 | 74 | def _sigmoid(x): 75 | return 1. / (1. + np.exp(-x)) 76 | 77 | def _softmax(x, axis=-1, t=-100.): 78 | x = x - np.max(x) 79 | if np.min(x) < t: 80 | x = x/np.min(x)*t 81 | e_x = np.exp(x) 82 | return e_x / e_x.sum(axis, keepdims=True) 83 | 84 | def yolo_resize_factor(video: cv2.VideoCapture, input_binding_info: tuple): 85 | """ 86 | Gets a multiplier to scale the bounding box positions to 87 | their correct position in the frame. 88 | 89 | Args: 90 | video: Video capture object, contains information about data source. 91 | input_binding_info: Contains shape of model input layer. 92 | 93 | Returns: 94 | Resizing factor to scale box coordinates to output frame size. 
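For example, a 1280x720 source frame used with a 224x224 model input gives a factor of 1280 / 224 ≈ 5.71.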
95 | """ 96 | frame_height = video.get(cv2.CAP_PROP_FRAME_HEIGHT) 97 | frame_width = video.get(cv2.CAP_PROP_FRAME_WIDTH) 98 | model_height, model_width = list(input_binding_info[1].GetShape())[1:3] 99 | return max(frame_height, frame_width) / max(model_height, model_width) 100 | -------------------------------------------------------------------------------- /examples/edge_impulse/multi_stage_inference_vehicle_type/multi_stage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import cv2 4 | import os 5 | import sys, getopt 6 | import signal 7 | import time 8 | from edge_impulse_linux.image import ImageImpulseRunner 9 | 10 | show_camera = True 11 | 12 | def draw_result(frame, class_name, bb, confidence): 13 | """ 14 | Draws bounding boxes around detected objects and adds a label and confidence score. 15 | Args: 16 | frame: The original captured frame from video source. 17 | detections: A list of detected objects in the form [class, [box positions], confidence]. 18 | resize_factor: Resizing factor to scale box coordinates to output frame size. 19 | face_data: List containing information about age and gender 20 | """ 21 | color = (255, 0, 0) 22 | 23 | x_min, y_min, x_max, y_max = bb['x'], bb['y'], bb['x']+ bb['width'], bb['y']+ bb['height'] 24 | 25 | # Draw bounding box around detected object 26 | cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2) 27 | 28 | # Create label for detected object class 29 | label = "{}, {}".format(class_name, confidence) 30 | label_color = (255, 255, 255) 31 | 32 | # Make sure label always stays on-screen 33 | x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2] 34 | 35 | lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text) 36 | lbl_box_xy_max = (x_min + int(0.75 * x_text), y_min + y_text if y_min<25 else y_min) 37 | lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5) 38 | 39 | # Add label and confidence value 40 | cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1) 41 | cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.70, label_color, 1, cv2.LINE_AA) 42 | 43 | 44 | def now(): 45 | return round(time.time() * 1000) 46 | 47 | def get_webcams(): 48 | port_ids = [] 49 | for port in range(5): 50 | print("Looking for a camera in port %s:" %port) 51 | camera = cv2.VideoCapture(port) 52 | if camera.isOpened(): 53 | ret = camera.read()[0] 54 | if ret: 55 | backendName =camera.getBackendName() 56 | w = camera.get(3) 57 | h = camera.get(4) 58 | print("Camera %s (%s x %s) found in port %s " %(backendName,h,w, port)) 59 | port_ids.append(port) 60 | camera.release() 61 | return port_ids 62 | 63 | def sigint_handler(sig, frame): 64 | print('Interrupted') 65 | if (runner): 66 | runner.stop() 67 | sys.exit(0) 68 | 69 | signal.signal(signal.SIGINT, sigint_handler) 70 | 71 | def help(): 72 | print('python classify.py ') 73 | 74 | def main(argv): 75 | try: 76 | opts, args = getopt.getopt(argv, "h", ["--help"]) 77 | except getopt.GetoptError: 78 | help() 79 | sys.exit(2) 80 | 81 | for opt, arg in opts: 82 | if opt in ('-h', '--help'): 83 | help() 84 | sys.exit() 85 | 86 | if len(args) == 0: 87 | help() 88 | sys.exit(2) 89 | 90 | def get_path(model_name): 91 | 92 | dir_path = os.path.dirname(os.path.realpath(__file__)) 93 | modelfile = os.path.join(dir_path, model_name) 94 | print('MODEL: ' + modelfile) 95 | return modelfile 96 | 97 | detection_model = get_path(args[0]) 98 | classification_model = get_path(args[1]) 99 | 100 | 
with ImageImpulseRunner(detection_model) as detection_runner, ImageImpulseRunner(classification_model) as classification_runner: 101 | 102 | detection_model_info = detection_runner.init() 103 | classification_model_info = classification_runner.init() 104 | 105 | print('Loaded detection model runner for "' + detection_model_info['project']['owner'] + ' / ' + detection_model_info['project']['name'] + '"') 106 | detection_labels = detection_model_info['model_parameters']['labels'] 107 | 108 | print('Loaded detection model runner for "' + classification_model_info['project']['owner'] + ' / ' + classification_model_info['project']['name'] + '"') 109 | classification_labels = classification_model_info['model_parameters']['labels'] 110 | 111 | class_model_input_height = classification_model_info['model_parameters']['image_input_height'] 112 | class_model_input_width = classification_model_info['model_parameters']['image_input_width'] 113 | 114 | if len(args)>= 3: 115 | videoCaptureDeviceId = int(args[2]) 116 | else: 117 | port_ids = get_webcams() 118 | if len(port_ids) == 0: 119 | raise Exception('Cannot find any webcams') 120 | if len(args)<= 1 and len(port_ids)> 1: 121 | raise Exception("Multiple cameras found. Add the camera port ID as a second argument to use to this script") 122 | videoCaptureDeviceId = int(port_ids[0]) 123 | 124 | camera = cv2.VideoCapture(videoCaptureDeviceId) 125 | 126 | ret = camera.read()[0] 127 | if ret: 128 | backendName = camera.getBackendName() 129 | w = camera.get(3) 130 | h = camera.get(4) 131 | print("Camera %s (%s x %s) in port %s selected." %(backendName,h,w, videoCaptureDeviceId)) 132 | camera.release() 133 | else: 134 | raise Exception("Couldn't initialize selected camera.") 135 | 136 | for det_res, img in detection_runner.classifier(videoCaptureDeviceId): 137 | print('Found %d bounding boxes (%d ms.)' % (len(det_res["result"]["bounding_boxes"]), det_res['timing']['dsp'] + det_res['timing']['classification'])) 138 | for bb in det_res["result"]["bounding_boxes"]: 139 | print('%s (%.2f): x=%d y=%d w=%d h=%d\n' % (bb['label'], bb['value'], bb['x'], bb['y'], bb['width'], bb['height'])) 140 | 141 | cropped_img = img[bb['y']:bb['y']+bb['height'], bb['x']:bb['x']+bb['width']] 142 | resized_img = cv2.resize(cropped_img, (class_model_input_width, class_model_input_height)) 143 | 144 | features, cropped = classification_runner.get_features_from_image(resized_img) 145 | 146 | # the image will be resized and cropped, save a copy of the picture here 147 | # so you can see what's being passed into the classifier 148 | #cv2.imwrite('debug.jpg', cropped) 149 | 150 | class_res = classification_runner.classify(features) 151 | 152 | if "classification" in class_res["result"].keys(): 153 | print('Classification result (%d ms.) 
\n' % (class_res['timing']['dsp'] + class_res['timing']['classification']), end='') 154 | top_score = 0 155 | top_label = '' 156 | 157 | for label in classification_labels: 158 | score = class_res['result']['classification'][label] 159 | print('%s: %.2f\n' % (label, score), end='') 160 | if score >= top_score: 161 | top_score = score 162 | top_label = label 163 | 164 | print('----------------------\n', flush=True) 165 | print('Top result: %s with confidence %.2f\n' % (top_label, top_score), end='') 166 | print('----------------------\n', flush=True) 167 | 168 | draw_result(img, top_label, bb, top_score) 169 | 170 | if (show_camera): 171 | cv2.imshow('edgeimpulse', img) 172 | if cv2.waitKey(1) == ord('q'): 173 | break 174 | 175 | detection_runner.stop() 176 | classification_runner.stop() 177 | 178 | if __name__ == "__main__": 179 | main(sys.argv[1:]) -------------------------------------------------------------------------------- /examples/mediapipe/README.md: -------------------------------------------------------------------------------- 1 | # MediaPipe Sample Applications 2 | 3 | ## Introduction 4 | Google MediaPipe offers ready-to-use yet customizable Python solutions as a prebuilt Python package. 5 | 6 | We provide example scripts for performing inference from video file and video stream with `run_video_file.py` and `run_video_stream.py`. For detailed instructions execute ```run_video_file.py --help``` or ```run_video_stream.py --help``` 7 | 8 | ## Prerequisites 9 | 10 | ##### MediaPipe 11 | 12 | Before proceeding to the next steps, make sure that you have successfully installed the MediaPipe on your system by following the instructions in the README. 13 | 14 | You can verify that MediaPipe library is installed using: 15 | ```bash 16 | $ pip3 show mediapipe 17 | ``` 18 | 19 | ##### Dependencies 20 | 21 | Install the following libraries on your system: 22 | ```bash 23 | sudo apt install ffmpeg python3-opencv 24 | ``` 25 | 26 | Create a virtual environment: 27 | ```bash 28 | python3 -m venv devenv --system-site-packages 29 | source devenv/bin/activate 30 | ``` 31 | 32 | ### Python bindings for 32bit version 33 | 34 | ``` 35 | pip3 install mediapipe-rpi4 tqdm 36 | ``` 37 | 38 | ### Python bindings for 64bit version 39 | 40 | Pre-built wheels for Python 3.7 64bit OS were not available at the moment of writing of this article, so we compiled and shared them ourselves. 41 | 42 | ``` 43 | wget https://files.seeedstudio.com/ml/mediapipe/mediapipe-0.8-cp37-cp37m-linux_aarch64.whl 44 | pip3 install mediapipe-0.8-cp37-cp37m-linux_aarch64.whl 45 | pip3 install tqdm 46 | ``` 47 | -------------------------------------------------------------------------------- /examples/mediapipe/common/cv_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | """ 6 | This file contains helper functions for reading video/image data and 7 | pre/postprocessing of video/image data using OpenCV. 
8 | """ 9 | 10 | import os 11 | 12 | import cv2 13 | import numpy as np 14 | 15 | def count_frames_manual(video): 16 | # initialize the total number of frames read 17 | total = 0 18 | # loop over the frames of the video 19 | while True: 20 | # grab the current frame 21 | (grabbed, frame) = video.read() 22 | 23 | # check to see if we have reached the end of the 24 | # video 25 | if not grabbed: 26 | break 27 | # increment the total number of frames read 28 | total += 1 29 | # return the total number of frames in the video file 30 | return total 31 | 32 | def create_video_writer(video: cv2.VideoCapture, video_path: str, name: str): 33 | """ 34 | Creates a video writer object to write processed frames to file. 35 | 36 | Args: 37 | video: Video capture object, contains information about data source. 38 | video_path: User-specified video file path. 39 | output_path: Optional path to save the processed video. 40 | 41 | Returns: 42 | Video writer object. 43 | """ 44 | _, ext = os.path.splitext(video_path) 45 | 46 | i, filename = 0, os.path.join(str(), f'{name}{ext}') 47 | 48 | while os.path.exists(filename): 49 | i += 1 50 | filename = os.path.join(str(), f'{name}({i}){ext}') 51 | print(filename) 52 | video_writer = cv2.VideoWriter(filename=filename, 53 | fourcc=get_source_encoding_int(video), 54 | fps=int(video.get(cv2.CAP_PROP_FPS)), 55 | frameSize=(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)), 56 | int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)))) 57 | return video_writer 58 | 59 | 60 | def init_video_file_capture(video_path: str, name: str): 61 | """ 62 | Creates a video capture object from a video file. 63 | 64 | Args: 65 | video_path: User-specified video file path. 66 | output_path: Optional path to save the processed video. 67 | 68 | Returns: 69 | Video capture object to capture frames, video writer object to write processed 70 | frames to file, plus total frame count of video source to iterate through. 71 | """ 72 | if not os.path.exists(video_path): 73 | raise FileNotFoundError(f'Video file not found for: {video_path}') 74 | 75 | video = cv2.VideoCapture(video_path) 76 | if not video.isOpened: 77 | raise RuntimeError(f'Failed to open video capture from file: {video_path}') 78 | 79 | video_writer = create_video_writer(video, video_path, name) 80 | 81 | iter_frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) 82 | 83 | return video, video_writer, range(iter_frame_count) 84 | 85 | 86 | def init_video_stream_capture(video_source: int): 87 | """ 88 | Creates a video capture object from a device. 89 | 90 | Args: 91 | video_source: Device index used to read video stream. 92 | 93 | Returns: 94 | Video capture object used to capture frames from a video stream. 95 | """ 96 | video = cv2.VideoCapture(video_source) 97 | if not video.isOpened: 98 | raise RuntimeError(f'Failed to open video capture for device with index: {video_source}') 99 | print('Processing video stream. Press \'Esc\' key to exit the demo.') 100 | return video 101 | 102 | def get_source_encoding_int(video_capture): 103 | return int(video_capture.get(cv2.CAP_PROP_FOURCC)) 104 | -------------------------------------------------------------------------------- /examples/mediapipe/face_detection/run_video_file.py: -------------------------------------------------------------------------------- 1 | # Based on MediPipe Example Scripts. All rights reserved. 
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | import os 6 | import sys 7 | import time 8 | script_dir = os.path.dirname(__file__) 9 | sys.path.insert(1, os.path.join(script_dir, '..', 'common')) 10 | 11 | import cv2 12 | import mediapipe as mp 13 | from tqdm import tqdm 14 | from argparse import ArgumentParser 15 | from cv_utils import init_video_file_capture 16 | 17 | mp_drawing = mp.solutions.drawing_utils 18 | mp_face_detection = mp.solutions.face_detection 19 | 20 | def main(args): 21 | video, video_writer, frame_count = init_video_file_capture(args.video_file_path, 'face_detection_demo') 22 | frame_num = len(frame_count) 23 | 24 | times = [] 25 | 26 | with mp_face_detection.FaceDetection(model_selection=args.model_selection, 27 | min_detection_confidence=args.min_detection_confidence) as face_detection: 28 | 29 | for _ in tqdm(frame_count, desc='Processing frames'): 30 | frame_present, frame = video.read() 31 | if not frame_present: 32 | continue 33 | 34 | # Flip the image horizontally for a later selfie-view display, and convert 35 | # the BGR image to RGB. 36 | image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB) 37 | # To improve performance, optionally mark the image as not writeable to 38 | # pass by reference. 39 | image.flags.writeable = False 40 | 41 | start_time = time.time() 42 | results = face_detection.process(image) 43 | end_time = (time.time() - start_time)*1000 44 | 45 | # Draw the face mesh annotations on the image. 46 | image.flags.writeable = True 47 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 48 | if results.detections: 49 | for detection in results.detections: 50 | mp_drawing.draw_detection(image, detection) 51 | 52 | times.append(end_time) 53 | video_writer.write(image) 54 | 55 | print('Finished processing frames') 56 | video.release(), video_writer.release() 57 | 58 | print("Average time(ms): ", sum(times)//frame_num) 59 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1000.0 / average of inference times for all the frames 60 | 61 | if __name__ == '__main__': 62 | parser = ArgumentParser() 63 | parser.add_argument('--video_file_path', required=True, type=str, 64 | help='Path to the video file to run object detection on') 65 | 66 | parser.add_argument('--min_detection_confidence', default=0.5, type=float, 67 | help='Minimum confidence value ([0.0, 1.0]) from the face detection model for the detection to be considered successful. Default to 0.5') 68 | 69 | parser.add_argument('--model_selection', default=1, type=int, 70 | help='Use 0 to select a short-range model that works best for faces within 2 meters from the camera, and 1 for a full-range model best for faces within 5 meters.') 71 | 72 | args = parser.parse_args() 73 | main(args) 74 | -------------------------------------------------------------------------------- /examples/mediapipe/face_detection/run_video_stream.py: -------------------------------------------------------------------------------- 1 | # Based on MediPipe Example Scripts. All rights reserved. 
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | import os 6 | import sys 7 | import time 8 | script_dir = os.path.dirname(__file__) 9 | sys.path.insert(1, os.path.join(script_dir, '..', 'common')) 10 | 11 | import cv2 12 | import mediapipe as mp 13 | from argparse import ArgumentParser 14 | from cv_utils import init_video_stream_capture 15 | 16 | mp_drawing = mp.solutions.drawing_utils 17 | mp_face_detection = mp.solutions.face_detection 18 | 19 | def main(args): 20 | video = init_video_stream_capture(args.video_source) 21 | 22 | with mp_face_detection.FaceDetection(model_selection=args.model_selection, 23 | min_detection_confidence=args.min_detection_confidence) as face_detection: 24 | 25 | while True: 26 | 27 | frame_present, frame = video.read() 28 | if not frame_present: 29 | raise RuntimeError('Error reading frame from video stream') 30 | 31 | # Flip the image horizontally for a later selfie-view display, and convert 32 | # the BGR image to RGB. 33 | image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB) 34 | # To improve performance, optionally mark the image as not writeable to 35 | # pass by reference. 36 | image.flags.writeable = False 37 | 38 | start_time = time.time() 39 | results = face_detection.process(image) 40 | end_time = (time.time() - start_time)*1000 41 | 42 | print("FPS: ", 1.0 / (time.time() - start_time)) # FPS = 1 / time to process loop 43 | print("Time(ms): ", (time.time() - start_time)*1000) 44 | 45 | # Draw the face mesh annotations on the image. 46 | image.flags.writeable = True 47 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 48 | if results.detections: 49 | for detection in results.detections: 50 | mp_drawing.draw_detection(image, detection) 51 | 52 | cv2.imshow('MediaPipe Face Detection Demo', image) 53 | 54 | if cv2.waitKey(1) == 27: 55 | print('\nExit key activated. Closing video...') 56 | break 57 | 58 | video.release(), cv2.destroyAllWindows() 59 | 60 | if __name__ == '__main__': 61 | parser = ArgumentParser() 62 | parser.add_argument('--video_source', type=int, default=0, 63 | help='Device index to access video stream. Defaults to primary device camera at index 0') 64 | 65 | parser.add_argument('--min_detection_confidence', default=0.5, type=float, 66 | help='Minimum confidence value ([0.0, 1.0]) from the face detection model for the detection to be considered successful. Default to 0.5') 67 | 68 | parser.add_argument('--model_selection', default=1, type=int, 69 | help='Use 0 to select a short-range model that works best for faces within 2 meters from the camera, and 1 for a full-range model best for faces within 5 meters.') 70 | 71 | args = parser.parse_args() 72 | main(args) -------------------------------------------------------------------------------- /examples/mediapipe/face_mesh/run_video_file.py: -------------------------------------------------------------------------------- 1 | # Based on MediPipe Example Scripts. All rights reserved. 
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | import os 6 | import sys 7 | import time 8 | script_dir = os.path.dirname(__file__) 9 | sys.path.insert(1, os.path.join(script_dir, '..', 'common')) 10 | 11 | import cv2 12 | import mediapipe as mp 13 | import numpy as np 14 | from tqdm import tqdm 15 | from argparse import ArgumentParser 16 | from cv_utils import init_video_file_capture 17 | 18 | mp_drawing = mp.solutions.drawing_utils 19 | mp_face_mesh = mp.solutions.face_mesh 20 | 21 | def main(args): 22 | video, video_writer, frame_count = init_video_file_capture(args.video_file_path, 'face_mesh_demo') 23 | frame_num = len(frame_count) 24 | print(frame_count) 25 | drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1) 26 | 27 | times = [] 28 | 29 | with mp_face_mesh.FaceMesh(min_detection_confidence=args.min_detection_confidence, 30 | min_tracking_confidence=args.min_tracking_confidence) as face_mesh: 31 | 32 | for _ in tqdm(frame_count, desc='Processing frames'): 33 | frame_present, frame = video.read() 34 | if not frame_present: 35 | continue 36 | 37 | # Flip the image horizontally for a later selfie-view display, and convert 38 | # the BGR image to RGB. 39 | image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB) 40 | # To improve performance, optionally mark the image as not writeable to 41 | # pass by reference. 42 | image.flags.writeable = False 43 | 44 | start_time = time.time() 45 | results = face_mesh.process(image) 46 | end_time = (time.time() - start_time)*1000 47 | 48 | # Draw the face mesh annotations on the image. 49 | image.flags.writeable = True 50 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 51 | if results.multi_face_landmarks: 52 | for face_landmarks in results.multi_face_landmarks: 53 | mp_drawing.draw_landmarks( 54 | image=image, 55 | landmark_list=face_landmarks, 56 | connections=mp_face_mesh.FACE_CONNECTIONS, 57 | landmark_drawing_spec=drawing_spec, 58 | connection_drawing_spec=drawing_spec) 59 | 60 | times.append(end_time) 61 | video_writer.write(image) 62 | 63 | print('Finished processing frames') 64 | video.release(), video_writer.release() 65 | 66 | print("Average time(ms): ", sum(times)//frame_num) 67 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1000.0 / average of inference times for all the frames 68 | 69 | if __name__ == '__main__': 70 | parser = ArgumentParser() 71 | parser.add_argument('--video_file_path', required=True, type=str, 72 | help='Path to the video file to run object detection on') 73 | 74 | parser.add_argument('--min_detection_confidence', default=0.5, type=float, 75 | help='Path to the first stage model to use') 76 | parser.add_argument('--min_tracking_confidence', default=0.5, type=float, 77 | help='Path to the second stage model to use') 78 | 79 | args = parser.parse_args() 80 | main(args) 81 | -------------------------------------------------------------------------------- /examples/mediapipe/face_mesh/run_video_stream.py: -------------------------------------------------------------------------------- 1 | # Based on MediPipe Example Scripts. All rights reserved. 
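The face-mesh demo above draws landmarks and connections with a single thin DrawingSpec. The snippet below is a small sketch of how separate, coloured specs could be passed for points and connections; the colour tuples are illustrative values and are effectively BGR, since the drawing is done with OpenCV.

```
import mediapipe as mp

mp_drawing = mp.solutions.drawing_utils

# Illustrative values: thin green dots for landmarks, thicker white mesh lines.
landmark_spec = mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=1, circle_radius=1)
connection_spec = mp_drawing.DrawingSpec(color=(255, 255, 255), thickness=2)

# mp_drawing.draw_landmarks(image=image,
#                           landmark_list=face_landmarks,
#                           connections=mp_face_mesh.FACE_CONNECTIONS,
#                           landmark_drawing_spec=landmark_spec,
#                           connection_drawing_spec=connection_spec)
```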
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | import os 6 | import sys 7 | import time 8 | script_dir = os.path.dirname(__file__) 9 | sys.path.insert(1, os.path.join(script_dir, '..', 'common')) 10 | 11 | import cv2 12 | import mediapipe as mp 13 | from argparse import ArgumentParser 14 | from cv_utils import init_video_stream_capture 15 | 16 | mp_drawing = mp.solutions.drawing_utils 17 | mp_face_mesh = mp.solutions.face_mesh 18 | 19 | def main(args): 20 | video = init_video_stream_capture(args.video_source) 21 | drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1) 22 | 23 | with mp_face_mesh.FaceMesh(min_detection_confidence=args.min_detection_confidence, 24 | min_tracking_confidence=args.min_tracking_confidence, 25 | static_image_mode = False) as face_mesh: 26 | 27 | while True: 28 | 29 | frame_present, frame = video.read() 30 | if not frame_present: 31 | raise RuntimeError('Error reading frame from video stream') 32 | 33 | # Flip the image horizontally for a later selfie-view display, and convert 34 | # the BGR image to RGB. 35 | image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB) 36 | # To improve performance, optionally mark the image as not writeable to 37 | # pass by reference. 38 | image.flags.writeable = False 39 | 40 | start_time = time.time() 41 | results = face_mesh.process(image) 42 | end_time = (time.time() - start_time)*1000 43 | 44 | print("FPS: ", 1.0 / (time.time() - start_time)) # FPS = 1 / time to process loop 45 | print("Time(ms): ", (time.time() - start_time)*1000) 46 | 47 | # Draw the face mesh annotations on the image. 48 | image.flags.writeable = True 49 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 50 | if results.multi_face_landmarks: 51 | for face_landmarks in results.multi_face_landmarks: 52 | mp_drawing.draw_landmarks( 53 | image=image, 54 | landmark_list=face_landmarks, 55 | connections=mp_face_mesh.FACE_CONNECTIONS, 56 | landmark_drawing_spec=drawing_spec, 57 | connection_drawing_spec=drawing_spec) 58 | 59 | cv2.imshow('MediaPipe Face Mesh Demo', image) 60 | 61 | if cv2.waitKey(1) == 27: 62 | print('\nExit key activated. Closing video...') 63 | break 64 | 65 | video.release(), cv2.destroyAllWindows() 66 | 67 | if __name__ == '__main__': 68 | parser = ArgumentParser() 69 | parser.add_argument('--video_source', type=int, default=0, 70 | help='Device index to access video stream. Defaults to primary device camera at index 0') 71 | 72 | parser.add_argument('--min_detection_confidence', default=0.5, type=float, 73 | help='Minimum confidence value ([0.0, 1.0]) from the face detection model for the detection to be considered successful. Default to 0.5') 74 | parser.add_argument('--min_tracking_confidence', default=0.5, type=float, 75 | help='Minimum confidence value ([0.0, 1.0]) from the landmark-tracking model for the face landmarks to be considered tracked successfully, or otherwise face detection will be invoked automatically on the next input image.') 76 | 77 | args = parser.parse_args() 78 | main(args) -------------------------------------------------------------------------------- /examples/mediapipe/hand_landmarks/run_video_file.py: -------------------------------------------------------------------------------- 1 | # Based on MediPipe Example Scripts. All rights reserved. 
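Both face-mesh scripts reference `mp_face_mesh.FACE_CONNECTIONS`, which exists in the MediaPipe versions these demos were written against but was later replaced by the `FACEMESH_*` constant sets (for example `FACEMESH_TESSELATION` and `FACEMESH_CONTOURS`). A hedged compatibility sketch, assuming one of the two names is present:

```
import mediapipe as mp

mp_face_mesh = mp.solutions.face_mesh

# Prefer the legacy name used in these demos, fall back to the newer constant.
FACE_MESH_CONNECTIONS = getattr(mp_face_mesh, 'FACE_CONNECTIONS', None) \
    or getattr(mp_face_mesh, 'FACEMESH_TESSELATION', None)

# ...then pass connections=FACE_MESH_CONNECTIONS to mp_drawing.draw_landmarks().
```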
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | import os 6 | import sys 7 | import time 8 | script_dir = os.path.dirname(__file__) 9 | sys.path.insert(1, os.path.join(script_dir, '..', 'common')) 10 | 11 | import cv2 12 | import mediapipe as mp 13 | from tqdm import tqdm 14 | from argparse import ArgumentParser 15 | from cv_utils import init_video_file_capture 16 | 17 | mp_drawing = mp.solutions.drawing_utils 18 | mp_drawing_styles = mp.solutions.drawing_styles 19 | mp_hands = mp.solutions.hands 20 | 21 | def main(args): 22 | video, video_writer, frame_count = init_video_file_capture(args.video_file_path, 'hand_landmarks_demo') 23 | frame_num = len(frame_count) 24 | 25 | times = [] 26 | 27 | with mp_hands.Hands(model_complexity=args.model_selection, 28 | min_detection_confidence=args.min_detection_confidence, 29 | min_tracking_confidence=0.5) as hands: 30 | 31 | for _ in tqdm(frame_count, desc='Processing frames'): 32 | frame_present, frame = video.read() 33 | if not frame_present: 34 | continue 35 | 36 | # Flip the image horizontally for a later selfie-view display, and convert 37 | # the BGR image to RGB. 38 | image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB) 39 | # To improve performance, optionally mark the image as not writeable to 40 | # pass by reference. 41 | image.flags.writeable = False 42 | 43 | start_time = time.time() 44 | results = hands.process(image) 45 | end_time = (time.time() - start_time)*1000 46 | 47 | # Draw the hand landmarks annotations on the image. 48 | image.flags.writeable = True 49 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 50 | if results.multi_hand_landmarks: 51 | for hand_landmarks in results.multi_hand_landmarks: 52 | mp_drawing.draw_landmarks( 53 | image, 54 | hand_landmarks, 55 | mp_hands.HAND_CONNECTIONS, 56 | mp_drawing_styles.get_default_hand_landmarks_style(), 57 | mp_drawing_styles.get_default_hand_connections_style()) 58 | 59 | times.append(end_time) 60 | video_writer.write(image) 61 | 62 | print('Finished processing frames') 63 | video.release(), video_writer.release() 64 | 65 | print("Average time(ms): ", sum(times)//frame_num) 66 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1000.0 / average of inference times for all the frames 67 | 68 | if __name__ == '__main__': 69 | parser = ArgumentParser() 70 | parser.add_argument('--video_file_path', required=True, type=str, 71 | help='Path to the video file to run object detection on') 72 | 73 | parser.add_argument('--min_detection_confidence', default=0.5, type=float, 74 | help='Minimum confidence value ([0.0, 1.0]) from the face detection model for the detection to be considered successful. Default to 0.5') 75 | 76 | parser.add_argument('--model_selection', default=0, type=int, 77 | help='Use 0 to select a short-range model that works best for faces within 2 meters from the camera, and 1 for a full-range model best for faces within 5 meters.') 78 | 79 | args = parser.parse_args() 80 | main(args) 81 | -------------------------------------------------------------------------------- /examples/mediapipe/hand_landmarks/run_video_stream.py: -------------------------------------------------------------------------------- 1 | # Based on MediPipe Example Scripts. All rights reserved. 
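Hand landmarks come back normalised to [0, 1] relative to the processed image, so turning them into pixel positions needs the frame size; note that the frame was flipped horizontally before inference, so the coordinates refer to the mirrored view. A minimal sketch (the helper name is hypothetical):

```
import mediapipe as mp

mp_hands = mp.solutions.hands

def index_fingertip_px(hand_landmarks, frame_width, frame_height):
    """Return the index fingertip as (x, y) pixel coordinates."""
    tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
    return int(tip.x * frame_width), int(tip.y * frame_height)

# for hand_landmarks in results.multi_hand_landmarks or []:
#     x, y = index_fingertip_px(hand_landmarks, image.shape[1], image.shape[0])
```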
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | import os 6 | import sys 7 | import time 8 | script_dir = os.path.dirname(__file__) 9 | sys.path.insert(1, os.path.join(script_dir, '..', 'common')) 10 | 11 | import cv2 12 | import mediapipe as mp 13 | from argparse import ArgumentParser 14 | from cv_utils import init_video_stream_capture 15 | 16 | mp_drawing = mp.solutions.drawing_utils 17 | mp_drawing_styles = mp.solutions.drawing_styles 18 | mp_hands = mp.solutions.hands 19 | 20 | def main(args): 21 | video = init_video_stream_capture(args.video_source) 22 | 23 | with mp_hands.Hands(model_complexity=args.model_selection, 24 | min_detection_confidence=args.min_detection_confidence, 25 | min_tracking_confidence=0.5) as hands: 26 | 27 | while True: 28 | 29 | frame_present, frame = video.read() 30 | if not frame_present: 31 | raise RuntimeError('Error reading frame from video stream') 32 | 33 | # Flip the image horizontally for a later selfie-view display, and convert 34 | # the BGR image to RGB. 35 | image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB) 36 | # To improve performance, optionally mark the image as not writeable to 37 | # pass by reference. 38 | image.flags.writeable = False 39 | 40 | start_time = time.time() 41 | results = hands.process(image) 42 | end_time = (time.time() - start_time)*1000 43 | 44 | print("FPS: ", 1.0 / (time.time() - start_time)) # FPS = 1 / time to process loop 45 | print("Time(ms): ", (time.time() - start_time)*1000) 46 | 47 | # Draw the hand landmarks annotations on the image. 48 | image.flags.writeable = True 49 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 50 | if results.multi_hand_landmarks: 51 | for hand_landmarks in results.multi_hand_landmarks: 52 | mp_drawing.draw_landmarks( 53 | image, 54 | hand_landmarks, 55 | mp_hands.HAND_CONNECTIONS, 56 | mp_drawing_styles.get_default_hand_landmarks_style(), 57 | mp_drawing_styles.get_default_hand_connections_style()) 58 | 59 | cv2.imshow('MediaPipe Hands', image) 60 | 61 | if cv2.waitKey(1) == 27: 62 | print('\nExit key activated. Closing video...') 63 | break 64 | 65 | video.release(), cv2.destroyAllWindows() 66 | 67 | if __name__ == '__main__': 68 | parser = ArgumentParser() 69 | parser.add_argument('--video_source', type=int, default=0, 70 | help='Device index to access video stream. Defaults to primary device camera at index 0') 71 | 72 | parser.add_argument('--min_detection_confidence', default=0.5, type=float, 73 | help='Minimum confidence value ([0.0, 1.0]) from the face detection model for the detection to be considered successful. Default to 0.5') 74 | 75 | parser.add_argument('--model_selection', default=0, type=int, 76 | help='Use 0 to select a short-range model that works best for faces within 2 meters from the camera, and 1 for a full-range model best for faces within 5 meters.') 77 | 78 | args = parser.parse_args() 79 | main(args) -------------------------------------------------------------------------------- /examples/mediapipe/pose_estimation/run_video_file.py: -------------------------------------------------------------------------------- 1 | # Based on MediPipe Example Scripts. All rights reserved. 
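Besides landmark positions, the Hands solution also reports which hand each set belongs to via `results.multi_handedness`, whose entries line up index-for-index with `results.multi_hand_landmarks`. A small sketch (the function name is hypothetical); MediaPipe determines handedness assuming a mirrored selfie view, which matches the horizontal flip applied in these demos.

```
def describe_hands(results):
    """Return a list like ['Right (0.98)', 'Left (0.95)'] for the detected hands."""
    labels = []
    if results.multi_handedness:
        for handedness in results.multi_handedness:
            top = handedness.classification[0]   # highest-scoring label
            labels.append(f'{top.label} ({top.score:.2f})')
    return labels
```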
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | import os 6 | import sys 7 | import time 8 | script_dir = os.path.dirname(__file__) 9 | sys.path.insert(1, os.path.join(script_dir, '..', 'common')) 10 | 11 | import cv2 12 | import mediapipe as mp 13 | from tqdm import tqdm 14 | from argparse import ArgumentParser 15 | from cv_utils import init_video_file_capture 16 | 17 | mp_drawing = mp.solutions.drawing_utils 18 | mp_pose = mp.solutions.pose 19 | 20 | def main(args): 21 | video, video_writer, frame_count = init_video_file_capture(args.video_file_path, 'pose_estimation_demo') 22 | frame_num = len(frame_count) 23 | 24 | times = [] 25 | 26 | with mp_pose.Pose(min_detection_confidence=args.min_detection_confidence, 27 | model_complexity=args.model_complexity, 28 | static_image_mode = False) as pose: 29 | 30 | for _ in tqdm(frame_count, desc='Processing frames'): 31 | frame_present, frame = video.read() 32 | if not frame_present: 33 | continue 34 | 35 | # Flip the image horizontally for a later selfie-view display, and convert 36 | # the BGR image to RGB. 37 | image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB) 38 | # To improve performance, optionally mark the image as not writeable to 39 | # pass by reference. 40 | image.flags.writeable = False 41 | 42 | start_time = time.time() 43 | results = pose.process(image) 44 | end_time = (time.time() - start_time)*1000 45 | 46 | # Draw the face mesh annotations on the image. 47 | image.flags.writeable = True 48 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 49 | mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS) 50 | 51 | times.append(end_time) 52 | video_writer.write(image) 53 | 54 | print('Finished processing frames') 55 | video.release(), video_writer.release() 56 | 57 | print("Average time(ms): ", sum(times)//frame_num) 58 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1000.0 / average of inference times for all the frames 59 | 60 | if __name__ == '__main__': 61 | parser = ArgumentParser() 62 | parser.add_argument('--video_file_path', required=True, type=str, 63 | help='Path to the video file to run object detection on') 64 | 65 | parser.add_argument('--min_detection_confidence', default=0.5, type=float, 66 | help='Minimum confidence value ([0.0, 1.0]) from the face detection model for the detection to be considered successful. Default to 0.5') 67 | 68 | parser.add_argument('--model_complexity', default=0, type=int, 69 | help='Landmark accuracy as well as inference latency generally go up with the model complexity. Default to 1') 70 | 71 | args = parser.parse_args() 72 | main(args) 73 | -------------------------------------------------------------------------------- /examples/mediapipe/pose_estimation/run_video_stream.py: -------------------------------------------------------------------------------- 1 | # Based on MediPipe Example Scripts. All rights reserved. 
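Unlike the face and hand demos, the pose scripts pass `results.pose_landmarks` straight to `draw_landmarks()`, which quietly does nothing when no person is found. When individual joints are needed, each pose landmark also carries a `visibility` score worth checking; the sketch below (names are illustrative) reads the left wrist.

```
import mediapipe as mp

mp_pose = mp.solutions.pose

def left_wrist_px(results, frame_width, frame_height, min_visibility=0.5):
    """Return the left wrist as (x, y) pixels, or None if absent or occluded."""
    if not results.pose_landmarks:
        return None
    lm = results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_WRIST]
    if lm.visibility < min_visibility:
        return None
    return int(lm.x * frame_width), int(lm.y * frame_height)
```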
2 | # Modified 2021 Seeed Studio STU, Dmitry Maslov 3 | # SPDX-License-Identifier: MIT 4 | 5 | import os 6 | import sys 7 | import time 8 | script_dir = os.path.dirname(__file__) 9 | sys.path.insert(1, os.path.join(script_dir, '..', 'common')) 10 | 11 | import cv2 12 | import mediapipe as mp 13 | from argparse import ArgumentParser 14 | from cv_utils import init_video_stream_capture 15 | 16 | mp_drawing = mp.solutions.drawing_utils 17 | mp_pose = mp.solutions.pose 18 | 19 | def main(args): 20 | video = init_video_stream_capture(args.video_source) 21 | 22 | with mp_pose.Pose(min_detection_confidence=args.min_detection_confidence, 23 | model_complexity=args.model_complexity, 24 | static_image_mode = False) as pose: 25 | 26 | while True: 27 | 28 | frame_present, frame = video.read() 29 | if not frame_present: 30 | raise RuntimeError('Error reading frame from video stream') 31 | 32 | # Flip the image horizontally for a later selfie-view display, and convert 33 | # the BGR image to RGB. 34 | image = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB) 35 | # To improve performance, optionally mark the image as not writeable to 36 | # pass by reference. 37 | image.flags.writeable = False 38 | 39 | start_time = time.time() 40 | results = pose.process(image) 41 | end_time = (time.time() - start_time)*1000 42 | 43 | print("FPS: ", 1.0 / (time.time() - start_time)) # FPS = 1 / time to process loop 44 | print("Time(ms): ", (time.time() - start_time)*1000) 45 | 46 | # Draw the face mesh annotations on the image. 47 | image.flags.writeable = True 48 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 49 | mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS) 50 | 51 | cv2.imshow('MediaPipe Pose Estimation Demo', image) 52 | 53 | if cv2.waitKey(1) == 27: 54 | print('\nExit key activated. Closing video...') 55 | break 56 | 57 | video.release(), cv2.destroyAllWindows() 58 | 59 | if __name__ == '__main__': 60 | parser = ArgumentParser() 61 | parser.add_argument('--video_source', type=int, default=0, 62 | help='Device index to access video stream. Defaults to primary device camera at index 0') 63 | 64 | parser.add_argument('--min_detection_confidence', default=0.5, type=float, 65 | help='Minimum confidence value ([0.0, 1.0]) from the face detection model for the detection to be considered successful. Default to 0.5') 66 | 67 | parser.add_argument('--model_complexity', default=0, type=int, 68 | help='Landmark accuracy as well as inference latency generally go up with the model complexity. 
Default to 0.') 69 | 70 | args = parser.parse_args() 71 | main(args) -------------------------------------------------------------------------------- /examples/sample_files/cars.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seeed-Studio/Seeed_Python_MachineLearning/1fc0bf6d24d778c4fe501541966857b6fd50c146/examples/sample_files/cars.mp4 -------------------------------------------------------------------------------- /examples/sample_files/test_dance.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seeed-Studio/Seeed_Python_MachineLearning/1fc0bf6d24d778c4fe501541966857b6fd50c146/examples/sample_files/test_dance.mp4 -------------------------------------------------------------------------------- /examples/sample_files/test_s.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seeed-Studio/Seeed_Python_MachineLearning/1fc0bf6d24d778c4fe501541966857b6fd50c146/examples/sample_files/test_s.mp4 -------------------------------------------------------------------------------- /examples/tensorflow_lite/face_recognition/README.md: -------------------------------------------------------------------------------- 1 | # TensorFlow Lite Face Recognition Multi-stage Demo 2 | 3 | ## Introduction 4 | 5 | This demo allows for face recognition from either a video stream or a video file. The face embeddings need to be calculated and saved to a database with calculate_features.py before either of the two examples can be run. 6 | 7 | ## Prerequisites 8 | 9 | Install the dependencies with 10 | ``` 11 | pip3 install -r requirements.txt 12 | ``` 13 | Make sure you have the necessary system packages for OpenCV to work properly. 14 | ``` 15 | sudo apt-get install libatlas-base-dev libjasper-dev libqtgui4 python3-pyqt5 libqt4-test libilmbase-dev libopenexr-dev libgstreamer1.0-dev libavcodec58 libavformat58 libswscale5 16 | ``` 17 | 18 | ## Usage 19 | 20 | ### Database population 21 | 22 | Before we can run face recognition, we need to extract features from the faces we want to recognize and save the feature embedding vectors in encoded form in a .json file, which serves as a small database. You can do that with calculate_features.py. 23 | 24 | ``` 25 | python calculate_features.py --help 26 | OpenCV version: 4.5.3 27 | usage: calculate_features.py [-h] --first_stage FIRST_STAGE --second_stage 28 | SECOND_STAGE --third_stage THIRD_STAGE 29 | [--db_file DB_FILE] --img_file IMG_FILE [--id ID] 30 | [--name NAME] 31 | 32 | optional arguments: 33 | -h, --help show this help message and exit 34 | --first_stage FIRST_STAGE 35 | File path of .tflite file. (default: None) 36 | --second_stage SECOND_STAGE 37 | File path of .tflite file. (default: None) 38 | --third_stage THIRD_STAGE 39 | File path of .tflite file.
(default: None) 40 | --db_file DB_FILE File path to database (default: database.db) 41 | --img_file IMG_FILE File path to picture (default: None) 42 | --id ID Unique ID for the face 43 | (default: 0) 44 | --name NAME Name for the face feature vector (can be duplicate) 45 | (default: John Doe) 46 | ``` 47 | For example, to extract a single face embedding vector of Barack Hussein Obama's face you can run: 48 | ``` 49 | python calculate_features.py --first_stage ../face_rec_models/YOLOv3_best_recall_quant.tflite --second_stage ../face_rec_models/MobileFaceNet_kpts_quant.tflite --third_stage ../face_rec_models/MobileFaceNet_features_quant.tflite --img_file obama.jpg --name Obama --id 0 50 | ``` 51 | 52 | ### Face Recognition from Video File 53 | 54 | Once you have a database with at least one face embedding recorded, you can try it on a video file that contains people's faces. This is mainly used for testing and benchmarking purposes. 55 | 56 | Example: 57 | ``` 58 | python multi_stage_file.py --first_stage ../face_rec_models/YOLOv3_best_recall_quant.tflite --second_stage ../face_rec_models/MobileFaceNet_kpts_quant.tflite --third_stage ../face_rec_models/MobileFaceNet_features_quant.tflite --file ../../sample_files/test_s.mp4 59 | ``` 60 | 61 | ### Face Recognition from Video Stream 62 | 63 | Finally, for actual application use you can run the multi_stage_stream.py script. It can get the video stream either from OpenCV or from picamera, if executed on a Raspberry Pi with a Pi camera connected. 64 | 65 | Example: 66 | ``` 67 | python multi_stage_stream.py --first_stage ../face_rec_models/YOLOv3_best_recall_quant.tflite --second_stage ../face_rec_models/MobileFaceNet_kpts_quant.tflite --third_stage ../face_rec_models/MobileFaceNet_features_quant.tflite 68 | ``` 69 | The output is served by a Flask web server on port 5000, which simplifies testing and running the application on headless systems. -------------------------------------------------------------------------------- /examples/tensorflow_lite/face_recognition/base_camera.py: -------------------------------------------------------------------------------- 1 | import time 2 | import threading 3 | try: 4 | from greenlet import getcurrent as get_ident 5 | except ImportError: 6 | try: 7 | from thread import get_ident 8 | except ImportError: 9 | from _thread import get_ident 10 | 11 | 12 | class CameraEvent(object): 13 | """An Event-like class that signals all active clients when a new frame is 14 | available.
15 | """ 16 | def __init__(self): 17 | self.events = {} 18 | 19 | def wait(self): 20 | """Invoked from each client's thread to wait for the next frame.""" 21 | ident = get_ident() 22 | if ident not in self.events: 23 | # this is a new client 24 | # add an entry for it in the self.events dict 25 | # each entry has two elements, a threading.Event() and a timestamp 26 | self.events[ident] = [threading.Event(), time.time()] 27 | return self.events[ident][0].wait() 28 | 29 | def set(self): 30 | """Invoked by the camera thread when a new frame is available.""" 31 | now = time.time() 32 | remove = None 33 | for ident, event in self.events.items(): 34 | if not event[0].isSet(): 35 | # if this client's event is not set, then set it 36 | # also update the last set timestamp to now 37 | event[0].set() 38 | event[1] = now 39 | else: 40 | # if the client's event is already set, it means the client 41 | # did not process a previous frame 42 | # if the event stays set for more than 5 seconds, then assume 43 | # the client is gone and remove it 44 | if now - event[1] > 5: 45 | remove = ident 46 | if remove: 47 | del self.events[remove] 48 | 49 | def clear(self): 50 | """Invoked from each client's thread after a frame was processed.""" 51 | self.events[get_ident()][0].clear() 52 | 53 | 54 | class BaseCamera(object): 55 | thread = None # background thread that reads frames from camera 56 | frame = None # current frame is stored here by background thread 57 | last_access = 0 # time of last client access to the camera 58 | event = CameraEvent() 59 | 60 | def __init__(self): 61 | """Start the background camera thread if it isn't running yet.""" 62 | if BaseCamera.thread is None: 63 | BaseCamera.last_access = time.time() 64 | 65 | # start background frame thread 66 | BaseCamera.thread = threading.Thread(target=self._thread) 67 | BaseCamera.thread.start() 68 | 69 | # wait until frames are available 70 | while self.get_frame() is None: 71 | time.sleep(0) 72 | 73 | def get_frame(self): 74 | """Return the current camera frame.""" 75 | BaseCamera.last_access = time.time() 76 | 77 | # wait for a signal from the camera thread 78 | BaseCamera.event.wait() 79 | BaseCamera.event.clear() 80 | 81 | return BaseCamera.frame 82 | 83 | @staticmethod 84 | def frames(): 85 | """"Generator that returns frames from the camera.""" 86 | raise RuntimeError('Must be implemented by subclasses.') 87 | 88 | @classmethod 89 | def _thread(cls): 90 | """Camera background thread.""" 91 | print('Starting camera thread.') 92 | frames_iterator = cls.frames() 93 | for frame in frames_iterator: 94 | BaseCamera.frame = frame 95 | BaseCamera.event.set() # send signal to clients 96 | time.sleep(0) 97 | 98 | # if there hasn't been any clients asking for frames in 99 | # the last 10 seconds then stop the thread 100 | if time.time() - BaseCamera.last_access > 10: 101 | frames_iterator.close() 102 | print('Stopping camera thread due to inactivity.') 103 | break 104 | BaseCamera.thread = None 105 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/face_recognition/calculate_features.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | import numpy as np 4 | import skimage 5 | import skimage.transform 6 | import json, base64 7 | 8 | from cv_utils import decode_yolov3, preprocess 9 | from tflite_runtime.interpreter import Interpreter 10 | 11 | FACE_ANCHORS = [[[0.51424575, 0.54116074], [0.29523918, 0.45838044], [0.21371929, 
0.21518053]], 12 | [[0.10255913, 0.42572159], [0.05785894, 0.17925645], [0.01839256, 0.07238193]]] 13 | 14 | IMG_SHAPE = (128, 128) # in HW form 15 | offset_x = 0 16 | offset_y = -15 17 | src = np.array([(44+offset_x, 59+offset_y), 18 | (84+offset_x, 59+offset_y), 19 | (64+offset_x, 82+offset_y), 20 | (47+offset_x, 105), 21 | (81+offset_x, 105)], dtype=np.float32) 22 | 23 | def write_db(db, id, name, vector): 24 | 25 | vector = base64.b64encode(vector).decode('utf-8') 26 | 27 | entry = {"name": name, "vector": vector} 28 | db[id] = entry 29 | print(db) 30 | f = open('database.db','w') 31 | entry = json.dumps(db) 32 | f.write(entry) 33 | f.close() 34 | 35 | return db 36 | 37 | def read_db(db_path = 'database.db'): 38 | try: 39 | f = open(db_path, 'r') 40 | except FileNotFoundError: 41 | clear_db(db_path) 42 | f = open(db_path, 'r') 43 | 44 | content = f.read() 45 | #print(content) 46 | if content: 47 | db = json.loads(content) 48 | f.close() 49 | return db 50 | 51 | def clear_db(db_path = 'database.db'): 52 | 53 | f = open(db_path,'w') 54 | db = {} 55 | content = json.dumps(db) 56 | f.write(content) 57 | f.close() 58 | 59 | def draw_bounding_boxes(frame, detections, labels=['face'], kpts = None): 60 | 61 | def _to_original_scale(boxes, frame_height, frame_width): 62 | minmax_boxes = np.empty(shape=(4, ), dtype=np.int) 63 | 64 | cx = boxes[0] * frame_width 65 | cy = boxes[1] * frame_height 66 | w = boxes[2] * frame_width 67 | h = boxes[3] * frame_height 68 | 69 | minmax_boxes[0] = cx - w/2 70 | minmax_boxes[1] = cy - h/2 71 | minmax_boxes[2] = cx + w/2 72 | minmax_boxes[3] = cy + h/2 73 | 74 | return minmax_boxes 75 | 76 | color = (0, 255, 0) 77 | label_color = (125, 125, 125) 78 | 79 | for i in range(len(detections)): 80 | class_idx, box, confidence = [d for d in detections[i]] 81 | 82 | # Obtain frame size and resized bounding box positions 83 | frame_height, frame_width = frame.shape[:2] 84 | 85 | x_min, y_min, x_max, y_max = _to_original_scale(box, frame_height, frame_width) 86 | # Ensure box stays within the frame 87 | x_min, y_min = max(0, x_min), max(0, y_min) 88 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 89 | 90 | # Draw bounding box around detected object 91 | cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2) 92 | 93 | # Create label for detected object class 94 | label = labels[class_idx].capitalize() 95 | label = f'{label} {confidence * 100:.1f}%' 96 | 97 | # Make sure label always stays on-screen 98 | x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2] 99 | 100 | lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text) 101 | lbl_box_xy_max = (x_min + int(0.55 * x_text), y_min + y_text if y_min<25 else y_min) 102 | lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5) 103 | 104 | # Add label and confidence value 105 | cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1) 106 | cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.50, 107 | label_color, 1, cv2.LINE_AA) 108 | 109 | for kpt_set in kpts: 110 | for kpt in kpt_set: 111 | cv2.circle(frame, (int(kpt[0]), int(kpt[1])), 5, (255, 0, 0), 2) 112 | 113 | def process_faces(frame, detections, db, id, name): 114 | kpts_list = [] 115 | 116 | def _to_original_scale(boxes, frame_height, frame_width): 117 | minmax_boxes = np.empty(shape=(4, ), dtype=np.int) 118 | 119 | cx = boxes[0] * frame_width 120 | cy = boxes[1] * frame_height 121 | w = boxes[2] * frame_width 122 | h = boxes[3] * frame_height 123 | 124 | minmax_boxes[0] = cx - 
w/2 125 | minmax_boxes[1] = cy - h/2 126 | minmax_boxes[2] = cx + w/2 127 | minmax_boxes[3] = cy + h/2 128 | 129 | return minmax_boxes 130 | 131 | for i in range(len(detections)): 132 | _, box, _ = [d for d in detections[i]] 133 | 134 | # Obtain frame size and resized bounding box positions 135 | frame_height, frame_width = frame.shape[:2] 136 | 137 | x_min, y_min, x_max, y_max = _to_original_scale(box, frame_height, frame_width) 138 | # Ensure box stays within the frame 139 | x_min, y_min = max(0, x_min), max(0, y_min) 140 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 141 | 142 | x, y, w, h = x_min, y_min, x_max - x_min, y_max - y_min 143 | 144 | face_img = frame[y_min:y_max, x_min:x_max] 145 | 146 | plist = second_stage_network.run(face_img)[0] 147 | 148 | le = (x + int(plist[0] * w+5), y + int(plist[1] * h+5)) 149 | re = (x + int(plist[2] * w), y + int(plist[3] * h+5)) 150 | n = (x + int(plist[4] * w), y + int(plist[5] * h)) 151 | lm = (x + int(plist[6] * w), y + int(plist[7] * h)) 152 | rm = (x + int(plist[8] * w), y + int(plist[9] * h)) 153 | kpts = [le, re, n, lm, rm] 154 | kpts_list.append(kpts) 155 | kpts = np.array(kpts, dtype = np.float32) 156 | 157 | transformer = skimage.transform.SimilarityTransform() 158 | transformer.estimate(kpts, src) 159 | M = transformer.params[0: 2, : ] 160 | warped_img = cv2.warpAffine(frame, M, (IMG_SHAPE[1], IMG_SHAPE[0]), borderValue = 0.0) 161 | 162 | features = third_stage_network.run(warped_img)[0] 163 | 164 | write_db(db, id, name, features) 165 | 166 | return kpts_list 167 | 168 | class NetworkExecutor(object): 169 | 170 | def __init__(self, model_file): 171 | 172 | self.interpreter = Interpreter(model_file, num_threads=3) 173 | self.interpreter.allocate_tensors() 174 | _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape'] 175 | self.tensor_index = self.interpreter.get_input_details()[0]['index'] 176 | 177 | def get_output_tensors(self): 178 | 179 | output_details = self.interpreter.get_output_details() 180 | tensor_list = [] 181 | 182 | for output in output_details: 183 | tensor = np.squeeze(self.interpreter.get_tensor(output['index'])) 184 | tensor_list.append(tensor) 185 | 186 | return tensor_list 187 | 188 | def run(self, image): 189 | if image.shape[1:2] != (self.input_height, self.input_width): 190 | img = cv2.resize(image, (self.input_width, self.input_height)) 191 | img = preprocess(img) 192 | self.interpreter.set_tensor(self.tensor_index, img) 193 | self.interpreter.invoke() 194 | return self.get_output_tensors() 195 | 196 | def main(args): 197 | #clear_db() 198 | db = read_db(args.db_file) 199 | 200 | frame = cv2.imread(args.img_file) 201 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 202 | 203 | results = first_stage_network.run(frame) 204 | detections = decode_yolov3(netout = results, nms_threshold = 0.1, 205 | threshold = 0.7, anchors = FACE_ANCHORS) 206 | 207 | kpts = process_faces(frame, detections, db, args.id, args.name) 208 | 209 | draw_bounding_boxes(frame, detections, ['face'], kpts) 210 | frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) 211 | cv2.imwrite(args.img_file.split('.')[0]+'_result.jpg', frame) 212 | 213 | 214 | if __name__ == "__main__" : 215 | 216 | print("OpenCV version: {}".format(cv2. 
__version__)) 217 | 218 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 219 | parser.add_argument('--first_stage', help='Path to the YOLOv3 face detection model to use.', required=True) 220 | parser.add_argument('--second_stage', help='Path to the keypoints detection model to use.', required=True) 221 | parser.add_argument('--third_stage', help='Path to the feature vector embedding extractor model to use.', required=True) 222 | 223 | parser.add_argument('--db_file', help='File path to database', default="database.db") 224 | parser.add_argument('--img_file', help='File path to picture', required=True) 225 | parser.add_argument('--id', default = '0', type=str, 226 | help='Unique ID for the face') 227 | parser.add_argument('--name', default = 'John Doe', type=str, 228 | help='Name for the face feature vecotr (can be duplicate)') 229 | 230 | args = parser.parse_args() 231 | 232 | first_stage_network = NetworkExecutor(args.first_stage) 233 | second_stage_network = NetworkExecutor(args.second_stage) 234 | third_stage_network = NetworkExecutor(args.third_stage) 235 | 236 | main(args) -------------------------------------------------------------------------------- /examples/tensorflow_lite/face_recognition/camera_opencv.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from base_camera import BaseCamera 3 | 4 | 5 | class Camera(BaseCamera): 6 | video_source = 0 7 | 8 | @staticmethod 9 | def set_video_source(source): 10 | Camera.video_source = source 11 | 12 | @staticmethod 13 | def frames(): 14 | camera = cv2.VideoCapture(Camera.video_source) 15 | if not camera.isOpened(): 16 | raise RuntimeError('Could not start camera.') 17 | 18 | while True: 19 | # read current frame 20 | _, img = camera.read() 21 | #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 22 | 23 | # return img 24 | yield img 25 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/face_recognition/camera_pi.py: -------------------------------------------------------------------------------- 1 | import io 2 | import time 3 | import picamera 4 | import picamera.array 5 | import cv2 6 | from base_camera import BaseCamera 7 | 8 | 9 | class Camera(BaseCamera): 10 | video_source = 0 11 | 12 | @staticmethod 13 | def set_video_source(source): 14 | pass 15 | 16 | @staticmethod 17 | def frames(): 18 | with picamera.PiCamera(resolution = (1280,720)) as camera: 19 | # let camera warm up 20 | time.sleep(2) 21 | 22 | with picamera.array.PiRGBArray(camera, size=(1280,720)) as stream: 23 | while True: 24 | 25 | camera.capture(stream, format='bgr', use_video_port=True) 26 | # At this point the image is available as stream.array 27 | image = stream.array 28 | stream.truncate(0) 29 | yield image 30 | 31 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/face_recognition/multi_stage_file.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | import numpy as np 4 | import skimage 5 | import skimage.transform 6 | import json, base64 7 | import time 8 | from tqdm import tqdm 9 | 10 | from cv_utils import decode_yolov3, preprocess, init_video_file_capture 11 | from tflite_runtime.interpreter import Interpreter 12 | 13 | FACE_ANCHORS = [[[0.51424575, 0.54116074], [0.29523918, 0.45838044], [0.21371929, 0.21518053]], 14 | [[0.10255913, 0.42572159], [0.05785894, 0.17925645], [0.01839256, 
0.07238193]]] 15 | 16 | IMG_SHAPE = (128, 128) # in HW form 17 | offset_x = 0 18 | offset_y = -15 19 | src = np.array([(44+offset_x, 59+offset_y), 20 | (84+offset_x, 59+offset_y), 21 | (64+offset_x, 82+offset_y), 22 | (47+offset_x, 105), 23 | (81+offset_x, 105)], dtype=np.float32) 24 | 25 | def read_db(db_path = 'database.db'): 26 | try: 27 | f = open(db_path, 'r') 28 | except FileNotFoundError: 29 | clear_db(db_path) 30 | f = open(db_path, 'r') 31 | 32 | content = f.read() 33 | #print(content) 34 | if content: 35 | db = json.loads(content) 36 | f.close() 37 | return db 38 | 39 | def clear_db(db_path = 'database.db'): 40 | 41 | f = open(db_path,'w') 42 | db = {} 43 | content = json.dumps(db) 44 | f.write(content) 45 | f.close() 46 | 47 | def draw_bounding_boxes(frame, detections, kpts, ids): 48 | 49 | def _to_original_scale(boxes, frame_height, frame_width): 50 | minmax_boxes = np.empty(shape=(4, ), dtype=np.int) 51 | 52 | cx = boxes[0] * frame_width 53 | cy = boxes[1] * frame_height 54 | w = boxes[2] * frame_width 55 | h = boxes[3] * frame_height 56 | 57 | minmax_boxes[0] = cx - w/2 58 | minmax_boxes[1] = cy - h/2 59 | minmax_boxes[2] = cx + w/2 60 | minmax_boxes[3] = cy + h/2 61 | 62 | return minmax_boxes 63 | 64 | color = (0, 255, 0) 65 | label_color = (125, 125, 125) 66 | 67 | for i in range(len(detections)): 68 | _, box, _ = [d for d in detections[i]] 69 | 70 | # Obtain frame size and resized bounding box positions 71 | frame_height, frame_width = frame.shape[:2] 72 | 73 | x_min, y_min, x_max, y_max = _to_original_scale(box, frame_height, frame_width) 74 | # Ensure box stays within the frame 75 | x_min, y_min = max(0, x_min), max(0, y_min) 76 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 77 | 78 | # Draw bounding box around detected object 79 | cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2) 80 | 81 | # Create label for detected object class 82 | label = 'ID: {} Name: {} {}%'.format(*ids[i]) 83 | label_color = (255, 255, 255) 84 | 85 | # Make sure label always stays on-screen 86 | x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2] 87 | 88 | lbl_box_xy_min = (x_min, y_min if y_min<25 else y_min - y_text) 89 | lbl_box_xy_max = (x_min + int(0.75 * x_text), y_min + y_text if y_min<25 else y_min) 90 | lbl_text_pos = (x_min + 5, y_min + 16 if y_min<25 else y_min - 5) 91 | 92 | # Add label and confidence value 93 | cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1) 94 | cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.70, label_color, 1, cv2.LINE_AA) 95 | 96 | for kpt_set in kpts: 97 | for kpt in kpt_set: 98 | cv2.circle(frame, (int(kpt[0]), int(kpt[1])), 5, (255, 0, 0), 2) 99 | 100 | def process_faces(frame, detections, db): 101 | kpts_list = [] 102 | id_list = [] 103 | 104 | def _to_original_scale(boxes, frame_height, frame_width): 105 | minmax_boxes = np.empty(shape=(4, ), dtype=np.int) 106 | 107 | cx = boxes[0] * frame_width 108 | cy = boxes[1] * frame_height 109 | w = boxes[2] * frame_width 110 | h = boxes[3] * frame_height 111 | 112 | minmax_boxes[0] = cx - w/2 113 | minmax_boxes[1] = cy - h/2 114 | minmax_boxes[2] = cx + w/2 115 | minmax_boxes[3] = cy + h/2 116 | 117 | return minmax_boxes 118 | 119 | for i in range(len(detections)): 120 | _, box, _ = [d for d in detections[i]] 121 | 122 | # Obtain frame size and resized bounding box positions 123 | frame_height, frame_width = frame.shape[:2] 124 | 125 | x_min, y_min, x_max, y_max = _to_original_scale(box, frame_height, frame_width) 126 | # 
Ensure box stays within the frame 127 | x_min, y_min = max(0, x_min), max(0, y_min) 128 | x_max, y_max = min(frame_width, x_max), min(frame_height, y_max) 129 | 130 | x, y, w, h = x_min, y_min, x_max - x_min, y_max - y_min 131 | 132 | face_img = frame[y_min:y_max, x_min:x_max] 133 | 134 | plist = second_stage_network.run(face_img)[0] 135 | 136 | le = (x + int(plist[0] * w+5), y + int(plist[1] * h+5)) 137 | re = (x + int(plist[2] * w), y + int(plist[3] * h+5)) 138 | n = (x + int(plist[4] * w), y + int(plist[5] * h)) 139 | lm = (x + int(plist[6] * w), y + int(plist[7] * h)) 140 | rm = (x + int(plist[8] * w), y + int(plist[9] * h)) 141 | kpts = [le, re, n, lm, rm] 142 | kpts_list.append(kpts) 143 | kpts = np.array(kpts, dtype = np.float32) 144 | 145 | transformer = skimage.transform.SimilarityTransform() 146 | transformer.estimate(kpts, src) 147 | M = transformer.params[0: 2, : ] 148 | warped_img = cv2.warpAffine(frame, M, (IMG_SHAPE[1], IMG_SHAPE[0]), borderValue = 0.0) 149 | 150 | features = third_stage_network.run(warped_img)[0] 151 | 152 | highest_score = 0 153 | 154 | for id in db.keys(): 155 | cos_sim = np.dot(features, db[id]['vector'])/(np.linalg.norm(features)*np.linalg.norm(db[id]['vector'])) 156 | cos_sim /= 2 157 | cos_sim += 0.5 158 | cos_sim *= 100 159 | if highest_score < cos_sim: 160 | highest_score = cos_sim 161 | recognized_id = id 162 | 163 | if highest_score > 70.0: 164 | print(recognized_id, db[recognized_id]['name'], highest_score) 165 | id_list.append([recognized_id, db[recognized_id]['name'], highest_score]) 166 | else: 167 | id_list.append(['X', '', 0.0]) 168 | return kpts_list, id_list 169 | 170 | class NetworkExecutor(object): 171 | 172 | def __init__(self, model_file): 173 | 174 | self.interpreter = Interpreter(model_file, num_threads=3) 175 | self.interpreter.allocate_tensors() 176 | _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape'] 177 | self.tensor_index = self.interpreter.get_input_details()[0]['index'] 178 | 179 | def get_output_tensors(self): 180 | 181 | output_details = self.interpreter.get_output_details() 182 | tensor_indices = [] 183 | tensor_list = [] 184 | 185 | for output in output_details: 186 | tensor = np.squeeze(self.interpreter.get_tensor(output['index'])) 187 | tensor_list.append(tensor) 188 | 189 | return tensor_list 190 | 191 | def run(self, image): 192 | if image.shape[1:2] != (self.input_height, self.input_width): 193 | img = cv2.resize(image, (self.input_width, self.input_height)) 194 | img = preprocess(img) 195 | self.interpreter.set_tensor(self.tensor_index, img) 196 | self.interpreter.invoke() 197 | return self.get_output_tensors() 198 | 199 | def main(args): 200 | video, video_writer, frame_count = init_video_file_capture(args.file, 'age_gender_demo') 201 | 202 | frame_num = len(frame_count) 203 | times = [] 204 | 205 | for _ in tqdm(frame_count, desc='Processing frames'): 206 | frame_present, frame = video.read() 207 | if not frame_present: 208 | continue 209 | 210 | start_time = time.time() 211 | 212 | results = first_stage_network.run(frame) 213 | detections = decode_yolov3(netout = results, nms_threshold = 0.1, 214 | threshold = args.threshold, anchors = FACE_ANCHORS) 215 | kpts, ids = process_faces(frame, detections, db) 216 | 217 | elapsed_ms = (time.time() - start_time) * 1000 218 | 219 | draw_bounding_boxes(frame, detections, kpts, ids) 220 | times.append(elapsed_ms) 221 | video_writer.write(frame) 222 | 223 | print('Finished processing frames') 224 | video.release(), 
video_writer.release() 225 | 226 | print("Average time(ms): ", sum(times)//frame_num) 227 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1000.0 / average of inference times for all the frames 228 | 229 | if __name__ == "__main__": 230 | 231 | print("OpenCV version: {}".format(cv2.__version__)) 232 | 233 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 234 | parser.add_argument('--first_stage', help='Path to the YOLOv3 face detection model to use.', required=True) 235 | parser.add_argument('--second_stage', help='Path to the keypoints detection model to use.', required=True) 236 | parser.add_argument('--third_stage', help='Path to the feature vector embedding extractor model to use.', required=True) 237 | 238 | parser.add_argument('--db_file', help='File path to database', default="database.db") 239 | 240 | parser.add_argument('--threshold', help='Confidence threshold.', type=float, default=0.7) 241 | parser.add_argument('--file', help='File path of video file', required=True) 242 | args = parser.parse_args() 243 | 244 | first_stage_network = NetworkExecutor(args.first_stage) 245 | second_stage_network = NetworkExecutor(args.second_stage) 246 | third_stage_network = NetworkExecutor(args.third_stage) 247 | 248 | db = read_db(args.db_file) 249 | for item in db: 250 | db[item]['vector'] = np.frombuffer(base64.b64decode(db[item]['vector']), np.float32) 251 | 252 | main(args) 253 | 254 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/face_recognition/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.19.2 2 | tqdm>=4.47.0 3 | scikit_image>=0.18.3 4 | opencv-python>=4.5.3 5 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/face_recognition/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Video Streaming Demonstration 4 | 5 | 6 |

Tflite Face Recognition Inference Demo
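To recap the matching step used by the face-recognition demos above: each stored entry holds a base64-encoded float32 embedding, and an incoming face is assigned to the entry with the highest cosine similarity, rescaled to a 0-100 score and accepted above a threshold of 70. The sketch below restates that logic as a standalone pair of helpers (the function names are illustrative).

```
import base64
import json
import numpy as np

def load_db(path='database.db'):
    """Load the JSON face database and decode the base64 embedding vectors."""
    with open(path) as f:
        db = json.load(f)
    for entry in db.values():
        entry['vector'] = np.frombuffer(base64.b64decode(entry['vector']), np.float32)
    return db

def best_match(features, db, threshold=70.0):
    """Return (id, name, score) for the closest stored embedding, or None."""
    best_id, best_score = None, 0.0
    for face_id, entry in db.items():
        cos = np.dot(features, entry['vector']) / (
            np.linalg.norm(features) * np.linalg.norm(entry['vector']))
        score = (cos / 2 + 0.5) * 100          # map [-1, 1] onto [0, 100]
        if score > best_score:
            best_id, best_score = face_id, score
    if best_id is None or best_score < threshold:
        return None
    return best_id, db[best_id]['name'], best_score
```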

7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_age_gender/base_camera.py: -------------------------------------------------------------------------------- 1 | import time 2 | import threading 3 | try: 4 | from greenlet import getcurrent as get_ident 5 | except ImportError: 6 | try: 7 | from thread import get_ident 8 | except ImportError: 9 | from _thread import get_ident 10 | 11 | 12 | class CameraEvent(object): 13 | """An Event-like class that signals all active clients when a new frame is 14 | available. 15 | """ 16 | def __init__(self): 17 | self.events = {} 18 | 19 | def wait(self): 20 | """Invoked from each client's thread to wait for the next frame.""" 21 | ident = get_ident() 22 | if ident not in self.events: 23 | # this is a new client 24 | # add an entry for it in the self.events dict 25 | # each entry has two elements, a threading.Event() and a timestamp 26 | self.events[ident] = [threading.Event(), time.time()] 27 | return self.events[ident][0].wait() 28 | 29 | def set(self): 30 | """Invoked by the camera thread when a new frame is available.""" 31 | now = time.time() 32 | remove = None 33 | for ident, event in self.events.items(): 34 | if not event[0].isSet(): 35 | # if this client's event is not set, then set it 36 | # also update the last set timestamp to now 37 | event[0].set() 38 | event[1] = now 39 | else: 40 | # if the client's event is already set, it means the client 41 | # did not process a previous frame 42 | # if the event stays set for more than 5 seconds, then assume 43 | # the client is gone and remove it 44 | if now - event[1] > 5: 45 | remove = ident 46 | if remove: 47 | del self.events[remove] 48 | 49 | def clear(self): 50 | """Invoked from each client's thread after a frame was processed.""" 51 | self.events[get_ident()][0].clear() 52 | 53 | 54 | class BaseCamera(object): 55 | thread = None # background thread that reads frames from camera 56 | frame = None # current frame is stored here by background thread 57 | last_access = 0 # time of last client access to the camera 58 | event = CameraEvent() 59 | 60 | def __init__(self): 61 | """Start the background camera thread if it isn't running yet.""" 62 | if BaseCamera.thread is None: 63 | BaseCamera.last_access = time.time() 64 | 65 | # start background frame thread 66 | BaseCamera.thread = threading.Thread(target=self._thread) 67 | BaseCamera.thread.start() 68 | 69 | # wait until frames are available 70 | while self.get_frame() is None: 71 | time.sleep(0) 72 | 73 | def get_frame(self): 74 | """Return the current camera frame.""" 75 | BaseCamera.last_access = time.time() 76 | 77 | # wait for a signal from the camera thread 78 | BaseCamera.event.wait() 79 | BaseCamera.event.clear() 80 | 81 | return BaseCamera.frame 82 | 83 | @staticmethod 84 | def frames(): 85 | """"Generator that returns frames from the camera.""" 86 | raise RuntimeError('Must be implemented by subclasses.') 87 | 88 | @classmethod 89 | def _thread(cls): 90 | """Camera background thread.""" 91 | print('Starting camera thread.') 92 | frames_iterator = cls.frames() 93 | for frame in frames_iterator: 94 | BaseCamera.frame = frame 95 | BaseCamera.event.set() # send signal to clients 96 | time.sleep(0) 97 | 98 | # if there hasn't been any clients asking for frames in 99 | # the last 10 seconds then stop the thread 100 | if time.time() - BaseCamera.last_access > 10: 101 | frames_iterator.close() 102 | print('Stopping camera thread due to inactivity.') 103 
| break 104 | BaseCamera.thread = None 105 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_age_gender/camera_opencv.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from base_camera import BaseCamera 3 | 4 | 5 | class Camera(BaseCamera): 6 | video_source = 0 7 | 8 | @staticmethod 9 | def set_video_source(source): 10 | Camera.video_source = source 11 | 12 | @staticmethod 13 | def frames(): 14 | camera = cv2.VideoCapture(Camera.video_source) 15 | if not camera.isOpened(): 16 | raise RuntimeError('Could not start camera.') 17 | 18 | while True: 19 | # read current frame 20 | _, img = camera.read() 21 | #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 22 | 23 | # return img 24 | yield img 25 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_age_gender/camera_pi.py: -------------------------------------------------------------------------------- 1 | import io 2 | import time 3 | import picamera 4 | import picamera.array 5 | import cv2 6 | from base_camera import BaseCamera 7 | 8 | 9 | class Camera(BaseCamera): 10 | video_source = 0 11 | 12 | @staticmethod 13 | def set_video_source(source): 14 | pass 15 | 16 | @staticmethod 17 | def frames(): 18 | with picamera.PiCamera(resolution = (1280,720)) as camera: 19 | # let camera warm up 20 | time.sleep(2) 21 | 22 | with picamera.array.PiRGBArray(camera, size=(1280,720)) as stream: 23 | while True: 24 | 25 | camera.capture(stream, format='bgr', use_video_port=True) 26 | # At this point the image is available as stream.array 27 | image = stream.array 28 | stream.truncate(0) 29 | yield image 30 | 31 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_age_gender/multi_stage_file.py: -------------------------------------------------------------------------------- 1 | import time 2 | import argparse 3 | import os 4 | import cv2 5 | import numpy as np 6 | from tqdm import tqdm 7 | 8 | from cv_utils import init_video_file_capture, decode_yolov3, decode_classifier, draw_classification, draw_bounding_boxes, preprocess 9 | from tflite_runtime.interpreter import Interpreter 10 | 11 | def process_age_gender(roi_img): 12 | 13 | ages = ['0-10', '11-20', '21-45', '46-60', '60-100'] 14 | genders = ['M', 'F'] 15 | 16 | results = second_stage_network.run(roi_img) 17 | age = np.argmax(results[0]) 18 | gender = 0 if results[1] < 0.5 else 1 19 | 20 | label = f'{ages[age]} : {genders[gender]}' 21 | 22 | return label 23 | 24 | class NetworkExecutor(object): 25 | 26 | def __init__(self, model_file): 27 | 28 | self.interpreter = Interpreter(model_file, num_threads=3) 29 | self.interpreter.allocate_tensors() 30 | _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape'] 31 | self.tensor_index = self.interpreter.get_input_details()[0]['index'] 32 | 33 | def get_output_tensors(self): 34 | 35 | output_details = self.interpreter.get_output_details() 36 | tensor_indices = [] 37 | tensor_list = [] 38 | 39 | for output in output_details: 40 | tensor = np.squeeze(self.interpreter.get_tensor(output['index'])) 41 | tensor_list.append(tensor) 42 | 43 | return tensor_list 44 | 45 | def run(self, image): 46 | if image.shape[1:2] != (self.input_height, self.input_width): 47 | img = cv2.resize(image, (self.input_width, self.input_height)) 48 | img = preprocess(img) 49 | 
self.interpreter.set_tensor(self.tensor_index, img) 50 | self.interpreter.invoke() 51 | return self.get_output_tensors() 52 | 53 | def main(args): 54 | video, video_writer, frame_count = init_video_file_capture(args.file, 'age_gender_demo') 55 | 56 | frame_num = len(frame_count) 57 | times = [] 58 | 59 | for _ in tqdm(frame_count, desc='Processing frames'): 60 | frame_present, frame = video.read() 61 | if not frame_present: 62 | continue 63 | 64 | start_time = time.time() 65 | 66 | results = first_stage_network.run(frame) 67 | detections = decode_yolov3(netout = results, nms_threshold = 0.1, threshold = args.threshold) 68 | draw_bounding_boxes(frame, detections, None, process_age_gender) 69 | 70 | elapsed_ms = (time.time() - start_time) * 1000 71 | 72 | times.append(elapsed_ms) 73 | video_writer.write(frame) 74 | 75 | print('Finished processing frames') 76 | video.release(), video_writer.release() 77 | 78 | print("Average time(ms): ", sum(times)//frame_num) 79 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1 / time to process loop 80 | 81 | if __name__ == "__main__" : 82 | 83 | print("OpenCV version: {}".format(cv2. __version__)) 84 | 85 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 86 | parser.add_argument('--first_stage', help='File path of .tflite file.', required=True) 87 | parser.add_argument('--second_stage', help='File path of .tflite file.', required=True) 88 | parser.add_argument('--threshold', help='Confidence threshold.', default=0.7) 89 | parser.add_argument('--file', help='File path of video file', required=True) 90 | args = parser.parse_args() 91 | 92 | first_stage_network = NetworkExecutor(args.first_stage) 93 | second_stage_network = NetworkExecutor(args.second_stage) 94 | 95 | main(args) 96 | 97 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_age_gender/multi_stage_stream.py: -------------------------------------------------------------------------------- 1 | import time 2 | import argparse 3 | import os 4 | import cv2 5 | import numpy as np 6 | 7 | from cv_utils import decode_yolov3, preprocess, draw_bounding_boxes 8 | from tflite_runtime.interpreter import Interpreter 9 | from flask import Flask, render_template, request, Response 10 | 11 | app = Flask (__name__, static_url_path = '') 12 | 13 | def process_age_gender(roi_img): 14 | 15 | ages = ['0-10', '11-20', '21-45', '46-60', '60-100'] 16 | genders = ['M', 'F'] 17 | 18 | results = second_stage_network.run(roi_img) 19 | age = np.argmax(results[0]) 20 | gender = 0 if results[1] < 0.5 else 1 21 | 22 | label = f'{ages[age]} : {genders[gender]}' 23 | 24 | return label 25 | 26 | class NetworkExecutor(object): 27 | 28 | def __init__(self, model_file): 29 | 30 | self.interpreter = Interpreter(model_file, num_threads=3) 31 | self.interpreter.allocate_tensors() 32 | _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape'] 33 | self.tensor_index = self.interpreter.get_input_details()[0]['index'] 34 | 35 | def get_output_tensors(self): 36 | 37 | output_details = self.interpreter.get_output_details() 38 | tensor_indices = [] 39 | tensor_list = [] 40 | 41 | for output in output_details: 42 | tensor = np.squeeze(self.interpreter.get_tensor(output['index'])) 43 | tensor_list.append(tensor) 44 | 45 | return tensor_list 46 | 47 | def run(self, image): 48 | if image.shape[1:2] != (self.input_height, self.input_width): 49 | img = cv2.resize(image, 
(self.input_width, self.input_height)) 50 | img = preprocess(img) 51 | self.interpreter.set_tensor(self.tensor_index, img) 52 | self.interpreter.invoke() 53 | return self.get_output_tensors() 54 | 55 | class Detector(NetworkExecutor): 56 | 57 | def __init__(self, label_file, model_file, threshold): 58 | super().__init__(model_file) 59 | self.threshold = float(threshold) 60 | 61 | def detect(self, frame): 62 | start_time = time.time() 63 | results = self.run(frame) 64 | elapsed_ms = (time.time() - start_time) * 1000 65 | 66 | detections = decode_yolov3(netout = results, nms_threshold = 0.1, threshold = self.threshold) 67 | draw_bounding_boxes(frame, detections, None, process_age_gender) 68 | 69 | fps = 1 / elapsed_ms*1000 70 | print("Estimated frames per second : {0:.2f} Inference time: {1:.2f}".format(fps, elapsed_ms)) 71 | 72 | return cv2.imencode('.jpg', frame)[1].tobytes() 73 | 74 | @app.route("/") 75 | def index(): 76 | return render_template('index.html', name = None) 77 | 78 | def gen(camera): 79 | while True: 80 | frame = camera.get_frame() 81 | image = detector.detect(frame) 82 | yield (b'--frame\r\n'+b'Content-Type: image/jpeg\r\n\r\n' + image + b'\r\n') 83 | 84 | @app.route('/video_feed') 85 | def video_feed(): 86 | return Response(gen(Camera()), mimetype='multipart/x-mixed-replace; boundary=frame') 87 | 88 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 89 | parser.add_argument('--first_stage', help='File path of .tflite file.', required=True) 90 | parser.add_argument('--second_stage', help='File path of .tflite file.', required=True) 91 | parser.add_argument('--threshold', help='Confidence threshold.', default=0.5) 92 | parser.add_argument('--source', help='picamera or cv', default='cv') 93 | args = parser.parse_args() 94 | 95 | if args.source == "cv": 96 | from camera_opencv import Camera 97 | source = 0 98 | elif args.source == "picamera": 99 | from camera_pi import Camera 100 | source = 0 101 | 102 | Camera.set_video_source(source) 103 | 104 | detector = Detector(None, args.first_stage, args.threshold) 105 | second_stage_network = NetworkExecutor(args.second_stage) 106 | 107 | if __name__ == "__main__" : 108 | app.run(host = '0.0.0.0', port = 5000, debug = True) 109 | 110 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_age_gender/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Video Streaming Demonstration 4 | 5 | 6 |

Tflite Multi-stage Inference Demo

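The age/gender demo above decodes its second-stage classifier inside process_age_gender(): results[0] is treated as a five-bucket age distribution and results[1] as a gender score thresholded at 0.5. Below is a minimal, self-contained sketch of that decoding step. It is not a file from this repository; the function name decode_age_gender and the mock arrays are illustrative stand-ins for the two tensors that NetworkExecutor.run() returns for a face crop.

# Sketch only: mirrors the decoding done by process_age_gender() above, with
# mock arrays in place of the real second-stage TFLite outputs.
import numpy as np

AGES = ['0-10', '11-20', '21-45', '46-60', '60-100']
GENDERS = ['M', 'F']

def decode_age_gender(age_probs, gender_score, gender_threshold=0.5):
    """Map raw classifier outputs to the 'age : gender' label drawn on the box."""
    age = int(np.argmax(age_probs))                      # most likely age bracket
    gender = 0 if gender_score < gender_threshold else 1 # below threshold -> 'M'
    return f'{AGES[age]} : {GENDERS[gender]}'

if __name__ == '__main__':
    mock_age_probs = np.array([0.05, 0.10, 0.60, 0.20, 0.05])   # placeholder output
    mock_gender_score = 0.31                                     # placeholder output
    print(decode_age_gender(mock_age_probs, mock_gender_score))  # -> '21-45 : M'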
7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_emotion/base_camera.py: -------------------------------------------------------------------------------- 1 | import time 2 | import threading 3 | try: 4 | from greenlet import getcurrent as get_ident 5 | except ImportError: 6 | try: 7 | from thread import get_ident 8 | except ImportError: 9 | from _thread import get_ident 10 | 11 | 12 | class CameraEvent(object): 13 | """An Event-like class that signals all active clients when a new frame is 14 | available. 15 | """ 16 | def __init__(self): 17 | self.events = {} 18 | 19 | def wait(self): 20 | """Invoked from each client's thread to wait for the next frame.""" 21 | ident = get_ident() 22 | if ident not in self.events: 23 | # this is a new client 24 | # add an entry for it in the self.events dict 25 | # each entry has two elements, a threading.Event() and a timestamp 26 | self.events[ident] = [threading.Event(), time.time()] 27 | return self.events[ident][0].wait() 28 | 29 | def set(self): 30 | """Invoked by the camera thread when a new frame is available.""" 31 | now = time.time() 32 | remove = None 33 | for ident, event in self.events.items(): 34 | if not event[0].isSet(): 35 | # if this client's event is not set, then set it 36 | # also update the last set timestamp to now 37 | event[0].set() 38 | event[1] = now 39 | else: 40 | # if the client's event is already set, it means the client 41 | # did not process a previous frame 42 | # if the event stays set for more than 5 seconds, then assume 43 | # the client is gone and remove it 44 | if now - event[1] > 5: 45 | remove = ident 46 | if remove: 47 | del self.events[remove] 48 | 49 | def clear(self): 50 | """Invoked from each client's thread after a frame was processed.""" 51 | self.events[get_ident()][0].clear() 52 | 53 | 54 | class BaseCamera(object): 55 | thread = None # background thread that reads frames from camera 56 | frame = None # current frame is stored here by background thread 57 | last_access = 0 # time of last client access to the camera 58 | event = CameraEvent() 59 | 60 | def __init__(self): 61 | """Start the background camera thread if it isn't running yet.""" 62 | if BaseCamera.thread is None: 63 | BaseCamera.last_access = time.time() 64 | 65 | # start background frame thread 66 | BaseCamera.thread = threading.Thread(target=self._thread) 67 | BaseCamera.thread.start() 68 | 69 | # wait until frames are available 70 | while self.get_frame() is None: 71 | time.sleep(0) 72 | 73 | def get_frame(self): 74 | """Return the current camera frame.""" 75 | BaseCamera.last_access = time.time() 76 | 77 | # wait for a signal from the camera thread 78 | BaseCamera.event.wait() 79 | BaseCamera.event.clear() 80 | 81 | return BaseCamera.frame 82 | 83 | @staticmethod 84 | def frames(): 85 | """"Generator that returns frames from the camera.""" 86 | raise RuntimeError('Must be implemented by subclasses.') 87 | 88 | @classmethod 89 | def _thread(cls): 90 | """Camera background thread.""" 91 | print('Starting camera thread.') 92 | frames_iterator = cls.frames() 93 | for frame in frames_iterator: 94 | BaseCamera.frame = frame 95 | BaseCamera.event.set() # send signal to clients 96 | time.sleep(0) 97 | 98 | # if there hasn't been any clients asking for frames in 99 | # the last 10 seconds then stop the thread 100 | if time.time() - BaseCamera.last_access > 10: 101 | frames_iterator.close() 102 | print('Stopping camera thread due to inactivity.') 103 | 
break 104 | BaseCamera.thread = None 105 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_emotion/camera_opencv.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from base_camera import BaseCamera 3 | 4 | 5 | class Camera(BaseCamera): 6 | video_source = 0 7 | 8 | @staticmethod 9 | def set_video_source(source): 10 | Camera.video_source = source 11 | 12 | @staticmethod 13 | def frames(): 14 | camera = cv2.VideoCapture(Camera.video_source) 15 | if not camera.isOpened(): 16 | raise RuntimeError('Could not start camera.') 17 | 18 | while True: 19 | # read current frame 20 | _, img = camera.read() 21 | #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 22 | 23 | # return img 24 | yield img 25 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_emotion/camera_pi.py: -------------------------------------------------------------------------------- 1 | import io 2 | import time 3 | import picamera 4 | import picamera.array 5 | import cv2 6 | from base_camera import BaseCamera 7 | 8 | 9 | class Camera(BaseCamera): 10 | video_source = 0 11 | 12 | @staticmethod 13 | def set_video_source(source): 14 | pass 15 | 16 | @staticmethod 17 | def frames(): 18 | with picamera.PiCamera(resolution = (1280,720)) as camera: 19 | # let camera warm up 20 | time.sleep(2) 21 | 22 | with picamera.array.PiRGBArray(camera, size=(1280,720)) as stream: 23 | while True: 24 | 25 | camera.capture(stream, format='bgr', use_video_port=True) 26 | # At this point the image is available as stream.array 27 | image = stream.array 28 | stream.truncate(0) 29 | yield image 30 | 31 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_emotion/multi_stage_file.py: -------------------------------------------------------------------------------- 1 | import time 2 | import argparse 3 | import os 4 | import cv2 5 | import numpy as np 6 | from tqdm import tqdm 7 | 8 | from cv_utils import init_video_file_capture, decode_yolov3, decode_classifier, draw_classification, draw_bounding_boxes, preprocess 9 | from tflite_runtime.interpreter import Interpreter 10 | 11 | def process_face_expression(roi_img): 12 | 13 | emotion_list = ['neutral', 'happiness', 'surprise', 'sadness', 'anger', 'disgust', 'fear', 'contempt', 'unknown'] 14 | 15 | results = np.squeeze(second_stage_network.run(roi_img)) 16 | emotion_idx = np.argmax(results) 17 | emotion_confience = np.max(results) 18 | 19 | label = f'{emotion_list[emotion_idx]} {emotion_confience:.4f}%' 20 | 21 | return label 22 | 23 | class NetworkExecutor(object): 24 | 25 | def __init__(self, model_file): 26 | 27 | self.interpreter = Interpreter(model_file, num_threads=3) 28 | self.interpreter.allocate_tensors() 29 | _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape'] 30 | self.tensor_index = self.interpreter.get_input_details()[0]['index'] 31 | 32 | def get_output_tensors(self): 33 | 34 | output_details = self.interpreter.get_output_details() 35 | tensor_indices = [] 36 | tensor_list = [] 37 | 38 | for output in output_details: 39 | tensor = np.squeeze(self.interpreter.get_tensor(output['index'])) 40 | tensor_list.append(tensor) 41 | 42 | return tensor_list 43 | 44 | def run(self, image): 45 | if image.shape[1:2] != (self.input_height, self.input_width): 46 | img = cv2.resize(image, 
(self.input_width, self.input_height)) 47 | img = preprocess(img) 48 | self.interpreter.set_tensor(self.tensor_index, img) 49 | self.interpreter.invoke() 50 | return self.get_output_tensors() 51 | 52 | def main(args): 53 | video, video_writer, frame_count = init_video_file_capture(args.file, 'emotion_demo') 54 | 55 | frame_num = len(frame_count) 56 | times = [] 57 | 58 | for _ in tqdm(frame_count, desc='Processing frames'): 59 | frame_present, frame = video.read() 60 | if not frame_present: 61 | continue 62 | 63 | start_time = time.time() 64 | 65 | results = first_stage_network.run(frame) 66 | detections = decode_yolov3(netout = results, nms_threshold = 0.1, threshold = args.threshold, anchors = [[[0.51424575, 0.54116074], [0.29523918, 0.45838044], [0.21371929, 0.21518053]], 67 | [[0.10255913, 0.42572159], [0.05785894, 0.17925645], [0.01839256, 0.07238193]]]) 68 | draw_bounding_boxes(frame, detections, None, process_face_expression) 69 | 70 | elapsed_ms = (time.time() - start_time) * 1000 71 | 72 | times.append(elapsed_ms) 73 | video_writer.write(frame) 74 | 75 | print('Finished processing frames') 76 | video.release(), video_writer.release() 77 | 78 | print("Average time(ms): ", sum(times)//frame_num) 79 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1 / time to process loop 80 | 81 | if __name__ == "__main__" : 82 | 83 | print("OpenCV version: {}".format(cv2. __version__)) 84 | 85 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 86 | parser.add_argument('--first_stage', help='File path of .tflite file.', required=True) 87 | parser.add_argument('--second_stage', help='File path of .tflite file.', required=True) 88 | parser.add_argument('--threshold', help='Confidence threshold.', default=0.7) 89 | parser.add_argument('--file', help='File path of video file', required=True) 90 | args = parser.parse_args() 91 | 92 | first_stage_network = NetworkExecutor(args.first_stage) 93 | second_stage_network = NetworkExecutor(args.second_stage) 94 | 95 | main(args) 96 | 97 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_emotion/multi_stage_stream.py: -------------------------------------------------------------------------------- 1 | import time 2 | import argparse 3 | import os 4 | import cv2 5 | import numpy as np 6 | 7 | from cv_utils import decode_yolov3, preprocess, draw_bounding_boxes 8 | from tflite_runtime.interpreter import Interpreter 9 | from flask import Flask, render_template, request, Response 10 | 11 | app = Flask (__name__, static_url_path = '') 12 | 13 | def process_face_expression(roi_img): 14 | 15 | emotion_list = ['neutral', 'happiness', 'surprise', 'sadness', 'anger', 'disgust', 'fear', 'contempt', 'unknown'] 16 | 17 | results = np.squeeze(second_stage_network.run(roi_img)) 18 | emotion_idx = np.argmax(results) 19 | emotion_confience = np.max(results) 20 | 21 | label = f'{emotion_list[emotion_idx]} {emotion_confience:.4f}%' 22 | 23 | return label 24 | 25 | class NetworkExecutor(object): 26 | 27 | def __init__(self, model_file): 28 | 29 | self.interpreter = Interpreter(model_file, num_threads=3) 30 | self.interpreter.allocate_tensors() 31 | _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape'] 32 | self.tensor_index = self.interpreter.get_input_details()[0]['index'] 33 | 34 | def get_output_tensors(self): 35 | 36 | output_details = self.interpreter.get_output_details() 37 | tensor_indices = [] 38 | tensor_list 
= [] 39 | 40 | for output in output_details: 41 | tensor = np.squeeze(self.interpreter.get_tensor(output['index'])) 42 | tensor_list.append(tensor) 43 | 44 | return tensor_list 45 | 46 | def run(self, image): 47 | if image.shape[1:2] != (self.input_height, self.input_width): 48 | img = cv2.resize(image, (self.input_width, self.input_height)) 49 | img = preprocess(img) 50 | self.interpreter.set_tensor(self.tensor_index, img) 51 | self.interpreter.invoke() 52 | return self.get_output_tensors() 53 | 54 | class Detector(NetworkExecutor): 55 | 56 | def __init__(self, label_file, model_file, threshold): 57 | super().__init__(model_file) 58 | self.threshold = float(threshold) 59 | 60 | def detect(self, frame): 61 | start_time = time.time() 62 | results = self.run(frame) 63 | elapsed_ms = (time.time() - start_time) * 1000 64 | 65 | detections = decode_yolov3(netout = results, nms_threshold = 0.1, threshold = args.threshold, anchors = [[[0.51424575, 0.54116074], [0.29523918, 0.45838044], [0.21371929, 0.21518053]], 66 | [[0.10255913, 0.42572159], [0.05785894, 0.17925645], [0.01839256, 0.07238193]]]) 67 | draw_bounding_boxes(frame, detections, None, process_face_expression) 68 | 69 | fps = 1 / elapsed_ms*1000 70 | print("Estimated frames per second : {0:.2f} Inference time: {1:.2f}".format(fps, elapsed_ms)) 71 | 72 | return cv2.imencode('.jpg', frame)[1].tobytes() 73 | 74 | @app.route("/") 75 | def index(): 76 | return render_template('index.html', name = None) 77 | 78 | def gen(camera): 79 | while True: 80 | frame = camera.get_frame() 81 | image = detector.detect(frame) 82 | yield (b'--frame\r\n'+b'Content-Type: image/jpeg\r\n\r\n' + image + b'\r\n') 83 | 84 | @app.route('/video_feed') 85 | def video_feed(): 86 | return Response(gen(Camera()), mimetype='multipart/x-mixed-replace; boundary=frame') 87 | 88 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 89 | parser.add_argument('--first_stage', help='File path of .tflite file.', required=True) 90 | parser.add_argument('--second_stage', help='File path of .tflite file.', required=True) 91 | parser.add_argument('--threshold', help='Confidence threshold.', default=0.8) 92 | parser.add_argument('--source', help='picamera or cv', default='cv') 93 | args = parser.parse_args() 94 | 95 | if args.source == "cv": 96 | from camera_opencv import Camera 97 | source = 0 98 | elif args.source == "picamera": 99 | from camera_pi import Camera 100 | source = 0 101 | 102 | Camera.set_video_source(source) 103 | 104 | detector = Detector(None, args.first_stage, args.threshold) 105 | second_stage_network = NetworkExecutor(args.second_stage) 106 | 107 | if __name__ == "__main__" : 108 | app.run(host = '0.0.0.0', port = 5000, debug = True) 109 | 110 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_emotion/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Video Streaming Demonstration 4 | 5 | 6 |

Tflite Multi-stage Inference Demo

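All three streaming demos share the same two-stage pattern: the first-stage detector produces face boxes, and draw_bounding_boxes() receives a per-ROI function such as process_face_expression() above which, judging by its roi_img signature, is applied to each detected crop. The sketch below illustrates only that callback pattern; it is not repository code, the (x1, y1, x2, y2) box format is an assumption, and the stub classifier stands in for the second-stage TFLite model (the actual crop-and-draw logic lives in cv_utils, which is outside this section).

# Sketch only: the per-ROI callback pattern used by the multi-stage scripts,
# with mock detections and a stub classifier instead of decode_yolov3() and
# the second-stage network.
import numpy as np

def classify_roi_stub(roi):
    """Placeholder for running the second-stage classifier on a face crop."""
    return 'happiness 85.00%'

def label_detections(frame, boxes, classify_roi):
    """Crop each assumed (x1, y1, x2, y2) box out of the frame and label it."""
    labels = []
    for (x1, y1, x2, y2) in boxes:
        roi = frame[y1:y2, x1:x2]       # crop handed to the per-ROI callback
        labels.append(classify_roi(roi))
    return labels

if __name__ == '__main__':
    frame = np.zeros((720, 1280, 3), dtype=np.uint8)   # stand-in camera frame
    boxes = [(100, 100, 220, 240)]                     # stand-in detector output
    print(label_detections(frame, boxes, classify_roi_stub))

Keeping the classification step behind a callback is what lets the age/gender, emotion, and vehicle-type examples reuse an essentially identical Detector/NetworkExecutor pair and differ only in the per-ROI function and the model files passed on the command line.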
7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_vehicle_type/base_camera.py: -------------------------------------------------------------------------------- 1 | import time 2 | import threading 3 | try: 4 | from greenlet import getcurrent as get_ident 5 | except ImportError: 6 | try: 7 | from thread import get_ident 8 | except ImportError: 9 | from _thread import get_ident 10 | 11 | 12 | class CameraEvent(object): 13 | """An Event-like class that signals all active clients when a new frame is 14 | available. 15 | """ 16 | def __init__(self): 17 | self.events = {} 18 | 19 | def wait(self): 20 | """Invoked from each client's thread to wait for the next frame.""" 21 | ident = get_ident() 22 | if ident not in self.events: 23 | # this is a new client 24 | # add an entry for it in the self.events dict 25 | # each entry has two elements, a threading.Event() and a timestamp 26 | self.events[ident] = [threading.Event(), time.time()] 27 | return self.events[ident][0].wait() 28 | 29 | def set(self): 30 | """Invoked by the camera thread when a new frame is available.""" 31 | now = time.time() 32 | remove = None 33 | for ident, event in self.events.items(): 34 | if not event[0].isSet(): 35 | # if this client's event is not set, then set it 36 | # also update the last set timestamp to now 37 | event[0].set() 38 | event[1] = now 39 | else: 40 | # if the client's event is already set, it means the client 41 | # did not process a previous frame 42 | # if the event stays set for more than 5 seconds, then assume 43 | # the client is gone and remove it 44 | if now - event[1] > 5: 45 | remove = ident 46 | if remove: 47 | del self.events[remove] 48 | 49 | def clear(self): 50 | """Invoked from each client's thread after a frame was processed.""" 51 | self.events[get_ident()][0].clear() 52 | 53 | 54 | class BaseCamera(object): 55 | thread = None # background thread that reads frames from camera 56 | frame = None # current frame is stored here by background thread 57 | last_access = 0 # time of last client access to the camera 58 | event = CameraEvent() 59 | 60 | def __init__(self): 61 | """Start the background camera thread if it isn't running yet.""" 62 | if BaseCamera.thread is None: 63 | BaseCamera.last_access = time.time() 64 | 65 | # start background frame thread 66 | BaseCamera.thread = threading.Thread(target=self._thread) 67 | BaseCamera.thread.start() 68 | 69 | # wait until frames are available 70 | while self.get_frame() is None: 71 | time.sleep(0) 72 | 73 | def get_frame(self): 74 | """Return the current camera frame.""" 75 | BaseCamera.last_access = time.time() 76 | 77 | # wait for a signal from the camera thread 78 | BaseCamera.event.wait() 79 | BaseCamera.event.clear() 80 | 81 | return BaseCamera.frame 82 | 83 | @staticmethod 84 | def frames(): 85 | """"Generator that returns frames from the camera.""" 86 | raise RuntimeError('Must be implemented by subclasses.') 87 | 88 | @classmethod 89 | def _thread(cls): 90 | """Camera background thread.""" 91 | print('Starting camera thread.') 92 | frames_iterator = cls.frames() 93 | for frame in frames_iterator: 94 | BaseCamera.frame = frame 95 | BaseCamera.event.set() # send signal to clients 96 | time.sleep(0) 97 | 98 | # if there hasn't been any clients asking for frames in 99 | # the last 10 seconds then stop the thread 100 | if time.time() - BaseCamera.last_access > 10: 101 | frames_iterator.close() 102 | print('Stopping camera thread due to inactivity.') 
103 | break 104 | BaseCamera.thread = None 105 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_vehicle_type/camera_opencv.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from base_camera import BaseCamera 3 | 4 | 5 | class Camera(BaseCamera): 6 | video_source = 0 7 | 8 | @staticmethod 9 | def set_video_source(source): 10 | Camera.video_source = source 11 | 12 | @staticmethod 13 | def frames(): 14 | camera = cv2.VideoCapture(Camera.video_source) 15 | if not camera.isOpened(): 16 | raise RuntimeError('Could not start camera.') 17 | 18 | while True: 19 | # read current frame 20 | _, img = camera.read() 21 | #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 22 | 23 | # return img 24 | yield img 25 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_vehicle_type/camera_pi.py: -------------------------------------------------------------------------------- 1 | import io 2 | import time 3 | import picamera 4 | import picamera.array 5 | import cv2 6 | from base_camera import BaseCamera 7 | 8 | 9 | class Camera(BaseCamera): 10 | video_source = 0 11 | 12 | @staticmethod 13 | def set_video_source(source): 14 | pass 15 | 16 | @staticmethod 17 | def frames(): 18 | with picamera.PiCamera(resolution = (1280,720)) as camera: 19 | # let camera warm up 20 | time.sleep(2) 21 | 22 | with picamera.array.PiRGBArray(camera, size=(1280,720)) as stream: 23 | while True: 24 | 25 | camera.capture(stream, format='bgr', use_video_port=True) 26 | # At this point the image is available as stream.array 27 | image = stream.array 28 | stream.truncate(0) 29 | yield image 30 | 31 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_vehicle_type/labels.txt: -------------------------------------------------------------------------------- 1 | AM-General-Hummer-SUV-2000 2 | Acura-Integra-Type-R-2001 3 | Acura-RL-Sedan-2012 4 | Acura-TL-Sedan-2012 5 | Acura-TL-Type-S-2008 6 | Acura-TSX-Sedan-2012 7 | Acura-ZDX-Hatchback-2012 8 | Aston-Martin-V8-Vantage-Convertible-2012 9 | Aston-Martin-V8-Vantage-Coupe-2012 10 | Aston-Martin-Virage-Convertible-2012 11 | Aston-Martin-Virage-Coupe-2012 12 | Audi-100-Sedan-1994 13 | Audi-100-Wagon-1994 14 | Audi-A5-Coupe-2012 15 | Audi-R8-Coupe-2012 16 | Audi-RS-4-Convertible-2008 17 | Audi-S4-Sedan-2007 18 | Audi-S4-Sedan-2012 19 | Audi-S5-Convertible-2012 20 | Audi-S5-Coupe-2012 21 | Audi-S6-Sedan-2011 22 | Audi-TT-Hatchback-2011 23 | Audi-TT-RS-Coupe-2012 24 | Audi-TTS-Coupe-2012 25 | Audi-V8-Sedan-1994 26 | BMW-1-Series-Convertible-2012 27 | BMW-1-Series-Coupe-2012 28 | BMW-3-Series-Sedan-2012 29 | BMW-3-Series-Wagon-2012 30 | BMW-6-Series-Convertible-2007 31 | BMW-ActiveHybrid-5-Sedan-2012 32 | BMW-M3-Coupe-2012 33 | BMW-M5-Sedan-2010 34 | BMW-M6-Convertible-2010 35 | BMW-X3-SUV-2012 36 | BMW-X5-SUV-2007 37 | BMW-X6-SUV-2012 38 | BMW-Z4-Convertible-2012 39 | Bentley-Arnage-Sedan-2009 40 | Bentley-Continental-Flying-Spur-Sedan-2007 41 | Bentley-Continental-GT-Coupe-2007 42 | Bentley-Continental-GT-Coupe-2012 43 | Bentley-Continental-Supersports-Conv.-Convertible-2012 44 | Bentley-Mulsanne-Sedan-2011 45 | Bugatti-Veyron-16.4-Convertible-2009 46 | Bugatti-Veyron-16.4-Coupe-2009 47 | Buick-Enclave-SUV-2012 48 | Buick-Rainier-SUV-2007 49 | Buick-Regal-GS-2012 50 | Buick-Verano-Sedan-2012 51 | 
Cadillac-CTS-V-Sedan-2012 52 | Cadillac-Escalade-EXT-Crew-Cab-2007 53 | Cadillac-SRX-SUV-2012 54 | Chevrolet-Avalanche-Crew-Cab-2012 55 | Chevrolet-Camaro-Convertible-2012 56 | Chevrolet-Cobalt-SS-2010 57 | Chevrolet-Corvette-Convertible-2012 58 | Chevrolet-Corvette-Ron-Fellows-Edition-Z06-2007 59 | Chevrolet-Corvette-ZR1-2012 60 | Chevrolet-Express-Cargo-Van-2007 61 | Chevrolet-Express-Van-2007 62 | Chevrolet-HHR-SS-2010 63 | Chevrolet-Impala-Sedan-2007 64 | Chevrolet-Malibu-Hybrid-Sedan-2010 65 | Chevrolet-Malibu-Sedan-2007 66 | Chevrolet-Monte-Carlo-Coupe-2007 67 | Chevrolet-Silverado-1500-Classic-Extended-Cab-2007 68 | Chevrolet-Silverado-1500-Extended-Cab-2012 69 | Chevrolet-Silverado-1500-Hybrid-Crew-Cab-2012 70 | Chevrolet-Silverado-1500-Regular-Cab-2012 71 | Chevrolet-Silverado-2500HD-Regular-Cab-2012 72 | Chevrolet-Sonic-Sedan-2012 73 | Chevrolet-Tahoe-Hybrid-SUV-2012 74 | Chevrolet-TrailBlazer-SS-2009 75 | Chevrolet-Traverse-SUV-2012 76 | Chrysler-300-SRT-8-2010 77 | Chrysler-Aspen-SUV-2009 78 | Chrysler-Crossfire-Convertible-2008 79 | Chrysler-PT-Cruiser-Convertible-2008 80 | Chrysler-Sebring-Convertible-2010 81 | Chrysler-Town-and-Country-Minivan-2012 82 | Daewoo-Nubira-Wagon-2002 83 | Dodge-Caliber-Wagon-2007 84 | Dodge-Caliber-Wagon-2012 85 | Dodge-Caravan-Minivan-1997 86 | Dodge-Challenger-SRT8-2011 87 | Dodge-Charger-SRT-8-2009 88 | Dodge-Charger-Sedan-2012 89 | Dodge-Dakota-Club-Cab-2007 90 | Dodge-Dakota-Crew-Cab-2010 91 | Dodge-Durango-SUV-2007 92 | Dodge-Durango-SUV-2012 93 | Dodge-Journey-SUV-2012 94 | Dodge-Magnum-Wagon-2008 95 | Dodge-Ram-Pickup-3500-Crew-Cab-2010 96 | Dodge-Ram-Pickup-3500-Quad-Cab-2009 97 | Dodge-Sprinter-Cargo-Van-2009 98 | Eagle-Talon-Hatchback-1998 99 | FIAT-500-Abarth-2012 100 | FIAT-500-Convertible-2012 101 | Ferrari-458-Italia-Convertible-2012 102 | Ferrari-458-Italia-Coupe-2012 103 | Ferrari-California-Convertible-2012 104 | Ferrari-FF-Coupe-2012 105 | Fisker-Karma-Sedan-2012 106 | Ford-E-Series-Wagon-Van-2012 107 | Ford-Edge-SUV-2012 108 | Ford-Expedition-EL-SUV-2009 109 | Ford-F-150-Regular-Cab-2007 110 | Ford-F-150-Regular-Cab-2012 111 | Ford-F-450-Super-Duty-Crew-Cab-2012 112 | Ford-Fiesta-Sedan-2012 113 | Ford-Focus-Sedan-2007 114 | Ford-Freestar-Minivan-2007 115 | Ford-GT-Coupe-2006 116 | Ford-Mustang-Convertible-2007 117 | Ford-Ranger-SuperCab-2011 118 | GMC-Acadia-SUV-2012 119 | GMC-Canyon-Extended-Cab-2012 120 | GMC-Savana-Van-2012 121 | GMC-Terrain-SUV-2012 122 | GMC-Yukon-Hybrid-SUV-2012 123 | Geo-Metro-Convertible-1993 124 | HUMMER-H2-SUT-Crew-Cab-2009 125 | HUMMER-H3T-Crew-Cab-2010 126 | Honda-Accord-Coupe-2012 127 | Honda-Accord-Sedan-2012 128 | Honda-Odyssey-Minivan-2007 129 | Honda-Odyssey-Minivan-2012 130 | Hyundai-Accent-Sedan-2012 131 | Hyundai-Azera-Sedan-2012 132 | Hyundai-Elantra-Sedan-2007 133 | Hyundai-Elantra-Touring-Hatchback-2012 134 | Hyundai-Genesis-Sedan-2012 135 | Hyundai-Santa-Fe-SUV-2012 136 | Hyundai-Sonata-Hybrid-Sedan-2012 137 | Hyundai-Sonata-Sedan-2012 138 | Hyundai-Tucson-SUV-2012 139 | Hyundai-Veloster-Hatchback-2012 140 | Hyundai-Veracruz-SUV-2012 141 | Infiniti-G-Coupe-IPL-2012 142 | Infiniti-QX56-SUV-2011 143 | Isuzu-Ascender-SUV-2008 144 | Jaguar-XK-XKR-2012 145 | Jeep-Compass-SUV-2012 146 | Jeep-Grand-Cherokee-SUV-2012 147 | Jeep-Liberty-SUV-2012 148 | Jeep-Patriot-SUV-2012 149 | Jeep-Wrangler-SUV-2012 150 | Lamborghini-Aventador-Coupe-2012 151 | Lamborghini-Diablo-Coupe-2001 152 | Lamborghini-Gallardo-LP-570-4-Superleggera-2012 153 | Lamborghini-Reventon-Coupe-2008 154 | Land-Rover-LR2-SUV-2012 
155 | Land-Rover-Range-Rover-SUV-2012 156 | Lincoln-Town-Car-Sedan-2011 157 | MINI-Cooper-Roadster-Convertible-2012 158 | Maybach-Landaulet-Convertible-2012 159 | Mazda-Tribute-SUV-2011 160 | McLaren-MP4-12C-Coupe-2012 161 | Mercedes-Benz-300-Class-Convertible-1993 162 | Mercedes-Benz-C-Class-Sedan-2012 163 | Mercedes-Benz-E-Class-Sedan-2012 164 | Mercedes-Benz-S-Class-Sedan-2012 165 | Mercedes-Benz-SL-Class-Coupe-2009 166 | Mercedes-Benz-Sprinter-Van-2012 167 | Mitsubishi-Lancer-Sedan-2012 168 | Nissan-240SX-Coupe-1998 169 | Nissan-Juke-Hatchback-2012 170 | Nissan-Leaf-Hatchback-2012 171 | Nissan-NV-Passenger-Van-2012 172 | Plymouth-Neon-Coupe-1999 173 | Porsche-Panamera-Sedan-2012 174 | Ram-C-V-Cargo-Van-Minivan-2012 175 | Rolls-Royce-Ghost-Sedan-2012 176 | Rolls-Royce-Phantom-Drophead-Coupe-Convertible-2012 177 | Rolls-Royce-Phantom-Sedan-2012 178 | Scion-xD-Hatchback-2012 179 | Spyker-C8-Convertible-2009 180 | Spyker-C8-Coupe-2009 181 | Suzuki-Aerio-Sedan-2007 182 | Suzuki-Kizashi-Sedan-2012 183 | Suzuki-SX4-Hatchback-2012 184 | Suzuki-SX4-Sedan-2012 185 | Tesla-Model-S-Sedan-2012 186 | Toyota-4Runner-SUV-2012 187 | Toyota-Camry-Sedan-2012 188 | Toyota-Corolla-Sedan-2012 189 | Toyota-Sequoia-SUV-2012 190 | Volkswagen-Beetle-Hatchback-2012 191 | Volkswagen-Golf-Hatchback-1991 192 | Volkswagen-Golf-Hatchback-2012 193 | Volvo-240-Sedan-1993 194 | Volvo-C30-Hatchback-2012 195 | Volvo-XC90-SUV-2007 196 | smart-fortwo-Convertible-2012 -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_vehicle_type/multi_stage_file.py: -------------------------------------------------------------------------------- 1 | import time 2 | import argparse 3 | import os 4 | import cv2 5 | import numpy as np 6 | from tqdm import tqdm 7 | 8 | from cv_utils import init_video_file_capture, decode_yolov3, decode_classifier, draw_classification, draw_bounding_boxes, preprocess 9 | from tflite_runtime.interpreter import Interpreter 10 | 11 | def load_labels(path): 12 | with open(path, 'r') as f: 13 | return {i: line.strip() for i, line in enumerate(f.readlines())} 14 | 15 | def process_vehicle_type(roi_img): 16 | 17 | results = second_stage_network.run(roi_img) 18 | vehicle_type = np.argmax(results[0]) 19 | confidence = np.max(results[0]) 20 | label = f'{labels[vehicle_type]} : {confidence}' 21 | 22 | return label 23 | 24 | class NetworkExecutor(object): 25 | 26 | def __init__(self, model_file, num_threads=3): 27 | 28 | self.interpreter = Interpreter(model_file, num_threads=num_threads) 29 | self.interpreter.allocate_tensors() 30 | _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape'] 31 | print(self.input_height, self.input_width) 32 | self.tensor_index = self.interpreter.get_input_details()[0]['index'] 33 | 34 | def get_output_tensors(self): 35 | 36 | output_details = self.interpreter.get_output_details() 37 | tensor_indices = [] 38 | tensor_list = [] 39 | 40 | for output in output_details: 41 | tensor = np.squeeze(self.interpreter.get_tensor(output['index'])) 42 | tensor_list.append(tensor) 43 | 44 | return tensor_list 45 | 46 | def run(self, image): 47 | if image.shape[1:2] != (self.input_height, self.input_width): 48 | img = cv2.resize(image, (self.input_width, self.input_height)) 49 | img = preprocess(img) 50 | self.interpreter.set_tensor(self.tensor_index, img) 51 | self.interpreter.invoke() 52 | return self.get_output_tensors() 53 | 54 | def main(args): 55 | video, video_writer, frame_count = 
init_video_file_capture(args.file, 'vehicle_type_demo') 56 | 57 | frame_num = len(frame_count) 58 | times = [] 59 | 60 | for _ in tqdm(frame_count, desc='Processing frames'): 61 | frame_present, frame = video.read() 62 | if not frame_present: 63 | continue 64 | 65 | start_time = time.time() 66 | 67 | results = first_stage_network.run(frame) 68 | detections = decode_yolov3(netout = results, nms_threshold = 0.1, threshold = args.threshold) 69 | draw_bounding_boxes(frame, detections, None, process_vehicle_type) 70 | 71 | elapsed_ms = (time.time() - start_time) * 1000 72 | 73 | times.append(elapsed_ms) 74 | video_writer.write(frame) 75 | 76 | print('Finished processing frames') 77 | video.release(), video_writer.release() 78 | 79 | print("Average time(ms): ", sum(times)//frame_num) 80 | print("FPS: ", 1000.0 / (sum(times)//frame_num)) # FPS = 1 / time to process loop 81 | 82 | if __name__ == "__main__" : 83 | 84 | print("OpenCV version: {}".format(cv2. __version__)) 85 | 86 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 87 | parser.add_argument('--first_stage', help='File path of .tflite file.', required=True) 88 | parser.add_argument('--second_stage', help='File path of .tflite file.', required=True) 89 | parser.add_argument('--labels', nargs="+", help='File path of labels file.', required=True) 90 | parser.add_argument('--threshold', help='Confidence threshold.', default=0.7) 91 | parser.add_argument('--file', help='File path of video file', required=True) 92 | args = parser.parse_args() 93 | 94 | first_stage_network = NetworkExecutor(args.first_stage, num_threads=2) 95 | second_stage_network = NetworkExecutor(args.second_stage, num_threads=2) 96 | 97 | if not os.path.exists(args.labels[0]): 98 | labels = args.labels 99 | else: 100 | labels = load_labels(args.labels[0]) 101 | 102 | main(args) 103 | 104 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_vehicle_type/multi_stage_stream.py: -------------------------------------------------------------------------------- 1 | import time 2 | import argparse 3 | import os 4 | import cv2 5 | import numpy as np 6 | 7 | from cv_utils import decode_yolov3, preprocess, draw_bounding_boxes 8 | from tflite_runtime.interpreter import Interpreter 9 | from flask import Flask, render_template, request, Response 10 | 11 | app = Flask (__name__, static_url_path = '') 12 | 13 | def load_labels(path): 14 | with open(path, 'r') as f: 15 | return {i: line.strip() for i, line in enumerate(f.readlines())} 16 | 17 | def process_vehicle_type(roi_img): 18 | 19 | results = second_stage_network.run(roi_img) 20 | vehicle_type = np.argmax(results[0]) 21 | confidence = np.max(results[0]) 22 | label = f'{labels[vehicle_type]} : {confidence}' 23 | 24 | return label 25 | 26 | class NetworkExecutor(object): 27 | 28 | def __init__(self, model_file, num_threads=3): 29 | 30 | self.interpreter = Interpreter(model_file, num_threads=num_threads) 31 | self.interpreter.allocate_tensors() 32 | _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape'] 33 | self.tensor_index = self.interpreter.get_input_details()[0]['index'] 34 | 35 | def get_output_tensors(self): 36 | 37 | output_details = self.interpreter.get_output_details() 38 | tensor_indices = [] 39 | tensor_list = [] 40 | 41 | for output in output_details: 42 | tensor = np.squeeze(self.interpreter.get_tensor(output['index'])) 43 | tensor_list.append(tensor) 44 | 45 | return 
tensor_list 46 | 47 | def run(self, image): 48 | if image.shape[1:2] != (self.input_height, self.input_width): 49 | img = cv2.resize(image, (self.input_width, self.input_height)) 50 | img = preprocess(img) 51 | self.interpreter.set_tensor(self.tensor_index, img) 52 | self.interpreter.invoke() 53 | return self.get_output_tensors() 54 | 55 | class Detector(NetworkExecutor): 56 | 57 | def __init__(self, label_file, model_file, threshold): 58 | super().__init__(model_file) 59 | self.threshold = float(threshold) 60 | 61 | def detect(self, frame): 62 | start_time = time.time() 63 | results = self.run(frame) 64 | elapsed_ms = (time.time() - start_time) * 1000 65 | 66 | detections = decode_yolov3(netout = results, nms_threshold = 0.1, threshold = self.threshold) 67 | draw_bounding_boxes(frame, detections, None, process_vehicle_type) 68 | 69 | fps = 1 / elapsed_ms*1000 70 | print("Estimated frames per second : {0:.2f} Inference time: {1:.2f}".format(fps, elapsed_ms)) 71 | 72 | return cv2.imencode('.jpg', frame)[1].tobytes() 73 | 74 | @app.route("/") 75 | def index(): 76 | return render_template('index.html', name = None) 77 | 78 | def gen(camera): 79 | while True: 80 | frame = camera.get_frame() 81 | image = detector.detect(frame) 82 | yield (b'--frame\r\n'+b'Content-Type: image/jpeg\r\n\r\n' + image + b'\r\n') 83 | 84 | @app.route('/video_feed') 85 | def video_feed(): 86 | return Response(gen(Camera()), mimetype='multipart/x-mixed-replace; boundary=frame') 87 | 88 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 89 | parser.add_argument('--first_stage', help='File path of .tflite file.', required=True) 90 | parser.add_argument('--second_stage', help='File path of .tflite file.', required=True) 91 | parser.add_argument('--labels', nargs="+", help='File path of labels file.', required=True) 92 | parser.add_argument('--threshold', help='Confidence threshold.', default=0.9) 93 | parser.add_argument('--source', help='picamera or cv', default='cv') 94 | args = parser.parse_args() 95 | 96 | if args.source == "cv": 97 | from camera_opencv import Camera 98 | source = 0 99 | elif args.source == "picamera": 100 | from camera_pi import Camera 101 | source = 0 102 | 103 | Camera.set_video_source(source) 104 | 105 | detector = Detector(None, args.first_stage, args.threshold) 106 | second_stage_network = NetworkExecutor(args.second_stage) 107 | 108 | if not os.path.exists(args.labels[0]): 109 | labels = args.labels 110 | else: 111 | labels = load_labels(args.labels[0]) 112 | 113 | if __name__ == "__main__" : 114 | app.run(host = '0.0.0.0', port = 5000, debug = True) 115 | 116 | -------------------------------------------------------------------------------- /examples/tensorflow_lite/multi_stage_inference_vehicle_type/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Video Streaming Demonstration 4 | 5 | 6 |

Tflite Multi-stage Inference Demo

7 | 8 | 9 | 10 | --------------------------------------------------------------------------------
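The vehicle-type example above pairs the YOLOv3 vehicle detector with a 196-class classifier whose class names come from labels.txt via load_labels(). The sketch below is not part of the repository; load_labels_from_lines(), decode_vehicle_type(), the three-label subset, and the mock scores are illustrative only, while the real scripts read the full labels.txt from disk and take scores from the second-stage TFLite model.

# Sketch only: index-to-label mapping for the vehicle-type classifier, using a
# short inline subset of labels.txt so the example is self-contained.
import numpy as np

def load_labels_from_lines(lines):
    """Index -> label dict, one label per line (same shape as load_labels())."""
    return {i: line.strip() for i, line in enumerate(lines)}

LABELS = load_labels_from_lines([
    'AM-General-Hummer-SUV-2000',
    'Acura-Integra-Type-R-2001',
    'Acura-RL-Sedan-2012',
])

def decode_vehicle_type(class_scores, labels):
    """Return 'label : confidence' for the top-scoring class."""
    scores = np.asarray(class_scores, dtype=np.float32)
    idx = int(np.argmax(scores))
    return f'{labels[idx]} : {scores[idx]:.2f}'

if __name__ == '__main__':
    mock_scores = [0.10, 0.75, 0.15]                 # placeholder classifier output
    print(decode_vehicle_type(mock_scores, LABELS))  # -> 'Acura-Integra-Type-R-2001 : 0.75'

In the scripts above, process_vehicle_type() does the equivalent with np.argmax and np.max over results[0] and the dict returned by load_labels().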