├── .bandit ├── .dockerignore ├── .gitignore ├── .travis.yml ├── Dockerfile ├── LICENSE ├── README.md ├── api ├── __init__.py ├── metadata.py └── predict.py ├── app.py ├── config.py ├── core ├── __init__.py ├── model.py └── tf_pose │ ├── __init__.py │ ├── common.py │ ├── estimator.py │ ├── networks.py │ ├── pafprocess │ ├── README.md │ ├── __init__.py │ ├── numpy.i │ ├── pafprocess.cpp │ ├── pafprocess.h │ ├── pafprocess.i │ ├── pafprocess.py │ ├── pafprocess_wrap.cpp │ └── setup.py │ ├── pystopwatch.py │ ├── slidingwindow │ ├── ArrayUtils.py │ ├── Batching.py │ ├── Merging.py │ ├── RectangleUtils.py │ ├── SlidingWindow.py │ ├── WindowDistance.py │ └── __init__.py │ └── tensblur │ ├── __init__.py │ └── smoother.py ├── docs ├── deploy-max-to-ibm-cloud-with-kubernetes-button.png ├── pose-lines.png └── swagger-screenshot.png ├── max-human-pose-estimator.yaml ├── requirements-test.txt ├── requirements.txt ├── samples ├── IBM.jpeg ├── Pilots.jpg ├── Pilots.png ├── Pilots.tiff ├── README.md └── demo.ipynb ├── sha512sums.txt └── tests └── test.py /.bandit: -------------------------------------------------------------------------------- 1 | [bandit] 2 | exclude: /tests,/training 3 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | Dockerfile 2 | README.* 3 | .git/ 4 | .idea/ 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | .idea/ 106 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - 3.6 4 | services: 5 | - docker 6 | install: 7 | - docker build -t max-human-pose-estimator . 8 | - docker run -it -d --rm -p 5000:5000 max-human-pose-estimator 9 | - pip install -r requirements-test.txt 10 | before_script: 11 | - flake8 . --max-line-length=127 12 | - bandit -r . 13 | - sleep 30 14 | script: 15 | - pytest tests/test.py 16 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2018-2019 IBM Corp. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | FROM quay.io/codait/max-base:v1.4.0 18 | 19 | # Fill in these with a link to the bucket containing the model and the model file name 20 | ARG model_bucket=https://codait-cos-max.s3.us.cloud-object-storage.appdomain.cloud/max-human-pose-estimator/1.0.0 21 | ARG model_file=assets.tar.gz 22 | 23 | RUN sudo apt-get update && sudo apt-get install -y gcc swig libgtk2.0 \ 24 | && sudo apt-get install --reinstall -y build-essential && sudo rm -rf /var/lib/apt/lists/* 25 | 26 | RUN wget -nv --show-progress --progress=bar:force:noscroll ${model_bucket}/${model_file} --output-document=assets/${model_file} && \ 27 | tar -x -C assets/ -f assets/${model_file} -v && rm assets/${model_file} 28 | 29 | COPY requirements.txt . 30 | RUN pip install -r requirements.txt 31 | 32 | COPY . . 
33 | 34 | # check file integrity 35 | RUN sha512sum -c sha512sums.txt 36 | 37 | RUN cd core/tf_pose/pafprocess/ && sudo swig -python -c++ pafprocess.i && sudo /opt/conda/bin/python setup.py build_ext --inplace 38 | 39 | EXPOSE 5000 40 | 41 | CMD python app.py 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.com/IBM/MAX-Human-Pose-Estimator.svg?branch=master)](https://travis-ci.com/IBM/MAX-Human-Pose-Estimator) [![Website Status](https://img.shields.io/website/http/max-human-pose-estimator.codait-prod-41208c73af8fca213512856c7a09db52-0000.us-east.containers.appdomain.cloud/swagger.json.svg?label=api+demo)](http://max-human-pose-estimator.codait-prod-41208c73af8fca213512856c7a09db52-0000.us-east.containers.appdomain.cloud) 2 | 3 | [](http://ibm.biz/max-to-ibm-cloud-tutorial) 4 | 5 | # IBM Developer Model Asset Exchange: Human Pose Estimator 6 | 7 | The Human Pose Estimator model detects humans and their poses in a given image. The model first detects the humans in 8 | the input image and then identifies the body parts, including nose, neck, eyes, shoulders, elbows, wrists, hips, knees, 9 | and ankles. Next, each pair of associated body parts is connected by a "pose line"; for example, as the following image 10 | shows, a line may connect the left eye to the nose, while another may connect the nose to the neck. 11 | 12 | ![Pose Line Example](docs/pose-lines.png) 13 | 14 | Each pose line is represented by a list [x1, y1, x2, y2], where the first pair of coordinates (x1, y1) is the start 15 | point of the line for one body part, while the second pair of coordinates (x2, y2) is the end point of the line for the 16 | other associated body part. The pose lines are assembled into full body poses for each of the humans detected in the 17 | image. 18 | 19 | The model is based on the open source [TF implementation of OpenPose model](https://github.com/ildoonet/tf-pose-estimation). 20 | The code in this repository deploys the model as a web service in a Docker container. This repository was developed 21 | as part of the [IBM Developer Model Asset Exchange](https://developer.ibm.com/exchanges/models/). 22 | 23 | ## Model Metadata 24 | | Domain | Application | Industry | Framework | Training Data | Input Data Format | 25 | | ------------- | -------- | -------- | --------- | --------- | -------------- | 26 | | Vision | Pose Estimation | General | TensorFlow | [COCO](http://cocodataset.org/#home) | Image(RGB) | 27 | 28 | ## References 29 | 30 | * _Zhe Cao, Tomas Simon, Shih-En Wei, Yaser Sheikh,_ ["Realtime Multi-Person 2D Pose Estimation using Part Affinity Fields"](https://arxiv.org/abs/1611.08050), CVPR 2017. 31 | * [TF-Pose-Estimation Github Repository](https://github.com/ildoonet/tf-pose-estimation) 32 | 33 | ## Licenses 34 | 35 | | Component | License | Link | 36 | | ------------- | -------- | -------- | 37 | | This repository | [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0) | [LICENSE](LICENSE) | 38 | | Model Weights (MobileNet only) | [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0) | [LICENSE](https://github.com/ildoonet/tf-pose-estimation/blob/master/LICENSE) | 39 | | Model Code (3rd party) | [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0) | [LICENSE](https://github.com/ildoonet/tf-pose-estimation/blob/master/LICENSE) | 40 | | Test Samples | Various | [Samples README](samples/README.md) | 41 | 42 | ## Pre-requisites: 43 | 44 | * `docker`: The [Docker](https://www.docker.com/) command-line interface. Follow the [installation instructions](https://docs.docker.com/install/) for your system. 
45 | * The minimum recommended resources for this model is 2GB Memory and 1 CPU. 46 | * If you are on x86-64/AMD64, your CPU must support [AVX](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions) at the minimum. 47 | 48 | # Deployment options 49 | 50 | * [Deploy from Quay](#deploy-from-quay) 51 | * [Deploy on Red Hat OpenShift](#deploy-on-red-hat-openshift) 52 | * [Deploy on Kubernetes](#deploy-on-kubernetes) 53 | * [Run Locally](#run-locally) 54 | 55 | ## Deploy from Quay 56 | 57 | To run the docker image, which automatically starts the model serving API, run: 58 | 59 | ``` 60 | $ docker run -it -p 5000:5000 quay.io/codait/max-human-pose-estimator 61 | ``` 62 | 63 | This will pull a pre-built image from the Quay.io container registry (or use an existing image if already cached locally) and run it. 64 | If you'd rather checkout and build the model locally you can follow the [run locally](#run-locally) steps below. 65 | 66 | ## Deploy on Red Hat OpenShift 67 | 68 | You can deploy the model-serving microservice on Red Hat OpenShift by following the instructions for the OpenShift web console or the OpenShift Container Platform CLI [in this tutorial](https://developer.ibm.com/tutorials/deploy-a-model-asset-exchange-microservice-on-red-hat-openshift/), specifying `quay.io/codait/max-human-pose-estimator` as the image name. 69 | 70 | ## Deploy on Kubernetes 71 | 72 | You can also deploy the model on Kubernetes using the latest docker image on Quay. 73 | 74 | On your Kubernetes cluster, run the following commands: 75 | 76 | ``` 77 | $ kubectl apply -f https://raw.githubusercontent.com/IBM/MAX-Human-Pose-Estimator/master/max-human-pose-estimator.yaml 78 | ``` 79 | 80 | The model will be available internally at port `5000`, but can also be accessed externally through the `NodePort`. 81 | 82 | ## Run Locally 83 | 84 | 1. [Build the Model](#1-build-the-model) 85 | 2. [Deploy the Model](#2-deploy-the-model) 86 | 3. [Use the Model](#3-use-the-model) 87 | 4. [Run the Notebook](#4-run-the-notebook) 88 | 5. [Development](#5-development) 89 | 6. [Cleanup](#6-cleanup) 90 | 91 | 92 | ### 1. Build the Model 93 | 94 | Clone this repository locally. In a terminal, run the following command: 95 | 96 | ``` 97 | $ git clone https://github.com/IBM/MAX-Human-Pose-Estimator 98 | ``` 99 | 100 | Change directory into the repository base folder: 101 | 102 | ``` 103 | $ cd MAX-Human-Pose-Estimator 104 | ``` 105 | 106 | To build the docker image locally, run: 107 | 108 | ``` 109 | $ docker build -t max-human-pose-estimator . 110 | ``` 111 | 112 | All required model assets will be downloaded during the build process. _Note_ that currently this docker image is CPU only (we will add support for GPU images later). 113 | 114 | 115 | ### 2. Deploy the Model 116 | 117 | To run the docker image, which automatically starts the model serving API, run: 118 | 119 | ``` 120 | $ docker run -it -p 5000:5000 max-human-pose-estimator 121 | ``` 122 | 123 | ### 3. Use the Model 124 | 125 | The API server automatically generates an interactive Swagger documentation page. Go to `http://localhost:5000` to load it. From there you can explore the API and also create test requests. 126 | Use the `model/predict` endpoint to load a test image (you can use one of the test images from the `samples` folder) and get predicted labels for the image from the API. 
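If you prefer to call the API from a script rather than the Swagger UI, the snippet below is a minimal sketch using the `requests` package (not bundled with this repository); it assumes the model server from step 2 is running on `localhost:5000`:

```
import requests

# POST a sample image to the prediction endpoint
with open('samples/Pilots.jpg', 'rb') as f:
    response = requests.post('http://localhost:5000/model/predict',
                             files={'file': f})

result = response.json()
print(result['status'])  # 'ok' on success
for human in result['predictions']:
    # each entry in 'pose_lines' is {'line': [x1, y1, x2, y2]} in pixel coordinates
    print(human['human_id'], len(human['pose_lines']), 'pose lines,',
          len(human['body_parts']), 'body parts')
```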
127 | 128 | ![Swagger UI Screenshot](docs/swagger-screenshot.png) 129 | 130 | You can also test it on the command line, for example: 131 | 132 | ``` 133 | $ curl -F "file=@samples/Pilots.jpg" -XPOST http://localhost:5000/model/predict 134 | ``` 135 | 136 | You should see a JSON response like that below: 137 | 138 | ``` 139 | { 140 | "status": "ok", 141 | "predictions": [ 142 | { 143 | "human_id": 0, 144 | "pose_lines": [ 145 | { 146 | "line": [ 147 | 444, 148 | 269, 149 | 392, 150 | 269 151 | ] 152 | }, 153 | 154 | ... 155 | 156 | { 157 | "line": [ 158 | 439, 159 | 195, 160 | 464, 161 | 197 162 | ] 163 | } 164 | ], 165 | "body_parts": [ 166 | { 167 | "part_id": 0, 168 | "part_name": "Nose", 169 | "score": "0.83899", 170 | "x": 428, 171 | "y": 205 172 | }, 173 | 174 | ... 175 | 176 | { 177 | "part_id": 17, 178 | "part_name": "LEar", 179 | "score": "0.81776", 180 | "x": 464, 181 | "y": 197 182 | } 183 | ] 184 | }, 185 | 186 | ... 187 | 188 | ] 189 | } 190 | ``` 191 | 192 | ### 4. Run the Notebook 193 | 194 | Once the model server is running, you can see how to use it by walking through [the demo notebook](samples/demo.ipynb). _Note_ the demo requires `jupyter`, `numpy`, `matplotlib`, `opencv-python`, `json`, and `requests`. 195 | 196 | Run the following command from the model repo base folder, in a new terminal window (leaving the model server running in the other terminal window): 197 | 198 | ``` 199 | $ jupyter notebook 200 | ``` 201 | 202 | This will start the notebook server. You can open the simple demo notebook by clicking on `samples/demo.ipynb`. 203 | 204 | ### 5. Development 205 | 206 | To run the Flask API app in debug mode, edit `config.py` to set `DEBUG = True` under the application settings. You will then need to rebuild the docker image (see [step 1](#1-build-the-model)). 207 | 208 | ### 6. Cleanup 209 | 210 | To stop the Docker container, type `CTRL` + `C` in your terminal. 211 | 212 | ## Resources and Contributions 213 | 214 | If you are interested in contributing to the Model Asset Exchange project or have any queries, please follow the instructions [here](https://github.com/CODAIT/max-central-repo). 215 | -------------------------------------------------------------------------------- /api/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2018-2019 IBM Corp. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | from .metadata import ModelMetadataAPI # noqa 18 | from .predict import ModelPredictAPI # noqa 19 | -------------------------------------------------------------------------------- /api/metadata.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2018-2019 IBM Corp. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | from core.model import ModelWrapper 18 | from maxfw.core import MAX_API, MetadataAPI, METADATA_SCHEMA 19 | 20 | 21 | class ModelMetadataAPI(MetadataAPI): 22 | 23 | @MAX_API.marshal_with(METADATA_SCHEMA) 24 | def get(self): 25 | """Return the metadata associated with the model""" 26 | return ModelWrapper.MODEL_META_DATA 27 | -------------------------------------------------------------------------------- /api/predict.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2018-2019 IBM Corp. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | from core.model import ModelWrapper 18 | from flask_restplus import fields 19 | from werkzeug.datastructures import FileStorage 20 | from maxfw.core import MAX_API, PredictAPI 21 | 22 | input_parser = MAX_API.parser() 23 | input_parser.add_argument('file', type=FileStorage, location='files', 24 | required=True, 25 | help='An image encoded as JPEG, PNG, or TIFF') 26 | 27 | body_parts_prediction = MAX_API.model('body_parts_prediction', { 28 | 'part_id': fields.Integer(required=True, 29 | description='ID for the body part'), 30 | 'part_name': fields.String(required=True, 31 | description='Name of the body part'), 32 | 'score': fields.Fixed(required=True, 33 | description='The prediction score for the body part'), 34 | 'x': fields.Integer(required=True, 35 | description='X coordinate of the center point of the ' 36 | 'body part'), 37 | 'y': fields.Integer(required=True, 38 | description='Y coordinate of the center point of the ' 39 | 'body part') 40 | }) 41 | 42 | line_prediction = MAX_API.model('LinePrediction', { 43 | 'line': fields.List(fields.Integer(required=True, 44 | description='Coordinates for line ' 45 | 'connecting two body parts, ' 46 | 'in the format [x1, y1, x2, ' 47 | 'y2]; (x1, y1) represents ' 48 | 'the starting point of the ' 49 | 'line, while (x2, y2) ' 50 | 'represents the ending point'))}) 51 | 52 | label_prediction = MAX_API.model('LabelPrediction', { 53 | 'human_id': fields.Integer(required=True, 54 | description='ID for the detected person'), 55 | 'pose_lines': fields.List(fields.Nested(line_prediction), 56 | description='Detected pose lines for a person'), 57 | 'body_parts': fields.List(fields.Nested(body_parts_prediction), 58 | description='Detected body parts for a person')}) 59 | 60 | predict_response = MAX_API.model('ModelPredictResponse', { 61 | 'status': fields.String(required=True, 62 | description='Response status message'), 63 | 'predictions': 
fields.List(fields.Nested(label_prediction), 64 | description='Predicted labels and probabilities')}) 65 | 66 | 67 | class ModelPredictAPI(PredictAPI): 68 | 69 | model_wrapper = ModelWrapper() 70 | 71 | @MAX_API.doc('predict') 72 | @MAX_API.expect(input_parser) 73 | @MAX_API.marshal_with(predict_response) 74 | def post(self): 75 | """Make a prediction given input data""" 76 | result = {'status': 'error'} 77 | args = input_parser.parse_args() 78 | input_data = args['file'].read() 79 | image = self.model_wrapper._read_image(input_data) 80 | 81 | label_preds = self.model_wrapper.predict(image) 82 | result['predictions'] = label_preds 83 | result['status'] = 'ok' 84 | 85 | return result 86 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2018-2019 IBM Corp. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | from maxfw.core import MAXApp 18 | from api import ModelMetadataAPI, ModelPredictAPI 19 | from config import API_TITLE, API_DESC, API_VERSION 20 | 21 | max_app = MAXApp(API_TITLE, API_DESC, API_VERSION) 22 | max_app.add_api(ModelMetadataAPI, '/metadata') 23 | max_app.add_api(ModelPredictAPI, '/predict') 24 | max_app.run() 25 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2018-2019 IBM Corp. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | # Flask settings 18 | DEBUG = False 19 | 20 | # Flask-restplus settings 21 | RESTPLUS_MASK_SWAGGER = False 22 | 23 | # Application settings 24 | 25 | # API metadata 26 | API_TITLE = 'MAX Human Pose Estimator' 27 | API_DESC = 'Detect humans in an image and estimate the pose for each person.' 
28 | API_VERSION = '1.1.0' 29 | 30 | # default model 31 | MODEL_NAME = 'MAX Human Pose Estimator' 32 | DEFAULT_MODEL_PATH = 'assets/human-pose-estimator-tensorflow.pb' 33 | MODEL_LICENSE = 'Apache License 2.0' 34 | 35 | DEFAULT_IMAGE_SIZE_STR = '432x368' 36 | DEFAULT_IMAGE_SIZE = (432, 368) # Recommends: 432x368 or 656x368 or 1312x736 37 | DEFAULT_BATCH_SIZE = 2 38 | DEFAULT_PREPROCESS_THREADS = 2 39 | -------------------------------------------------------------------------------- /core/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2018-2019 IBM Corp. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | -------------------------------------------------------------------------------- /core/model.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2018-2019 IBM Corp. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # 16 | 17 | from maxfw.model import MAXModelWrapper 18 | 19 | import io 20 | import logging 21 | import time 22 | from PIL import Image 23 | import numpy as np 24 | from flask_restplus import abort 25 | 26 | from core.tf_pose.estimator import TfPoseEstimator 27 | from config import DEFAULT_MODEL_PATH, DEFAULT_IMAGE_SIZE, MODEL_NAME 28 | 29 | logger = logging.getLogger('MAX-Human-Pose-Estimator') 30 | logger.setLevel(logging.INFO) 31 | ch = logging.StreamHandler() 32 | formatter = logging.Formatter('[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s') 33 | ch.setFormatter(formatter) 34 | logger.addHandler(ch) 35 | 36 | 37 | class ModelWrapper(MAXModelWrapper): 38 | 39 | MODEL_META_DATA = { 40 | 'id': '{}'.format(MODEL_NAME.lower()), 41 | 'name': '{} TensorFlow Model'.format(MODEL_NAME), 42 | 'description': 'TensorFlow model trained on COCO data to detect human poses', 43 | 'type': 'Human pose estimation', 44 | 'license': 'Apache License 2.0', 45 | 'source': 'https://developer.ibm.com/exchanges/models/all/max-human-pose-estimator/' 46 | } 47 | 48 | def __init__(self, path=DEFAULT_MODEL_PATH): 49 | logger.info('Loading model from: {}...'.format(path)) 50 | self.model = TfPoseEstimator(path, target_size=DEFAULT_IMAGE_SIZE) 51 | logger.info('Loaded model') 52 | # Metadata 53 | self.w, self.h = DEFAULT_IMAGE_SIZE 54 | logger.info("W = {}, H = {} ".format(self.w, self.h)) 55 | 56 | def _read_image(self, image_data): 57 | try: 58 | image = Image.open(io.BytesIO(image_data)) 59 | if image.mode != 'RGB': 60 | image = image.convert('RGB') 61 | # Convert RGB to BGR for OpenCV. 62 | image = np.array(image)[:, :, ::-1] 63 | return image 64 | except IOError as e: 65 | logger.error(str(e)) 66 | abort(400, "Please submit a valid image in PNG, TIFF or JPEG format") 67 | 68 | def _predict(self, x): 69 | t = time.time() 70 | humans = self.model.inference(x, resize_to_default=True, 71 | upsample_size=4.0) 72 | results = TfPoseEstimator.draw_human_pose_connection(x, humans) 73 | logger.info('inference in %.4f seconds.' % (time.time() - t)) 74 | return results 75 | -------------------------------------------------------------------------------- /core/tf_pose/__init__.py: -------------------------------------------------------------------------------- 1 | # from __future__ import absolute_import 2 | # from __future__ import division 3 | # from __future__ import print_function 4 | 5 | # from tf_pose.runner import infer, Estimator, get_estimator 6 | from . 
import * # noqa 7 | -------------------------------------------------------------------------------- /core/tf_pose/common.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | import tensorflow as tf 3 | import cv2 4 | 5 | 6 | regularizer_conv = 0.004 7 | regularizer_dsconv = 0.0004 8 | batchnorm_fused = True 9 | activation_fn = tf.nn.relu 10 | 11 | 12 | class CocoPart(Enum): 13 | Nose = 0 14 | Neck = 1 15 | RShoulder = 2 16 | RElbow = 3 17 | RWrist = 4 18 | LShoulder = 5 19 | LElbow = 6 20 | LWrist = 7 21 | RHip = 8 22 | RKnee = 9 23 | RAnkle = 10 24 | LHip = 11 25 | LKnee = 12 26 | LAnkle = 13 27 | REye = 14 28 | LEye = 15 29 | REar = 16 30 | LEar = 17 31 | Background = 18 32 | 33 | 34 | class MPIIPart(Enum): 35 | RAnkle = 0 36 | RKnee = 1 37 | RHip = 2 38 | LHip = 3 39 | LKnee = 4 40 | LAnkle = 5 41 | RWrist = 6 42 | RElbow = 7 43 | RShoulder = 8 44 | LShoulder = 9 45 | LElbow = 10 46 | LWrist = 11 47 | Neck = 12 48 | Head = 13 49 | 50 | @staticmethod 51 | def from_coco(human): 52 | # t = { 53 | # MPIIPart.RAnkle: CocoPart.RAnkle, 54 | # MPIIPart.RKnee: CocoPart.RKnee, 55 | # MPIIPart.RHip: CocoPart.RHip, 56 | # MPIIPart.LHip: CocoPart.LHip, 57 | # MPIIPart.LKnee: CocoPart.LKnee, 58 | # MPIIPart.LAnkle: CocoPart.LAnkle, 59 | # MPIIPart.RWrist: CocoPart.RWrist, 60 | # MPIIPart.RElbow: CocoPart.RElbow, 61 | # MPIIPart.RShoulder: CocoPart.RShoulder, 62 | # MPIIPart.LShoulder: CocoPart.LShoulder, 63 | # MPIIPart.LElbow: CocoPart.LElbow, 64 | # MPIIPart.LWrist: CocoPart.LWrist, 65 | # MPIIPart.Neck: CocoPart.Neck, 66 | # MPIIPart.Nose: CocoPart.Nose, 67 | # } 68 | 69 | t = [ 70 | (MPIIPart.Head, CocoPart.Nose), 71 | (MPIIPart.Neck, CocoPart.Neck), 72 | (MPIIPart.RShoulder, CocoPart.RShoulder), 73 | (MPIIPart.RElbow, CocoPart.RElbow), 74 | (MPIIPart.RWrist, CocoPart.RWrist), 75 | (MPIIPart.LShoulder, CocoPart.LShoulder), 76 | (MPIIPart.LElbow, CocoPart.LElbow), 77 | (MPIIPart.LWrist, CocoPart.LWrist), 78 | (MPIIPart.RHip, CocoPart.RHip), 79 | (MPIIPart.RKnee, CocoPart.RKnee), 80 | (MPIIPart.RAnkle, CocoPart.RAnkle), 81 | (MPIIPart.LHip, CocoPart.LHip), 82 | (MPIIPart.LKnee, CocoPart.LKnee), 83 | (MPIIPart.LAnkle, CocoPart.LAnkle), 84 | ] 85 | 86 | pose_2d_mpii = [] 87 | visibilty = [] 88 | for mpi, coco in t: 89 | if coco.value not in human.body_parts.keys(): 90 | pose_2d_mpii.append((0, 0)) 91 | visibilty.append(False) 92 | continue 93 | pose_2d_mpii.append((human.body_parts[coco.value].x, human.body_parts[coco.value].y)) 94 | visibilty.append(True) 95 | return pose_2d_mpii, visibilty 96 | 97 | 98 | CocoPairs = [ 99 | (1, 2), (1, 5), (2, 3), (3, 4), (5, 6), (6, 7), (1, 8), (8, 9), (9, 10), (1, 11), 100 | (11, 12), (12, 13), (1, 0), (0, 14), (14, 16), (0, 15), (15, 17), (2, 16), (5, 17) 101 | ] # = 19 102 | CocoPairsRender = CocoPairs[:-2] 103 | # CocoPairsNetwork = [ 104 | # (12, 13), (20, 21), (14, 15), (16, 17), (22, 23), (24, 25), (0, 1), (2, 3), (4, 5), 105 | # (6, 7), (8, 9), (10, 11), (28, 29), (30, 31), (34, 35), (32, 33), (36, 37), (18, 19), (26, 27) 106 | # ] # = 19 107 | 108 | CocoColors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], 109 | [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], 110 | [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]] 111 | 112 | 113 | def read_imgfile(path, width=None, height=None): 114 | val_image = cv2.imread(path, cv2.IMREAD_COLOR) 115 | if width is not None and height is not 
None: 116 | val_image = cv2.resize(val_image, (width, height)) 117 | return val_image 118 | 119 | 120 | def get_sample_images(w, h): 121 | val_image = [ 122 | read_imgfile('./images/Pilots.jpg', w, h), 123 | ] 124 | return val_image 125 | -------------------------------------------------------------------------------- /core/tf_pose/estimator.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import math 3 | 4 | import slidingwindow as sw 5 | 6 | import cv2 7 | import numpy as np 8 | import tensorflow as tf 9 | import time 10 | 11 | from core.tf_pose import common 12 | from core.tf_pose.common import CocoPart 13 | from core.tf_pose.tensblur.smoother import Smoother 14 | 15 | logger = logging.getLogger('TfPoseEstimator') 16 | logger.setLevel(logging.DEBUG) 17 | ch = logging.StreamHandler() 18 | formatter = logging.Formatter('[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s') 19 | ch.setFormatter(formatter) 20 | logger.addHandler(ch) 21 | 22 | try: 23 | from core.tf_pose.pafprocess import pafprocess 24 | except ModuleNotFoundError as e: 25 | print(e) 26 | print('you need to build c++ library for pafprocess.') 27 | print('See: https://github.com/ildoonet/tf-pose-estimation/tree/master/tf_pose/pafprocess') 28 | exit(-1) 29 | 30 | 31 | def _round(v): 32 | return int(round(v)) 33 | 34 | 35 | def _include_part(part_list, part_idx): 36 | for part in part_list: 37 | if part_idx == part.part_idx: 38 | return True, part 39 | return False, None 40 | 41 | 42 | class Human: 43 | """ 44 | body_parts: list of BodyPart 45 | """ 46 | __slots__ = ('body_parts', 'pairs', 'uidx_list', 'score') 47 | 48 | def __init__(self, pairs): 49 | self.pairs = [] 50 | self.uidx_list = set() 51 | self.body_parts = {} 52 | for pair in pairs: 53 | self.add_pair(pair) 54 | self.score = 0.0 55 | 56 | @staticmethod 57 | def _get_uidx(part_idx, idx): 58 | return '%d-%d' % (part_idx, idx) 59 | 60 | def add_pair(self, pair): 61 | self.pairs.append(pair) 62 | self.body_parts[pair.part_idx1] = BodyPart(Human._get_uidx(pair.part_idx1, pair.idx1), 63 | pair.part_idx1, 64 | pair.coord1[0], pair.coord1[1], pair.score) 65 | self.body_parts[pair.part_idx2] = BodyPart(Human._get_uidx(pair.part_idx2, pair.idx2), 66 | pair.part_idx2, 67 | pair.coord2[0], pair.coord2[1], pair.score) 68 | self.uidx_list.add(Human._get_uidx(pair.part_idx1, pair.idx1)) 69 | self.uidx_list.add(Human._get_uidx(pair.part_idx2, pair.idx2)) 70 | 71 | def is_connected(self, other): 72 | return len(self.uidx_list & other.uidx_list) > 0 73 | 74 | def merge(self, other): 75 | for pair in other.pairs: 76 | self.add_pair(pair) 77 | 78 | def part_count(self): 79 | return len(self.body_parts.keys()) 80 | 81 | def get_max_score(self): 82 | return max([x.score for _, x in self.body_parts.items()]) 83 | 84 | def get_face_box(self, img_w, img_h, mode=0): 85 | """ 86 | Get Face box compared to img size (w, h) 87 | :param img_w: 88 | :param img_h: 89 | :param mode: 90 | :return: 91 | """ 92 | # SEE : https://github.com/ildoonet/tf-pose-estimation/blob/master/tf_pose/common.py#L13 93 | _NOSE = CocoPart.Nose.value 94 | _NECK = CocoPart.Neck.value 95 | _REye = CocoPart.REye.value 96 | _LEye = CocoPart.LEye.value 97 | _REar = CocoPart.REar.value 98 | _LEar = CocoPart.LEar.value 99 | 100 | _THRESHOLD_PART_CONFIDENCE = 0.2 101 | parts = [part for idx, part in self.body_parts.items() if part.score > _THRESHOLD_PART_CONFIDENCE] 102 | 103 | is_nose, part_nose = _include_part(parts, _NOSE) 104 | if not is_nose: 105 | return None 106 
| 107 | size = 0 108 | is_neck, part_neck = _include_part(parts, _NECK) 109 | if is_neck: 110 | size = max(size, img_h * (part_neck.y - part_nose.y) * 0.8) 111 | 112 | is_reye, part_reye = _include_part(parts, _REye) 113 | is_leye, part_leye = _include_part(parts, _LEye) 114 | if is_reye and is_leye: 115 | size = max(size, img_w * (part_reye.x - part_leye.x) * 2.0) 116 | size = max(size, 117 | img_w * math.sqrt((part_reye.x - part_leye.x) ** 2 + (part_reye.y - part_leye.y) ** 2) * 2.0) 118 | 119 | if mode == 1: 120 | if not is_reye and not is_leye: 121 | return None 122 | 123 | is_rear, part_rear = _include_part(parts, _REar) 124 | is_lear, part_lear = _include_part(parts, _LEar) 125 | if is_rear and is_lear: 126 | size = max(size, img_w * (part_rear.x - part_lear.x) * 1.6) 127 | 128 | if size <= 0: 129 | return None 130 | 131 | if not is_reye and is_leye: 132 | x = part_nose.x * img_w - (size // 3 * 2) 133 | elif is_reye and not is_leye: 134 | x = part_nose.x * img_w - (size // 3) 135 | else: # is_reye and is_leye: 136 | x = part_nose.x * img_w - size // 2 137 | 138 | x2 = x + size 139 | if mode == 0: 140 | y = part_nose.y * img_h - size // 3 141 | else: 142 | y = part_nose.y * img_h - _round(size / 2 * 1.2) 143 | y2 = y + size 144 | 145 | # fit into the image frame 146 | x = max(0, x) 147 | y = max(0, y) 148 | x2 = min(img_w - x, x2 - x) + x 149 | y2 = min(img_h - y, y2 - y) + y 150 | 151 | if _round(x2 - x) == 0.0 or _round(y2 - y) == 0.0: 152 | return None 153 | if mode == 0: 154 | return {"x": _round((x + x2) / 2), 155 | "y": _round((y + y2) / 2), 156 | "w": _round(x2 - x), 157 | "h": _round(y2 - y)} 158 | else: 159 | return {"x": _round(x), 160 | "y": _round(y), 161 | "w": _round(x2 - x), 162 | "h": _round(y2 - y)} 163 | 164 | def get_upper_body_box(self, img_w, img_h): 165 | """ 166 | Get Upper body box compared to img size (w, h) 167 | :param img_w: 168 | :param img_h: 169 | :return: 170 | """ 171 | 172 | if not (img_w > 0 and img_h > 0): 173 | raise Exception("img size should be positive") 174 | 175 | _NOSE = CocoPart.Nose.value 176 | _NECK = CocoPart.Neck.value 177 | _RSHOULDER = CocoPart.RShoulder.value 178 | _LSHOULDER = CocoPart.LShoulder.value 179 | _THRESHOLD_PART_CONFIDENCE = 0.3 180 | parts = [part for idx, part in self.body_parts.items() if part.score > _THRESHOLD_PART_CONFIDENCE] 181 | part_coords = [(img_w * part.x, img_h * part.y) for part in parts if 182 | part.part_idx in [0, 1, 2, 5, 8, 11, 14, 15, 16, 17]] 183 | 184 | if len(part_coords) < 5: 185 | return None 186 | 187 | # Initial Bounding Box 188 | x = min([part[0] for part in part_coords]) 189 | y = min([part[1] for part in part_coords]) 190 | x2 = max([part[0] for part in part_coords]) 191 | y2 = max([part[1] for part in part_coords]) 192 | 193 | # # ------ Adjust heuristically + 194 | # if face points are detected, adjust y value 195 | 196 | is_nose, part_nose = _include_part(parts, _NOSE) 197 | is_neck, part_neck = _include_part(parts, _NECK) 198 | 199 | if is_nose and is_neck: 200 | y -= (part_neck.y * img_h - y) * 0.8 201 | 202 | # # by using shoulder position, adjust width 203 | is_rshoulder, part_rshoulder = _include_part(parts, _RSHOULDER) 204 | is_lshoulder, part_lshoulder = _include_part(parts, _LSHOULDER) 205 | if is_rshoulder and is_lshoulder: 206 | half_w = x2 - x 207 | dx = half_w * 0.15 208 | x -= dx 209 | x2 += dx 210 | elif is_neck: 211 | if is_lshoulder and not is_rshoulder: 212 | half_w = abs(part_lshoulder.x - part_neck.x) * img_w * 1.15 213 | x = min(part_neck.x * img_w - half_w, x) 214 | x2 
= max(part_neck.x * img_w + half_w, x2) 215 | elif not is_lshoulder and is_rshoulder: 216 | half_w = abs(part_rshoulder.x - part_neck.x) * img_w * 1.15 217 | x = min(part_neck.x * img_w - half_w, x) 218 | x2 = max(part_neck.x * img_w + half_w, x2) 219 | 220 | # ------ Adjust heuristically - 221 | 222 | # fit into the image frame 223 | x = max(0, x) 224 | y = max(0, y) 225 | x2 = min(img_w - x, x2 - x) + x 226 | y2 = min(img_h - y, y2 - y) + y 227 | 228 | if _round(x2 - x) == 0.0 or _round(y2 - y) == 0.0: 229 | return None 230 | return {"x": _round((x + x2) / 2), 231 | "y": _round((y + y2) / 2), 232 | "w": _round(x2 - x), 233 | "h": _round(y2 - y)} 234 | 235 | def __str__(self): 236 | return ' '.join([str(x) for x in self.body_parts.values()]) 237 | 238 | def __repr__(self): 239 | return self.__str__() 240 | 241 | 242 | class BodyPart: 243 | """ 244 | part_idx : part index(eg. 0 for nose) 245 | x, y: coordinate of body part 246 | score : confidence score 247 | """ 248 | __slots__ = ('uidx', 'part_idx', 'x', 'y', 'score') 249 | 250 | def __init__(self, uidx, part_idx, x, y, score): 251 | self.uidx = uidx 252 | self.part_idx = part_idx 253 | self.x, self.y = x, y 254 | self.score = score 255 | 256 | def get_part_name(self): 257 | return CocoPart(self.part_idx) 258 | 259 | def __str__(self): 260 | return 'BodyPart:%d-(%.2f, %.2f) score=%.2f' % (self.part_idx, self.x, self.y, self.score) 261 | 262 | def __repr__(self): 263 | return self.__str__() 264 | 265 | 266 | class PoseEstimator: 267 | def __init__(self): 268 | pass 269 | 270 | @staticmethod 271 | def estimate_paf(peaks, heat_mat, paf_mat): 272 | pafprocess.process_paf(peaks, heat_mat, paf_mat) 273 | 274 | humans = [] 275 | for human_id in range(pafprocess.get_num_humans()): 276 | human = Human([]) 277 | is_added = False 278 | 279 | for part_idx in range(18): 280 | c_idx = int(pafprocess.get_part_cid(human_id, part_idx)) 281 | if c_idx < 0: 282 | continue 283 | 284 | is_added = True 285 | human.body_parts[part_idx] = BodyPart( 286 | '%d-%d' % (human_id, part_idx), part_idx, 287 | float(pafprocess.get_part_x(c_idx)) / heat_mat.shape[1], 288 | float(pafprocess.get_part_y(c_idx)) / heat_mat.shape[0], 289 | pafprocess.get_part_score(c_idx) 290 | ) 291 | 292 | if is_added: 293 | score = pafprocess.get_score(human_id) 294 | human.score = score 295 | humans.append(human) 296 | 297 | return humans 298 | 299 | 300 | class TfPoseEstimator: 301 | # TODO : multi-scale 302 | 303 | def __init__(self, graph_path, target_size=(320, 240), tf_config=None): 304 | self.target_size = target_size 305 | 306 | # load graph 307 | logger.info('loading graph from %s(default size=%dx%d)' % (graph_path, target_size[0], target_size[1])) 308 | with tf.gfile.GFile(graph_path, 'rb') as f: 309 | graph_def = tf.GraphDef() 310 | graph_def.ParseFromString(f.read()) 311 | 312 | self.graph = tf.get_default_graph() 313 | tf.import_graph_def(graph_def, name='TfPoseEstimator') 314 | self.persistent_sess = tf.Session(graph=self.graph, config=tf_config) 315 | self.tensor_image = self.graph.get_tensor_by_name('TfPoseEstimator/image:0') 316 | self.tensor_output = self.graph.get_tensor_by_name('TfPoseEstimator/Openpose/concat_stage7:0') 317 | self.tensor_heatMat = self.tensor_output[:, :, :, :19] 318 | self.tensor_pafMat = self.tensor_output[:, :, :, 19:] 319 | self.upsample_size = tf.placeholder(dtype=tf.int32, shape=(2,), name='upsample_size') 320 | self.tensor_heatMat_up = tf.image.resize_area(self.tensor_output[:, :, :, :19], self.upsample_size, 321 | align_corners=False, 
name='upsample_heatmat') 322 | self.tensor_pafMat_up = tf.image.resize_area(self.tensor_output[:, :, :, 19:], self.upsample_size, 323 | align_corners=False, name='upsample_pafmat') 324 | smoother = Smoother({'data': self.tensor_heatMat_up}, 25, 3.0) 325 | gaussian_heatMat = smoother.get_output() 326 | 327 | max_pooled_in_tensor = tf.nn.pool(gaussian_heatMat, window_shape=(3, 3), pooling_type='MAX', padding='SAME') 328 | self.tensor_peaks = tf.where(tf.equal(gaussian_heatMat, max_pooled_in_tensor), gaussian_heatMat, 329 | tf.zeros_like(gaussian_heatMat)) 330 | 331 | self.heatMat = self.pafMat = None 332 | 333 | # warm-up 334 | self.persistent_sess.run(tf.variables_initializer( 335 | [v for v in tf.global_variables() if 336 | v.name.split(':')[0] in [x.decode('utf-8') for x in 337 | self.persistent_sess.run(tf.report_uninitialized_variables())] 338 | ]) 339 | ) 340 | self.persistent_sess.run( 341 | [self.tensor_peaks, self.tensor_heatMat_up, self.tensor_pafMat_up], 342 | feed_dict={ 343 | self.tensor_image: [np.ndarray(shape=(target_size[1], target_size[0], 3), dtype=np.float32)], 344 | self.upsample_size: [target_size[1], target_size[0]] 345 | } 346 | ) 347 | self.persistent_sess.run( 348 | [self.tensor_peaks, self.tensor_heatMat_up, self.tensor_pafMat_up], 349 | feed_dict={ 350 | self.tensor_image: [np.ndarray(shape=(target_size[1], target_size[0], 3), dtype=np.float32)], 351 | self.upsample_size: [target_size[1] // 2, target_size[0] // 2] 352 | } 353 | ) 354 | self.persistent_sess.run( 355 | [self.tensor_peaks, self.tensor_heatMat_up, self.tensor_pafMat_up], 356 | feed_dict={ 357 | self.tensor_image: [np.ndarray(shape=(target_size[1], target_size[0], 3), dtype=np.float32)], 358 | self.upsample_size: [target_size[1] // 4, target_size[0] // 4] 359 | } 360 | ) 361 | 362 | def __del__(self): 363 | # self.persistent_sess.close() 364 | pass 365 | 366 | @staticmethod 367 | def _quantize_img(npimg): 368 | npimg_q = npimg + 1.0 369 | npimg_q /= (2.0 / 2 ** 8) 370 | # npimg_q += 0.5 371 | npimg_q = npimg_q.astype(np.uint8) 372 | return npimg_q 373 | 374 | @staticmethod 375 | def draw_humans(npimg, humans, imgcopy=False): 376 | if imgcopy: 377 | npimg = np.copy(npimg) 378 | image_h, image_w = npimg.shape[:2] 379 | centers = {} 380 | for human in humans: 381 | # draw point 382 | for i in range(common.CocoPart.Background.value): 383 | if i not in human.body_parts.keys(): 384 | continue 385 | 386 | body_part = human.body_parts[i] 387 | center = (int(body_part.x * image_w + 0.5), int(body_part.y * image_h + 0.5)) 388 | centers[i] = center 389 | cv2.circle(npimg, center, 3, common.CocoColors[i], thickness=3, lineType=8, shift=0) 390 | 391 | # draw line 392 | for pair_order, pair in enumerate(common.CocoPairsRender): 393 | if pair[0] not in human.body_parts.keys() or pair[1] not in human.body_parts.keys(): 394 | continue 395 | 396 | # npimg = cv2.line(npimg, centers[pair[0]], centers[pair[1]], common.CocoColors[pair_order], 3) 397 | cv2.line(npimg, centers[pair[0]], centers[pair[1]], common.CocoColors[pair_order], 3) 398 | 399 | return npimg 400 | 401 | @staticmethod 402 | def draw_human_pose_connection(npimg, humans): 403 | image_h, image_w = npimg.shape[:2] 404 | centers = {} 405 | results = [] 406 | for human in humans: 407 | pose_lines = [] 408 | body_parts = [] 409 | # draw point 410 | for i in range(common.CocoPart.Background.value): 411 | if i not in human.body_parts.keys(): 412 | continue 413 | 414 | body_part = human.body_parts[i] 415 | center = [int(body_part.x * image_w + 0.5), int(body_part.y 
* image_h + 0.5)] 416 | centers[i] = center 417 | body_parts.append({"part_id": body_part.part_idx, 418 | "part_name": body_part.get_part_name().name, 419 | "score": body_part.score, 420 | "x": center[0], 421 | "y": center[1]}) 422 | 423 | # draw line 424 | for pair_order, pair in enumerate(common.CocoPairsRender): 425 | if pair[0] not in human.body_parts.keys() or pair[1] not in human.body_parts.keys(): 426 | continue 427 | 428 | line = centers[pair[0]] + centers[pair[1]] 429 | pose_lines.append({"line": line}) 430 | 431 | results.append({"human_id": len(results), "pose_lines": pose_lines, "body_parts": body_parts}) 432 | return results 433 | 434 | def _get_scaled_img(self, npimg, scale): 435 | 436 | def get_base_scale(s, w, h): 437 | return max(self.target_size[0] / float(h), self.target_size[1] / float(w)) * s 438 | 439 | img_h, img_w = npimg.shape[:2] 440 | 441 | if scale is None: 442 | if npimg.shape[:2] != (self.target_size[1], self.target_size[0]): 443 | # resize 444 | npimg = cv2.resize(npimg, self.target_size, interpolation=cv2.INTER_CUBIC) 445 | return [npimg], [(0.0, 0.0, 1.0, 1.0)] 446 | elif isinstance(scale, float): 447 | # scaling with center crop 448 | base_scale = get_base_scale(scale, img_w, img_h) 449 | npimg = cv2.resize(npimg, dsize=None, fx=base_scale, fy=base_scale, interpolation=cv2.INTER_CUBIC) 450 | 451 | o_size_h, o_size_w = npimg.shape[:2] 452 | if npimg.shape[0] < self.target_size[1] or npimg.shape[1] < self.target_size[0]: 453 | newimg = np.zeros( 454 | (max(self.target_size[1], npimg.shape[0]), max(self.target_size[0], npimg.shape[1]), 3), 455 | dtype=np.uint8) 456 | newimg[:npimg.shape[0], :npimg.shape[1], :] = npimg 457 | npimg = newimg 458 | 459 | windows = sw.generate(npimg, sw.DimOrder.HeightWidthChannel, self.target_size[0], self.target_size[1], 0.2) 460 | 461 | rois = [] 462 | ratios = [] 463 | for window in windows: 464 | indices = window.indices() 465 | roi = npimg[indices] 466 | rois.append(roi) 467 | ratio_x, ratio_y = float(indices[1].start) / o_size_w, float(indices[0].start) / o_size_h 468 | ratio_w, ratio_h = float(indices[1].stop - indices[1].start) / o_size_w, float( 469 | indices[0].stop - indices[0].start) / o_size_h 470 | ratios.append((ratio_x, ratio_y, ratio_w, ratio_h)) 471 | 472 | return rois, ratios 473 | elif isinstance(scale, tuple) and len(scale) == 2: 474 | # scaling with sliding window : (scale, step) 475 | base_scale = get_base_scale(scale[0], img_w, img_h) 476 | npimg = cv2.resize(npimg, dsize=None, fx=base_scale, fy=base_scale, interpolation=cv2.INTER_CUBIC) 477 | o_size_h, o_size_w = npimg.shape[:2] 478 | if npimg.shape[0] < self.target_size[1] or npimg.shape[1] < self.target_size[0]: 479 | newimg = np.zeros( 480 | (max(self.target_size[1], npimg.shape[0]), max(self.target_size[0], npimg.shape[1]), 3), 481 | dtype=np.uint8) 482 | newimg[:npimg.shape[0], :npimg.shape[1], :] = npimg 483 | npimg = newimg 484 | 485 | window_step = scale[1] 486 | 487 | windows = sw.generate(npimg, sw.DimOrder.HeightWidthChannel, self.target_size[0], self.target_size[1], 488 | window_step) 489 | 490 | rois = [] 491 | ratios = [] 492 | for window in windows: 493 | indices = window.indices() 494 | roi = npimg[indices] 495 | rois.append(roi) 496 | ratio_x, ratio_y = float(indices[1].start) / o_size_w, float(indices[0].start) / o_size_h 497 | ratio_w, ratio_h = float(indices[1].stop - indices[1].start) / o_size_w, float( 498 | indices[0].stop - indices[0].start) / o_size_h 499 | ratios.append((ratio_x, ratio_y, ratio_w, ratio_h)) 500 | 501 | return 
rois, ratios 502 | elif isinstance(scale, tuple) and len(scale) == 3: 503 | # scaling with ROI : (want_x, want_y, scale_ratio) 504 | base_scale = get_base_scale(scale[2], img_w, img_h) 505 | npimg = cv2.resize(npimg, dsize=None, fx=base_scale, fy=base_scale, interpolation=cv2.INTER_CUBIC) 506 | ratio_w = self.target_size[0] / float(npimg.shape[1]) 507 | ratio_h = self.target_size[1] / float(npimg.shape[0]) 508 | 509 | want_x, want_y = scale[:2] 510 | ratio_x = want_x - ratio_w / 2. 511 | ratio_y = want_y - ratio_h / 2. 512 | ratio_x = max(ratio_x, 0.0) 513 | ratio_y = max(ratio_y, 0.0) 514 | if ratio_x + ratio_w > 1.0: 515 | ratio_x = 1. - ratio_w 516 | if ratio_y + ratio_h > 1.0: 517 | ratio_y = 1. - ratio_h 518 | 519 | roi = self._crop_roi(npimg, ratio_x, ratio_y) 520 | return [roi], [(ratio_x, ratio_y, ratio_w, ratio_h)] 521 | 522 | def _crop_roi(self, npimg, ratio_x, ratio_y): 523 | target_w, target_h = self.target_size 524 | h, w = npimg.shape[:2] 525 | x = max(int(w * ratio_x - .5), 0) 526 | y = max(int(h * ratio_y - .5), 0) 527 | cropped = npimg[y:y + target_h, x:x + target_w] 528 | 529 | cropped_h, cropped_w = cropped.shape[:2] 530 | if cropped_w < target_w or cropped_h < target_h: 531 | npblank = np.zeros((self.target_size[1], self.target_size[0], 3), dtype=np.uint8) 532 | 533 | copy_x, copy_y = (target_w - cropped_w) // 2, (target_h - cropped_h) // 2 534 | npblank[copy_y:copy_y + cropped_h, copy_x:copy_x + cropped_w] = cropped 535 | return npblank 536 | return cropped 537 | 538 | def inference(self, npimg, resize_to_default=True, upsample_size=1.0): 539 | if npimg is None: 540 | raise Exception('The image is not valid. Please check that your image exists.') 541 | 542 | if resize_to_default: 543 | upsample_size = [int(self.target_size[1] / 8 * upsample_size), int(self.target_size[0] / 8 * upsample_size)] 544 | else: 545 | upsample_size = [int(npimg.shape[0] / 8 * upsample_size), int(npimg.shape[1] / 8 * upsample_size)] 546 | 547 | if self.tensor_image.dtype == tf.quint8: 548 | # quantize input image 549 | npimg = TfPoseEstimator._quantize_img(npimg) 550 | pass 551 | 552 | logger.debug('inference+ original shape=%dx%d' % (npimg.shape[1], npimg.shape[0])) 553 | img = npimg 554 | if resize_to_default: 555 | img = self._get_scaled_img(npimg, None)[0][0] 556 | peaks, heatMat_up, pafMat_up = self.persistent_sess.run( 557 | [self.tensor_peaks, self.tensor_heatMat_up, self.tensor_pafMat_up], feed_dict={ 558 | self.tensor_image: [img], self.upsample_size: upsample_size 559 | }) 560 | peaks = peaks[0] 561 | self.heatMat = heatMat_up[0] 562 | self.pafMat = pafMat_up[0] 563 | logger.debug('inference- heatMat=%dx%d pafMat=%dx%d' % ( 564 | self.heatMat.shape[1], self.heatMat.shape[0], self.pafMat.shape[1], self.pafMat.shape[0])) 565 | 566 | t = time.time() 567 | humans = PoseEstimator.estimate_paf(peaks, self.heatMat, self.pafMat) 568 | logger.debug('estimate time=%.5f' % (time.time() - t)) 569 | return humans 570 | 571 | 572 | if __name__ == '__main__': 573 | import pickle # nosec - B403:blacklist - we are using pickle on a known file 574 | 575 | f = open('./etcs/heatpaf1.pkl', 'rb') 576 | data = pickle.load(f) # nosec - B301:blacklist - this is a known file 577 | logger.info('size={}'.format(data['heatMat'].shape)) 578 | f.close() 579 | 580 | t = time.time() 581 | humans = PoseEstimator.estimate_paf(data['peaks'], data['heatMat'], data['pafMat']) 582 | dt = time.time() - t 583 | t = time.time() 584 | logger.info('elapsed #humans=%d time=%.8f' % (len(humans), dt)) 585 |
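For reference, here is a minimal, hypothetical driver for the estimator defined above, used outside the Flask API. The constructor arguments (`graph_path` and `target_size`) and the 432x368 size are assumptions based on the usual tf-pose interface rather than something shown in this file; the frozen-graph path is the one listed in `sha512sums.txt`.

```python
import cv2
from core.tf_pose.estimator import TfPoseEstimator

# Assumed constructor signature: TfPoseEstimator(graph_path, target_size=(w, h)).
estimator = TfPoseEstimator('assets/human-pose-estimator-tensorflow.pb',
                            target_size=(432, 368))

img = cv2.imread('samples/Pilots.jpg')  # BGR image as a NumPy array
# Any positive upsample factor works with the signature above; 4.0 is an assumed typical choice.
humans = estimator.inference(img, resize_to_default=True, upsample_size=4.0)

# Draw the skeletons onto a copy of the image ...
annotated = TfPoseEstimator.draw_humans(img, humans, imgcopy=True)
cv2.imwrite('pose-lines.png', annotated)

# ... or build the JSON-friendly structure returned by the /model/predict endpoint.
results = TfPoseEstimator.draw_human_pose_connection(img, humans)
print(results)
```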
-------------------------------------------------------------------------------- /core/tf_pose/networks.py: -------------------------------------------------------------------------------- 1 | def model_wh(resolution_str): 2 | width, height = map(int, resolution_str.split('x')) 3 | if width % 16 != 0 or height % 16 != 0: 4 | raise Exception('Width and height should be multiples of 16. w=%d, h=%d' % (width, height)) 5 | return int(width), int(height) 6 | -------------------------------------------------------------------------------- /core/tf_pose/pafprocess/README.md: -------------------------------------------------------------------------------- 1 | # Post-processing for the Part-Affinity Fields map, implemented in C++ & SWIG 2 | 3 | SWIG needs to be installed first: 4 | 5 | ```bash 6 | $ sudo apt install swig 7 | ``` 8 | 9 | You then need to build the pafprocess module, which is written in C++ and is used for post-processing: 10 | 11 | ```bash 12 | $ swig -python -c++ pafprocess.i && python3 setup.py build_ext --inplace 13 | ``` 14 | 15 | -------------------------------------------------------------------------------- /core/tf_pose/pafprocess/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/MAX-Human-Pose-Estimator/7e1205e5edf04d258dc1d655aea02fc7c0b26ac6/core/tf_pose/pafprocess/__init__.py -------------------------------------------------------------------------------- /core/tf_pose/pafprocess/pafprocess.cpp: -------------------------------------------------------------------------------- 1 | #include <vector> 2 | #include <algorithm> 3 | #include <math.h> 4 | #include "pafprocess.h" 5 | 6 | #define PEAKS(i, j, k) peaks[k+p3*(j+p2*i)] 7 | #define HEAT(i, j, k) heatmap[k+h3*(j+h2*i)] 8 | #define PAF(i, j, k) pafmap[k+f3*(j+f2*i)] 9 | 10 | using namespace std; 11 | 12 | vector<vector<float> > subset; 13 | vector<Peak> peak_infos_line; 14 | 15 | int roundpaf(float v); 16 | vector<VectorXY> get_paf_vectors(float *pafmap, const int& ch_id1, const int& ch_id2, int& f2, int& f3, Peak& peak1, Peak& peak2); 17 | bool comp_candidate(ConnectionCandidate a, ConnectionCandidate b); 18 | 19 | int process_paf(int p1, int p2, int p3, float *peaks, int h1, int h2, int h3, float *heatmap, int f1, int f2, int f3, float *pafmap) { 20 | // const int THRE_CNT = 4; 21 | // const double THRESH_PAF = 0.40; 22 | vector<Peak> peak_infos[NUM_PART]; 23 | int peak_cnt = 0; 24 | for (int part_id = 0; part_id < NUM_PART; part_id ++) { 25 | for (int y = 0; y < p1; y ++) { 26 | for (int x = 0; x < p2; x ++) { 27 | if (PEAKS(y, x, part_id) > THRESH_HEAT) { 28 | Peak info; 29 | info.id = peak_cnt++; 30 | info.x = x; 31 | info.y = y; 32 | info.score = HEAT(y, x, part_id); 33 | peak_infos[part_id].push_back(info); 34 | } 35 | } 36 | } 37 | } 38 | 39 | peak_infos_line.clear(); 40 | for (int part_id = 0; part_id < NUM_PART; part_id ++) { 41 | for (int i = 0; i < (int) peak_infos[part_id].size(); i ++) { 42 | peak_infos_line.push_back(peak_infos[part_id][i]); 43 | } 44 | } 45 | 46 | // Start to Connect 47 | vector<Connection> connection_all[COCOPAIRS_SIZE]; 48 | for (int pair_id = 0; pair_id < COCOPAIRS_SIZE; pair_id ++) { 49 | vector<ConnectionCandidate> candidates; 50 | vector<Peak>& peak_a_list = peak_infos[COCOPAIRS[pair_id][0]]; 51 | vector<Peak>& peak_b_list = peak_infos[COCOPAIRS[pair_id][1]]; 52 | 53 | if (peak_a_list.size() == 0 || peak_b_list.size() == 0) { 54 | continue; 55 | } 56 | 57 | for (int peak_a_id = 0; peak_a_id < (int) peak_a_list.size(); peak_a_id ++) { 58 | Peak& peak_a = peak_a_list[peak_a_id]; 59 | for (int peak_b_id = 0; peak_b_id < (int)
peak_b_list.size(); peak_b_id ++) { 60 | Peak& peak_b = peak_b_list[peak_b_id]; 61 | 62 | // calculate vector(direction) 63 | VectorXY vec; 64 | vec.x = peak_b.x - peak_a.x; 65 | vec.y = peak_b.y - peak_a.y; 66 | float norm = (float) sqrt(vec.x * vec.x + vec.y * vec.y); 67 | if (norm < 1e-12) continue; 68 | vec.x = vec.x / norm; 69 | vec.y = vec.y / norm; 70 | 71 | vector<VectorXY> paf_vecs = get_paf_vectors(pafmap, COCOPAIRS_NET[pair_id][0], COCOPAIRS_NET[pair_id][1], f2, f3, peak_a, peak_b); 72 | float scores = 0.0f; 73 | 74 | // criterion 1 : score threshold count 75 | int criterion1 = 0; 76 | for (int i = 0; i < STEP_PAF; i ++) { 77 | float score = vec.x * paf_vecs[i].x + vec.y * paf_vecs[i].y; 78 | scores += score; 79 | 80 | if (score > THRESH_VECTOR_SCORE) criterion1 += 1; 81 | } 82 | 83 | float criterion2 = scores / STEP_PAF + min(0.0, 0.5 * h1 / norm - 1.0); 84 | 85 | if (criterion1 > THRESH_VECTOR_CNT1 && criterion2 > 0) { 86 | ConnectionCandidate candidate; 87 | candidate.idx1 = peak_a_id; 88 | candidate.idx2 = peak_b_id; 89 | candidate.score = criterion2; 90 | candidate.etc = criterion2 + peak_a.score + peak_b.score; 91 | candidates.push_back(candidate); 92 | } 93 | } 94 | } 95 | 96 | vector<Connection>& conns = connection_all[pair_id]; 97 | sort(candidates.begin(), candidates.end(), comp_candidate); 98 | for (int c_id = 0; c_id < (int) candidates.size(); c_id ++) { 99 | ConnectionCandidate& candidate = candidates[c_id]; 100 | bool assigned = false; 101 | for (int conn_id = 0; conn_id < (int) conns.size(); conn_id ++) { 102 | if (conns[conn_id].peak_id1 == candidate.idx1) { 103 | // already assigned 104 | assigned = true; 105 | break; 106 | } 107 | if (assigned) break; 108 | if (conns[conn_id].peak_id2 == candidate.idx2) { 109 | // already assigned 110 | assigned = true; 111 | break; 112 | } 113 | if (assigned) break; 114 | } 115 | if (assigned) continue; 116 | 117 | Connection conn; 118 | conn.peak_id1 = candidate.idx1; 119 | conn.peak_id2 = candidate.idx2; 120 | conn.score = candidate.score; 121 | conn.cid1 = peak_a_list[candidate.idx1].id; 122 | conn.cid2 = peak_b_list[candidate.idx2].id; 123 | conns.push_back(conn); 124 | } 125 | } 126 | 127 | // Generate subset 128 | subset.clear(); 129 | for (int pair_id = 0; pair_id < COCOPAIRS_SIZE; pair_id ++) { 130 | vector<Connection>& conns = connection_all[pair_id]; 131 | int part_id1 = COCOPAIRS[pair_id][0]; 132 | int part_id2 = COCOPAIRS[pair_id][1]; 133 | 134 | for (int conn_id = 0; conn_id < (int) conns.size(); conn_id ++) { 135 | int found = 0; 136 | int subset_idx1=0, subset_idx2=0; 137 | for (int subset_id = 0; subset_id < (int) subset.size(); subset_id ++) { 138 | if (subset[subset_id][part_id1] == conns[conn_id].cid1 || subset[subset_id][part_id2] == conns[conn_id].cid2) { 139 | if (found == 0) subset_idx1 = subset_id; 140 | if (found == 1) subset_idx2 = subset_id; 141 | found += 1; 142 | } 143 | } 144 | 145 | if (found == 1) { 146 | if (subset[subset_idx1][part_id2] != conns[conn_id].cid2) { 147 | subset[subset_idx1][part_id2] = conns[conn_id].cid2; 148 | subset[subset_idx1][19] += 1; 149 | subset[subset_idx1][18] += peak_infos_line[conns[conn_id].cid2].score + conns[conn_id].score; 150 | } 151 | } else if (found == 2) { 152 | int membership = 0; 153 | for (int subset_id = 0; subset_id < 18; subset_id ++) { 154 | if (subset[subset_idx1][subset_id] > 0 && subset[subset_idx2][subset_id] > 0) { 155 | membership = 2; 156 | } 157 | } 158 | 159 | if (membership == 0) { 160 | for (int subset_id = 0; subset_id < 18; subset_id ++) subset[subset_idx1][subset_id] +=
(subset[subset_idx2][subset_id] + 1); 161 | 162 | subset[subset_idx1][19] += subset[subset_idx2][19]; 163 | subset[subset_idx1][18] += subset[subset_idx2][18]; 164 | subset[subset_idx1][18] += conns[conn_id].score; 165 | subset.erase(subset.begin() + subset_idx2); 166 | } else { 167 | subset[subset_idx1][part_id2] = conns[conn_id].cid2; 168 | subset[subset_idx1][19] += 1; 169 | subset[subset_idx1][18] += peak_infos_line[conns[conn_id].cid2].score + conns[conn_id].score; 170 | } 171 | } else if (found == 0 && pair_id < 17) { 172 | vector<float> row(20); 173 | for (int i = 0; i < 20; i ++) row[i] = -1; 174 | row[part_id1] = conns[conn_id].cid1; 175 | row[part_id2] = conns[conn_id].cid2; 176 | row[19] = 2; 177 | row[18] = peak_infos_line[conns[conn_id].cid1].score + 178 | peak_infos_line[conns[conn_id].cid2].score + 179 | conns[conn_id].score; 180 | subset.push_back(row); 181 | } 182 | } 183 | } 184 | 185 | // delete some rows 186 | for (int i = subset.size() - 1; i >= 0; i --) { 187 | if (subset[i][19] < THRESH_PART_CNT || subset[i][18] / subset[i][19] < THRESH_HUMAN_SCORE) 188 | subset.erase(subset.begin() + i); 189 | } 190 | 191 | return 0; 192 | } 193 | 194 | int get_num_humans() { 195 | return subset.size(); 196 | } 197 | 198 | int get_part_cid(int human_id, int part_id) { 199 | return subset[human_id][part_id]; 200 | } 201 | 202 | float get_score(int human_id) { 203 | return subset[human_id][18] / subset[human_id][19]; 204 | } 205 | 206 | int get_part_x(int cid) { 207 | return peak_infos_line[cid].x; 208 | } 209 | int get_part_y(int cid) { 210 | return peak_infos_line[cid].y; 211 | } 212 | float get_part_score(int cid) { 213 | return peak_infos_line[cid].score; 214 | } 215 | 216 | vector<VectorXY> get_paf_vectors(float *pafmap, const int& ch_id1, const int& ch_id2, int& f2, int& f3, Peak& peak1, Peak& peak2) { 217 | vector<VectorXY> paf_vectors; 218 | 219 | const float STEP_X = (peak2.x - peak1.x) / float(STEP_PAF); 220 | const float STEP_Y = (peak2.y - peak1.y) / float(STEP_PAF); 221 | 222 | for (int i = 0; i < STEP_PAF; i ++) { 223 | int location_x = roundpaf(peak1.x + i * STEP_X); 224 | int location_y = roundpaf(peak1.y + i * STEP_Y); 225 | 226 | VectorXY v; 227 | v.x = PAF(location_y, location_x, ch_id1); 228 | v.y = PAF(location_y, location_x, ch_id2); 229 | paf_vectors.push_back(v); 230 | } 231 | 232 | return paf_vectors; 233 | } 234 | 235 | int roundpaf(float v) { 236 | return (int) (v + 0.5); 237 | } 238 | 239 | bool comp_candidate(ConnectionCandidate a, ConnectionCandidate b) { 240 | return a.score > b.score; 241 | } 242 | -------------------------------------------------------------------------------- /core/tf_pose/pafprocess/pafprocess.h: -------------------------------------------------------------------------------- 1 | #include <vector> 2 | 3 | #ifndef PAFPROCESS 4 | #define PAFPROCESS 5 | 6 | const float THRESH_HEAT = 0.05; 7 | const float THRESH_VECTOR_SCORE = 0.05; 8 | const int THRESH_VECTOR_CNT1 = 8; 9 | const int THRESH_PART_CNT = 4; 10 | const float THRESH_HUMAN_SCORE = 0.4; 11 | const int NUM_PART = 18; 12 | 13 | const int STEP_PAF = 10; 14 | 15 | const int COCOPAIRS_SIZE = 19; 16 | const int COCOPAIRS_NET[COCOPAIRS_SIZE][2] = { 17 | {12, 13}, {20, 21}, {14, 15}, {16, 17}, {22, 23}, {24, 25}, {0, 1}, {2, 3}, {4, 5}, 18 | {6, 7}, {8, 9}, {10, 11}, {28, 29}, {30, 31}, {34, 35}, {32, 33}, {36, 37}, {18, 19}, {26, 27} 19 | }; 20 | 21 | const int COCOPAIRS[COCOPAIRS_SIZE][2] = { 22 | {1, 2}, {1, 5}, {2, 3}, {3, 4}, {5, 6}, {6, 7}, {1, 8}, {8, 9}, {9, 10}, {1, 11}, 23 | {11, 12}, {12, 13}, {1, 0}, {0, 14},
{14, 16}, {0, 15}, {15, 17}, {2, 16}, {5, 17} 24 | }; 25 | 26 | struct Peak { 27 | int x; 28 | int y; 29 | float score; 30 | int id; 31 | }; 32 | 33 | struct VectorXY { 34 | float x; 35 | float y; 36 | }; 37 | 38 | struct ConnectionCandidate { 39 | int idx1; 40 | int idx2; 41 | float score; 42 | float etc; 43 | }; 44 | 45 | struct Connection { 46 | int cid1; 47 | int cid2; 48 | float score; 49 | int peak_id1; 50 | int peak_id2; 51 | }; 52 | 53 | int process_paf(int p1, int p2, int p3, float *peaks, int h1, int h2, int h3, float *heatmap, int f1, int f2, int f3, float *pafmap); 54 | int get_num_humans(); 55 | int get_part_cid(int human_id, int part_id); 56 | float get_score(int human_id); 57 | int get_part_x(int cid); 58 | int get_part_y(int cid); 59 | float get_part_score(int cid); 60 | 61 | #endif 62 | -------------------------------------------------------------------------------- /core/tf_pose/pafprocess/pafprocess.i: -------------------------------------------------------------------------------- 1 | %module pafprocess 2 | %{ 3 | #define SWIG_FILE_WITH_INIT 4 | #include "pafprocess.h" 5 | %} 6 | 7 | %include "numpy.i" 8 | %init %{ 9 | import_array(); 10 | %} 11 | 12 | //%apply (int DIM1, int DIM2, int* IN_ARRAY2) {(int p1, int p2, int *peak_idxs)} 13 | //%apply (int DIM1, int DIM2, int DIM3, float* IN_ARRAY3) {(int h1, int h2, int h3, float *heatmap), (int f1, int f2, int f3, float *pafmap)}; 14 | %apply (int DIM1, int DIM2, int DIM3, float* IN_ARRAY3) {(int p1, int p2, int p3, float *peaks), (int h1, int h2, int h3, float *heatmap), (int f1, int f2, int f3, float *pafmap)}; 15 | %include "pafprocess.h" 16 | -------------------------------------------------------------------------------- /core/tf_pose/pafprocess/pafprocess.py: -------------------------------------------------------------------------------- 1 | # This file was automatically generated by SWIG (http://www.swig.org). 2 | # Version 3.0.12 3 | # 4 | # Do not make changes to this file unless you know what you are doing--modify 5 | # the SWIG interface file instead. 6 | 7 | from sys import version_info as _swig_python_version_info 8 | if _swig_python_version_info >= (2, 7, 0): 9 | def swig_import_helper(): 10 | import importlib 11 | pkg = __name__.rpartition('.')[0] 12 | mname = '.'.join((pkg, '_pafprocess')).lstrip('.') 13 | try: 14 | return importlib.import_module(mname) 15 | except ImportError: 16 | return importlib.import_module('_pafprocess') 17 | _pafprocess = swig_import_helper() 18 | del swig_import_helper 19 | elif _swig_python_version_info >= (2, 6, 0): 20 | def swig_import_helper(): 21 | from os.path import dirname 22 | import imp 23 | fp = None 24 | try: 25 | fp, pathname, description = imp.find_module('_pafprocess', [dirname(__file__)]) 26 | except ImportError: 27 | import _pafprocess 28 | return _pafprocess 29 | try: 30 | _mod = imp.load_module('_pafprocess', fp, pathname, description) 31 | finally: 32 | if fp is not None: 33 | fp.close() 34 | return _mod 35 | _pafprocess = swig_import_helper() 36 | del swig_import_helper 37 | else: 38 | import _pafprocess 39 | del _swig_python_version_info 40 | 41 | try: 42 | _swig_property = property 43 | except NameError: 44 | pass # Python < 2.2 doesn't have 'property'. 
45 | 46 | try: 47 | import builtins as __builtin__ 48 | except ImportError: 49 | import __builtin__ 50 | 51 | 52 | def _swig_setattr_nondynamic(self, class_type, name, value, static=1): 53 | if name == "thisown": 54 | return self.this.own(value) 55 | if name == "this": 56 | if type(value).__name__ == 'SwigPyObject': 57 | self.__dict__[name] = value 58 | return 59 | method = class_type.__swig_setmethods__.get(name, None) 60 | if method: 61 | return method(self, value) 62 | if not static: 63 | if _newclass: 64 | object.__setattr__(self, name, value) 65 | else: 66 | self.__dict__[name] = value 67 | else: 68 | raise AttributeError("You cannot add attributes to %s" % self) 69 | 70 | 71 | def _swig_setattr(self, class_type, name, value): 72 | return _swig_setattr_nondynamic(self, class_type, name, value, 0) 73 | 74 | 75 | def _swig_getattr(self, class_type, name): 76 | if name == "thisown": 77 | return self.this.own() 78 | method = class_type.__swig_getmethods__.get(name, None) 79 | if method: 80 | return method(self) 81 | raise AttributeError("'%s' object has no attribute '%s'" % (class_type.__name__, name)) 82 | 83 | 84 | def _swig_repr(self): 85 | try: 86 | strthis = "proxy of " + self.this.__repr__() 87 | except __builtin__.Exception: 88 | strthis = "" 89 | return "<%s.%s; %s >" % (self.__class__.__module__, self.__class__.__name__, strthis,) 90 | 91 | 92 | try: 93 | _object = object 94 | _newclass = 1 95 | except __builtin__.Exception: 96 | class _object: 97 | pass 98 | _newclass = 0 99 | 100 | 101 | class Peak(_object): 102 | __swig_setmethods__ = {} 103 | 104 | def __setattr__(self, name, value): 105 | return _swig_setattr(self, Peak, name, value) 106 | 107 | __swig_getmethods__ = {} 108 | 109 | def __getattr__(self, name): 110 | return _swig_getattr(self, Peak, name) 111 | 112 | __repr__ = _swig_repr 113 | __swig_setmethods__["x"] = _pafprocess.Peak_x_set 114 | __swig_getmethods__["x"] = _pafprocess.Peak_x_get 115 | if _newclass: 116 | x = _swig_property(_pafprocess.Peak_x_get, _pafprocess.Peak_x_set) 117 | __swig_setmethods__["y"] = _pafprocess.Peak_y_set 118 | __swig_getmethods__["y"] = _pafprocess.Peak_y_get 119 | if _newclass: 120 | y = _swig_property(_pafprocess.Peak_y_get, _pafprocess.Peak_y_set) 121 | __swig_setmethods__["score"] = _pafprocess.Peak_score_set 122 | __swig_getmethods__["score"] = _pafprocess.Peak_score_get 123 | if _newclass: 124 | score = _swig_property(_pafprocess.Peak_score_get, _pafprocess.Peak_score_set) 125 | __swig_setmethods__["id"] = _pafprocess.Peak_id_set 126 | __swig_getmethods__["id"] = _pafprocess.Peak_id_get 127 | if _newclass: 128 | id = _swig_property(_pafprocess.Peak_id_get, _pafprocess.Peak_id_set) 129 | 130 | def __init__(self): 131 | this = _pafprocess.new_Peak() 132 | try: 133 | self.this.append(this) 134 | except __builtin__.Exception: 135 | self.this = this 136 | __swig_destroy__ = _pafprocess.delete_Peak 137 | 138 | def __del__(self): 139 | return None 140 | 141 | 142 | Peak_swigregister = _pafprocess.Peak_swigregister 143 | Peak_swigregister(Peak) 144 | cvar = _pafprocess.cvar 145 | THRESH_HEAT = cvar.THRESH_HEAT 146 | THRESH_VECTOR_SCORE = cvar.THRESH_VECTOR_SCORE 147 | THRESH_VECTOR_CNT1 = cvar.THRESH_VECTOR_CNT1 148 | THRESH_PART_CNT = cvar.THRESH_PART_CNT 149 | THRESH_HUMAN_SCORE = cvar.THRESH_HUMAN_SCORE 150 | NUM_PART = cvar.NUM_PART 151 | STEP_PAF = cvar.STEP_PAF 152 | COCOPAIRS_SIZE = cvar.COCOPAIRS_SIZE 153 | COCOPAIRS_NET = cvar.COCOPAIRS_NET 154 | COCOPAIRS = cvar.COCOPAIRS 155 | 156 | 157 | class VectorXY(_object): 158 | 
__swig_setmethods__ = {} 159 | 160 | def __setattr__(self, name, value): 161 | return _swig_setattr(self, VectorXY, name, value) 162 | 163 | __swig_getmethods__ = {} 164 | 165 | def __getattr__(self, name): 166 | return _swig_getattr(self, VectorXY, name) 167 | 168 | __repr__ = _swig_repr 169 | __swig_setmethods__["x"] = _pafprocess.VectorXY_x_set 170 | __swig_getmethods__["x"] = _pafprocess.VectorXY_x_get 171 | if _newclass: 172 | x = _swig_property(_pafprocess.VectorXY_x_get, _pafprocess.VectorXY_x_set) 173 | __swig_setmethods__["y"] = _pafprocess.VectorXY_y_set 174 | __swig_getmethods__["y"] = _pafprocess.VectorXY_y_get 175 | if _newclass: 176 | y = _swig_property(_pafprocess.VectorXY_y_get, _pafprocess.VectorXY_y_set) 177 | 178 | def __init__(self): 179 | this = _pafprocess.new_VectorXY() 180 | try: 181 | self.this.append(this) 182 | except __builtin__.Exception: 183 | self.this = this 184 | __swig_destroy__ = _pafprocess.delete_VectorXY 185 | 186 | def __del__(self): 187 | return None 188 | 189 | 190 | VectorXY_swigregister = _pafprocess.VectorXY_swigregister 191 | VectorXY_swigregister(VectorXY) 192 | 193 | 194 | class ConnectionCandidate(_object): 195 | __swig_setmethods__ = {} 196 | 197 | def __setattr__(self, name, value): 198 | return _swig_setattr(self, ConnectionCandidate, name, value) 199 | 200 | __swig_getmethods__ = {} 201 | 202 | def __getattr__(self, name): 203 | return _swig_getattr(self, ConnectionCandidate, name) 204 | 205 | __repr__ = _swig_repr 206 | __swig_setmethods__["idx1"] = _pafprocess.ConnectionCandidate_idx1_set 207 | __swig_getmethods__["idx1"] = _pafprocess.ConnectionCandidate_idx1_get 208 | if _newclass: 209 | idx1 = _swig_property(_pafprocess.ConnectionCandidate_idx1_get, _pafprocess.ConnectionCandidate_idx1_set) 210 | __swig_setmethods__["idx2"] = _pafprocess.ConnectionCandidate_idx2_set 211 | __swig_getmethods__["idx2"] = _pafprocess.ConnectionCandidate_idx2_get 212 | if _newclass: 213 | idx2 = _swig_property(_pafprocess.ConnectionCandidate_idx2_get, _pafprocess.ConnectionCandidate_idx2_set) 214 | __swig_setmethods__["score"] = _pafprocess.ConnectionCandidate_score_set 215 | __swig_getmethods__["score"] = _pafprocess.ConnectionCandidate_score_get 216 | if _newclass: 217 | score = _swig_property(_pafprocess.ConnectionCandidate_score_get, _pafprocess.ConnectionCandidate_score_set) 218 | __swig_setmethods__["etc"] = _pafprocess.ConnectionCandidate_etc_set 219 | __swig_getmethods__["etc"] = _pafprocess.ConnectionCandidate_etc_get 220 | if _newclass: 221 | etc = _swig_property(_pafprocess.ConnectionCandidate_etc_get, _pafprocess.ConnectionCandidate_etc_set) 222 | 223 | def __init__(self): 224 | this = _pafprocess.new_ConnectionCandidate() 225 | try: 226 | self.this.append(this) 227 | except __builtin__.Exception: 228 | self.this = this 229 | __swig_destroy__ = _pafprocess.delete_ConnectionCandidate 230 | 231 | def __del__(self): 232 | return None 233 | 234 | 235 | ConnectionCandidate_swigregister = _pafprocess.ConnectionCandidate_swigregister 236 | ConnectionCandidate_swigregister(ConnectionCandidate) 237 | 238 | 239 | class Connection(_object): 240 | __swig_setmethods__ = {} 241 | 242 | def __setattr__(self, name, value): 243 | return _swig_setattr(self, Connection, name, value) 244 | 245 | __swig_getmethods__ = {} 246 | 247 | def __getattr__(self, name): 248 | return _swig_getattr(self, Connection, name) 249 | __repr__ = _swig_repr 250 | __swig_setmethods__["cid1"] = _pafprocess.Connection_cid1_set 251 | __swig_getmethods__["cid1"] = 
_pafprocess.Connection_cid1_get 252 | if _newclass: 253 | cid1 = _swig_property(_pafprocess.Connection_cid1_get, _pafprocess.Connection_cid1_set) 254 | __swig_setmethods__["cid2"] = _pafprocess.Connection_cid2_set 255 | __swig_getmethods__["cid2"] = _pafprocess.Connection_cid2_get 256 | if _newclass: 257 | cid2 = _swig_property(_pafprocess.Connection_cid2_get, _pafprocess.Connection_cid2_set) 258 | __swig_setmethods__["score"] = _pafprocess.Connection_score_set 259 | __swig_getmethods__["score"] = _pafprocess.Connection_score_get 260 | if _newclass: 261 | score = _swig_property(_pafprocess.Connection_score_get, _pafprocess.Connection_score_set) 262 | __swig_setmethods__["peak_id1"] = _pafprocess.Connection_peak_id1_set 263 | __swig_getmethods__["peak_id1"] = _pafprocess.Connection_peak_id1_get 264 | if _newclass: 265 | peak_id1 = _swig_property(_pafprocess.Connection_peak_id1_get, _pafprocess.Connection_peak_id1_set) 266 | __swig_setmethods__["peak_id2"] = _pafprocess.Connection_peak_id2_set 267 | __swig_getmethods__["peak_id2"] = _pafprocess.Connection_peak_id2_get 268 | if _newclass: 269 | peak_id2 = _swig_property(_pafprocess.Connection_peak_id2_get, _pafprocess.Connection_peak_id2_set) 270 | 271 | def __init__(self): 272 | this = _pafprocess.new_Connection() 273 | try: 274 | self.this.append(this) 275 | except __builtin__.Exception: 276 | self.this = this 277 | __swig_destroy__ = _pafprocess.delete_Connection 278 | 279 | def __del__(self): 280 | return None 281 | 282 | 283 | Connection_swigregister = _pafprocess.Connection_swigregister 284 | Connection_swigregister(Connection) 285 | 286 | 287 | def process_paf(p1, h1, f1): 288 | return _pafprocess.process_paf(p1, h1, f1) 289 | process_paf = _pafprocess.process_paf # noqa 290 | 291 | 292 | def get_num_humans(): 293 | return _pafprocess.get_num_humans() 294 | get_num_humans = _pafprocess.get_num_humans # noqa 295 | 296 | 297 | def get_part_cid(human_id, part_id): 298 | return _pafprocess.get_part_cid(human_id, part_id) 299 | get_part_cid = _pafprocess.get_part_cid # noqa 300 | 301 | 302 | def get_score(human_id): 303 | return _pafprocess.get_score(human_id) 304 | get_score = _pafprocess.get_score # noqa 305 | 306 | 307 | def get_part_x(cid): 308 | return _pafprocess.get_part_x(cid) 309 | get_part_x = _pafprocess.get_part_x # noqa 310 | 311 | 312 | def get_part_y(cid): 313 | return _pafprocess.get_part_y(cid) 314 | get_part_y = _pafprocess.get_part_y # noqa 315 | 316 | 317 | def get_part_score(cid): 318 | return _pafprocess.get_part_score(cid) 319 | get_part_score = _pafprocess.get_part_score # noqa 320 | # This file is compatible with both classic and new-style classes. 
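For orientation, here is a sketch of how this generated wrapper is typically consumed once the extension has been built as described in the pafprocess README. Because of the `numpy.i` typemaps applied in `pafprocess.i`, each `(DIM1, DIM2, DIM3, float*)` argument group collapses into a single 3-D `float32` NumPy array on the Python side; the array shapes below are illustrative stand-ins for real network outputs, not values mandated by the module.

```python
import numpy as np
from core.tf_pose.pafprocess import pafprocess  # requires the compiled _pafprocess extension

# Illustrative shapes only: H x W maps with 19 heat channels and 38 PAF channels.
peaks = np.random.rand(46, 54, 19).astype(np.float32)
heat_mat = np.random.rand(46, 54, 19).astype(np.float32)
paf_mat = np.random.rand(46, 54, 38).astype(np.float32)

pafprocess.process_paf(peaks, heat_mat, paf_mat)

for human_id in range(pafprocess.get_num_humans()):
    print('human', human_id, 'score', pafprocess.get_score(human_id))
    for part_id in range(pafprocess.NUM_PART):
        cid = pafprocess.get_part_cid(human_id, part_id)
        if cid < 0:
            continue  # this body part was not detected for this human
        x, y = pafprocess.get_part_x(cid), pafprocess.get_part_y(cid)
        part_score = pafprocess.get_part_score(cid)
```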
321 | -------------------------------------------------------------------------------- /core/tf_pose/pafprocess/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | import numpy 3 | 4 | setup(name='pafprocess_ext', version='1.0', 5 | ext_modules=[Extension('_pafprocess', ['pafprocess.cpp', 'pafprocess.i'], swig_opts=['-c++'], 6 | depends=["pafprocess.h"], include_dirs=[numpy.get_include(), '.'])], 7 | py_modules=["pafprocess"]) 8 | -------------------------------------------------------------------------------- /core/tf_pose/pystopwatch.py: -------------------------------------------------------------------------------- 1 | import time 2 | from collections import defaultdict 3 | 4 | 5 | class StopWatchManager: 6 | def __init__(self): 7 | self.watches = defaultdict(StopWatch) 8 | 9 | def get(self, name): 10 | return self.watches[name] 11 | 12 | def start(self, name): 13 | self.get(name).start() 14 | 15 | def stop(self, name): 16 | self.get(name).stop() 17 | 18 | def reset(self, name): 19 | self.get(name).reset() 20 | 21 | def get_elapsed(self, name): 22 | return self.get(name).get_elapsed() 23 | 24 | def __repr__(self): 25 | return '\n'.join(['%s: %.8f' % (k, v.elapsed_accumulated) for k, v in self.watches.items()]) 26 | 27 | 28 | class StopWatch: 29 | def __init__(self): 30 | self.elapsed_accumulated = 0.0 31 | self.started_at = time.time() 32 | 33 | def start(self): 34 | self.started_at = time.time() 35 | 36 | def stop(self): 37 | self.elapsed_accumulated += time.time() - self.started_at 38 | 39 | def reset(self): 40 | self.elapsed_accumulated = 0.0 41 | 42 | def get_elapsed(self): 43 | return self.elapsed_accumulated 44 | -------------------------------------------------------------------------------- /core/tf_pose/slidingwindow/ArrayUtils.py: -------------------------------------------------------------------------------- 1 | import math 2 | import mmap 3 | import tempfile 4 | import numpy as np 5 | import psutil 6 | 7 | 8 | def _requiredSize(shape, dtype): 9 | """ 10 | Determines the number of bytes required to store a NumPy array with 11 | the specified shape and datatype. 
12 | """ 13 | return math.floor(np.prod(np.asarray(shape, dtype=np.uint64)) * np.dtype(dtype).itemsize) 14 | 15 | 16 | class TempfileBackedArray(np.ndarray): 17 | """ 18 | A NumPy ndarray that uses a memory-mapped temp file as its backing 19 | """ 20 | 21 | def __new__(subtype, shape, dtype=float, buffer=None, offset=0, strides=None, order=None, info=None): 22 | # Determine the size in bytes required to hold the array 23 | numBytes = _requiredSize(shape, dtype) 24 | 25 | # Create the temporary file, resize it, and map it into memory 26 | tempFile = tempfile.TemporaryFile() 27 | tempFile.truncate(numBytes) 28 | buf = mmap.mmap(tempFile.fileno(), numBytes, access=mmap.ACCESS_WRITE) 29 | 30 | # Create the ndarray with the memory map as the underlying buffer 31 | obj = super(TempfileBackedArray, subtype).__new__(subtype, shape, dtype, buf, 0, None, order) 32 | 33 | # Attach the file reference to the ndarray object 34 | obj._file = tempFile 35 | return obj 36 | 37 | def __array_finalize__(self, obj): 38 | if obj is None: 39 | return 40 | self._file = getattr(obj, '_file', None) 41 | 42 | 43 | def arrayFactory(shape, dtype=float): 44 | """ 45 | Creates a new ndarray of the specified shape and datatype, storing 46 | it in memory if there is sufficient available space or else using 47 | a memory-mapped temporary file to provide the underlying buffer. 48 | """ 49 | 50 | # Determine the number of bytes required to store the array 51 | requiredBytes = _requiredSize(shape, dtype) 52 | 53 | # Determine if there is sufficient available memory 54 | vmem = psutil.virtual_memory() 55 | if vmem.available > requiredBytes: 56 | return np.ndarray(shape=shape, dtype=dtype) 57 | else: 58 | return TempfileBackedArray(shape=shape, dtype=dtype) 59 | 60 | 61 | def zerosFactory(shape, dtype=float): 62 | """ 63 | Creates a new NumPy array using `arrayFactory()` and fills it with zeros. 64 | """ 65 | arr = arrayFactory(shape=shape, dtype=dtype) 66 | arr.fill(0) 67 | return arr 68 | 69 | 70 | def arrayCast(source, dtype): 71 | """ 72 | Casts a NumPy array to the specified datatype, storing the copy 73 | in memory if there is sufficient available space or else using a 74 | memory-mapped temporary file to provide the underlying buffer. 75 | """ 76 | 77 | # Determine the number of bytes required to store the array 78 | requiredBytes = _requiredSize(source.shape, dtype) 79 | 80 | # Determine if there is sufficient available memory 81 | vmem = psutil.virtual_memory() 82 | if vmem.available > requiredBytes: 83 | return source.astype(dtype, subok=False) 84 | else: 85 | dest = arrayFactory(source.shape, dtype) 86 | np.copyto(dest, source, casting='unsafe') 87 | return dest 88 | 89 | 90 | def determineMaxWindowSize(dtype, limit=None): 91 | """ 92 | Determines the largest square window size that can be used, based on 93 | the specified datatype and amount of currently available system memory. 94 | 95 | If `limit` is specified, then this value will be returned in the event 96 | that it is smaller than the maximum computed size. 
97 | """ 98 | vmem = psutil.virtual_memory() 99 | maxSize = math.floor(math.sqrt(vmem.available / np.dtype(dtype).itemsize)) 100 | if limit is None or limit >= maxSize: 101 | return maxSize 102 | else: 103 | return limit 104 | -------------------------------------------------------------------------------- /core/tf_pose/slidingwindow/Batching.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def batchWindows(windows, batchSize): 5 | """ 6 | Splits a list of windows into a series of batches. 7 | """ 8 | return np.array_split(np.array(windows), len(windows) // batchSize) 9 | -------------------------------------------------------------------------------- /core/tf_pose/slidingwindow/Merging.py: -------------------------------------------------------------------------------- 1 | from .SlidingWindow import generate 2 | from .Batching import batchWindows 3 | import numpy as np 4 | 5 | 6 | def mergeWindows(data, dimOrder, maxWindowSize, overlapPercent, batchSize, transform, progressCallback=None): 7 | """ 8 | Generates sliding windows for the specified dataset and applies the specified 9 | transformation function to each window. Where multiple overlapping windows 10 | include an element of the input dataset, the overlap is resolved by computing 11 | the mean transform result value for that element. 12 | 13 | Irrespective of the order of the dimensions of the input dataset, the 14 | transformation function should return a NumPy array with dimensions 15 | [batch, height, width, resultChannels]. 16 | 17 | If a progress callback is supplied, it will be called immediately before 18 | applying the transformation function to each batch of windows. The callback 19 | should accept the current batch index and number of batches as arguments. 
20 | """ 21 | 22 | # Determine the dimensions of the input data 23 | sourceWidth = data.shape[dimOrder.index('w')] 24 | sourceHeight = data.shape[dimOrder.index('h')] 25 | 26 | # Generate the sliding windows and group them into batches 27 | windows = generate(data, dimOrder, maxWindowSize, overlapPercent) 28 | batches = batchWindows(windows, batchSize) 29 | 30 | # Apply the transform to the first batch of windows and determine the result dimensionality 31 | exemplarResult = transform(data, batches[0]) 32 | resultDimensions = exemplarResult.shape[len(exemplarResult.shape) - 1] 33 | 34 | # Create the matrices to hold the sums and counts for the transform result values 35 | sums = np.zeros((sourceHeight, sourceWidth, resultDimensions), dtype=np.float) 36 | counts = np.zeros((sourceHeight, sourceWidth), dtype=np.uint32) 37 | 38 | # Iterate over the batches and apply the transformation function to each batch 39 | for batchNum, batch in enumerate(batches): 40 | 41 | # If a progress callback was supplied, call it 42 | if progressCallback is not None: 43 | progressCallback(batchNum, len(batches)) 44 | 45 | # Apply the transformation function to the current batch 46 | batchResult = transform(data, batch) 47 | 48 | # Iterate over the windows in the batch and update the sums matrix 49 | for windowNum, window in enumerate(batch): 50 | # Create views into the larger matrices that correspond to the current window 51 | windowIndices = window.indices(False) 52 | sumsView = sums[windowIndices] 53 | countsView = counts[windowIndices] 54 | 55 | # Update the result sums for each of the dataset elements in the window 56 | sumsView[:] += batchResult[windowNum] 57 | countsView[:] += 1 58 | 59 | # Use the sums and the counts to compute the mean values 60 | for dim in range(0, resultDimensions): 61 | sums[:, :, dim] /= counts 62 | 63 | # Return the mean values 64 | return sums 65 | -------------------------------------------------------------------------------- /core/tf_pose/slidingwindow/RectangleUtils.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 4 | def cropRect(rect, cropTop, cropBottom, cropLeft, cropRight): 5 | """ 6 | Crops a rectangle by the specified number of pixels on each side. 7 | 8 | The input rectangle and return value are both a tuple of (x,y,w,h). 9 | """ 10 | 11 | # Unpack the rectangle 12 | x, y, w, h = rect 13 | 14 | # Crop by the specified value 15 | x += cropLeft 16 | y += cropTop 17 | w -= (cropLeft + cropRight) 18 | h -= (cropTop + cropBottom) 19 | 20 | # Re-pack the padded rect 21 | return (x, y, w, h) 22 | 23 | 24 | def padRect(rect, padTop, padBottom, padLeft, padRight, bounds, clipExcess=True): 25 | """ 26 | Pads a rectangle by the specified values on each individual side, 27 | ensuring the padded rectangle falls within the specified bounds. 28 | 29 | The input rectangle, bounds, and return value are all a tuple of (x,y,w,h). 
30 | """ 31 | 32 | # Unpack the rectangle 33 | x, y, w, h = rect 34 | 35 | # Pad by the specified value 36 | x -= padLeft 37 | y -= padTop 38 | w += (padLeft + padRight) 39 | h += (padTop + padBottom) 40 | 41 | # Determine if we are clipping overflows/underflows or 42 | # shifting the centre of the rectangle to compensate 43 | if clipExcess: 44 | 45 | # Clip any underflows 46 | x = max(0, x) 47 | y = max(0, y) 48 | 49 | # Clip any overflows 50 | overflowY = max(0, (y + h) - bounds[0]) 51 | overflowX = max(0, (x + w) - bounds[1]) 52 | h -= overflowY 53 | w -= overflowX 54 | 55 | else: 56 | 57 | # Compensate for any underflows 58 | underflowX = max(0, 0 - x) 59 | underflowY = max(0, 0 - y) 60 | x += underflowX 61 | y += underflowY 62 | 63 | # Compensate for any overflows 64 | overflowY = max(0, (y + h) - bounds[0]) 65 | overflowX = max(0, (x + w) - bounds[1]) 66 | x -= overflowX 67 | w += overflowX 68 | y -= overflowY 69 | h += overflowY 70 | 71 | # If there are still overflows or underflows after our 72 | # modifications, we have no choice but to clip them 73 | x, y, w, h = padRect((x, y, w, h), 0, 0, 0, 0, bounds, True) 74 | 75 | # Re-pack the padded rect 76 | return (x, y, w, h) 77 | 78 | 79 | def cropRectEqually(rect, cropping): 80 | """ 81 | Crops a rectangle by the specified number of pixels on all sides. 82 | 83 | The input rectangle and return value are both a tuple of (x,y,w,h). 84 | """ 85 | return cropRect(rect, cropping, cropping, cropping, cropping) 86 | 87 | 88 | def padRectEqually(rect, padding, bounds, clipExcess=True): 89 | """ 90 | Applies equal padding to all sides of a rectangle, 91 | ensuring the padded rectangle falls within the specified bounds. 92 | 93 | The input rectangle, bounds, and return value are all a tuple of (x,y,w,h). 94 | """ 95 | return padRect(rect, padding, padding, padding, padding, bounds, clipExcess) 96 | 97 | 98 | def squareAspect(rect): 99 | """ 100 | Crops either the width or height, as necessary, to make a rectangle into a square. 101 | 102 | The input rectangle and return value are both a tuple of (x,y,w,h). 103 | """ 104 | 105 | # Determine which dimension needs to be cropped 106 | x, y, w, h = rect 107 | if w > h: 108 | cropX = (w - h) // 2 109 | return cropRect(rect, 0, 0, cropX, cropX) 110 | elif w < h: 111 | cropY = (h - w) // 2 112 | return cropRect(rect, cropY, cropY, 0, 0) 113 | 114 | # Already a square 115 | return rect 116 | 117 | 118 | def fitToSize(rect, targetWidth, targetHeight, bounds): 119 | """ 120 | Pads or crops a rectangle as necessary to achieve the target dimensions, 121 | ensuring the modified rectangle falls within the specified bounds. 122 | 123 | The input rectangle, bounds, and return value are all a tuple of (x,y,w,h). 
124 | """ 125 | 126 | # Determine the difference between the current size and target size 127 | x, y, w, h = rect 128 | diffX = w - targetWidth 129 | diffY = h - targetHeight 130 | 131 | # Determine if we are cropping or padding the width 132 | if diffX > 0: 133 | cropLeft = math.floor(diffX / 2) 134 | cropRight = diffX - cropLeft 135 | x, y, w, h = cropRect((x, y, w, h), 0, 0, cropLeft, cropRight) 136 | elif diffX < 0: 137 | padLeft = math.floor(abs(diffX) / 2) 138 | padRight = abs(diffX) - padLeft 139 | x, y, w, h = padRect((x, y, w, h), 0, 0, padLeft, padRight, bounds, False) 140 | 141 | # Determine if we are cropping or padding the height 142 | if diffY > 0: 143 | cropTop = math.floor(diffY / 2) 144 | cropBottom = diffY - cropTop 145 | x, y, w, h = cropRect((x, y, w, h), cropTop, cropBottom, 0, 0) 146 | elif diffY < 0: 147 | padTop = math.floor(abs(diffY) / 2) 148 | padBottom = abs(diffY) - padTop 149 | x, y, w, h = padRect((x, y, w, h), padTop, padBottom, 0, 0, bounds, False) 150 | 151 | return x, y, w, h 152 | -------------------------------------------------------------------------------- /core/tf_pose/slidingwindow/SlidingWindow.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 4 | class DimOrder(object): 5 | """ 6 | Represents the order of the dimensions in a dataset's shape. 7 | """ 8 | ChannelHeightWidth = ['c', 'h', 'w'] 9 | HeightWidthChannel = ['h', 'w', 'c'] 10 | 11 | 12 | class SlidingWindow(object): 13 | """ 14 | Represents a single window into a larger dataset. 15 | """ 16 | 17 | def __init__(self, x, y, w, h, dimOrder, transform=None): 18 | """ 19 | Creates a new window with the specified dimensions and transform 20 | """ 21 | self.x = x 22 | self.y = y 23 | self.w = w 24 | self.h = h 25 | self.dimOrder = dimOrder 26 | self.transform = transform 27 | 28 | def apply(self, matrix): 29 | """ 30 | Slices the supplied matrix and applies any transform bound to this window 31 | """ 32 | view = matrix[self.indices()] 33 | return self.transform(view) if self.transform is not None else view 34 | 35 | def getRect(self): 36 | """ 37 | Returns the window bounds as a tuple of (x,y,w,h) 38 | """ 39 | return self.x, self.y, self.w, self.h 40 | 41 | def setRect(self, rect): 42 | """ 43 | Sets the window bounds from a tuple of (x,y,w,h) 44 | """ 45 | self.x, self.y, self.w, self.h = rect 46 | 47 | def indices(self, includeChannel=True): 48 | """ 49 | Retrieves the indices for this window as a tuple of slices 50 | """ 51 | if self.dimOrder == DimOrder.HeightWidthChannel: 52 | 53 | # Equivalent to [self.y:self.y+self.h+1, self.x:self.x+self.w+1] 54 | return ( 55 | slice(self.y, self.y + self.h), 56 | slice(self.x, self.x + self.w) 57 | ) 58 | 59 | elif self.dimOrder == DimOrder.ChannelHeightWidth: 60 | 61 | if includeChannel is True: 62 | 63 | # Equivalent to [:, self.y:self.y+self.h+1, self.x:self.x+self.w+1] 64 | return ( 65 | slice(None, None), 66 | slice(self.y, self.y + self.h), 67 | slice(self.x, self.x + self.w) 68 | ) 69 | 70 | else: 71 | 72 | # Equivalent to [self.y:self.y+self.h+1, self.x:self.x+self.w+1] 73 | return ( 74 | slice(self.y, self.y + self.h), 75 | slice(self.x, self.x + self.w) 76 | ) 77 | 78 | else: 79 | raise ValueError('Unsupported order of dimensions: ' + str(self.dimOrder)) 80 | 81 | def __str__(self): 82 | return '(' + str(self.x) + ',' + str(self.y) + ',' + str(self.w) + ',' + str(self.h) + ')' 83 | 84 | def __repr__(self): 85 | return self.__str__() 86 | 87 | 88 | def generate(data, dimOrder, 
maxWindowSizeW, maxWindowSizeH, overlapPercent, transforms=[]): 89 | """ 90 | Generates a set of sliding windows for the specified dataset. 91 | """ 92 | 93 | # Determine the dimensions of the input data 94 | width = data.shape[dimOrder.index('w')] 95 | height = data.shape[dimOrder.index('h')] 96 | 97 | # Generate the windows 98 | return generateForSize(width, height, dimOrder, maxWindowSizeW, maxWindowSizeH, overlapPercent, transforms) 99 | 100 | 101 | def generateForSize(width, height, dimOrder, maxWindowSizeW, maxWindowSizeH, overlapPercent, transforms=[]): 102 | """ 103 | Generates a set of sliding windows for a dataset with the specified dimensions and order. 104 | """ 105 | 106 | # If the input data is smaller than the specified window size, 107 | # clip the window size to the input size on both dimensions 108 | windowSizeX = min(maxWindowSizeW, width) 109 | windowSizeY = min(maxWindowSizeH, height) 110 | 111 | # Compute the window overlap and step size 112 | windowOverlapX = int(math.floor(windowSizeX * overlapPercent)) 113 | windowOverlapY = int(math.floor(windowSizeY * overlapPercent)) 114 | stepSizeX = windowSizeX - windowOverlapX 115 | stepSizeY = windowSizeY - windowOverlapY 116 | 117 | # Determine how many windows we will need in order to cover the input data 118 | lastX = width - windowSizeX 119 | lastY = height - windowSizeY 120 | xOffsets = list(range(0, lastX + 1, stepSizeX)) 121 | yOffsets = list(range(0, lastY + 1, stepSizeY)) 122 | 123 | # Unless the input data dimensions are exact multiples of the step size, 124 | # we will need one additional row and column of windows to get 100% coverage 125 | if len(xOffsets) == 0 or xOffsets[-1] != lastX: 126 | xOffsets.append(lastX) 127 | if len(yOffsets) == 0 or yOffsets[-1] != lastY: 128 | yOffsets.append(lastY) 129 | 130 | # Generate the list of windows 131 | windows = [] 132 | for xOffset in xOffsets: 133 | for yOffset in yOffsets: 134 | for transform in [None] + transforms: 135 | windows.append(SlidingWindow( 136 | x=xOffset, 137 | y=yOffset, 138 | w=windowSizeX, 139 | h=windowSizeY, 140 | dimOrder=dimOrder, 141 | transform=transform 142 | )) 143 | 144 | return windows 145 | -------------------------------------------------------------------------------- /core/tf_pose/slidingwindow/WindowDistance.py: -------------------------------------------------------------------------------- 1 | from .ArrayUtils import zerosFactory 2 | import numpy as np 3 | import math 4 | 5 | 6 | def generateDistanceMatrix(width, height): 7 | """ 8 | Generates a matrix specifying the distance of each point in a window to its centre. 
9 | """ 10 | 11 | # Determine the coordinates of the exact centre of the window 12 | originX = width / 2 13 | originY = height / 2 14 | 15 | # Generate the distance matrix 16 | distances = zerosFactory((height, width), dtype=np.float) 17 | for index, val in np.ndenumerate(distances): 18 | y, x = index 19 | distances[(y, x)] = math.sqrt(math.pow(x - originX, 2) + math.pow(y - originY, 2)) 20 | 21 | return distances 22 | -------------------------------------------------------------------------------- /core/tf_pose/slidingwindow/__init__.py: -------------------------------------------------------------------------------- 1 | from .SlidingWindow import DimOrder, SlidingWindow, generate, generateForSize # noqa 2 | from .WindowDistance import generateDistanceMatrix # noqa 3 | from .RectangleUtils import * # noqa 4 | from .ArrayUtils import * # noqa 5 | from .Batching import * # noqa 6 | from .Merging import * # noqa 7 | -------------------------------------------------------------------------------- /core/tf_pose/tensblur/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/MAX-Human-Pose-Estimator/7e1205e5edf04d258dc1d655aea02fc7c0b26ac6/core/tf_pose/tensblur/__init__.py -------------------------------------------------------------------------------- /core/tf_pose/tensblur/smoother.py: -------------------------------------------------------------------------------- 1 | # vim: sta:et:sw=2:ts=2:sts=2 2 | # Written by Antonio Loquercio 3 | 4 | import numpy as np 5 | import scipy.stats as st 6 | import tensorflow as tf 7 | 8 | 9 | def layer(op): 10 | def layer_decorated(self, *args, **kwargs): 11 | # Automatically set a name if not provided. 12 | name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) 13 | # Figure out the layer inputs. 14 | if len(self.terminals) == 0: 15 | raise RuntimeError('No input variables found for layer %s.' % name) 16 | elif len(self.terminals) == 1: 17 | layer_input = self.terminals[0] 18 | else: 19 | layer_input = list(self.terminals) 20 | # Perform the operation and get the output. 21 | layer_output = op(self, layer_input, *args, **kwargs) 22 | # Add to layer LUT. 23 | self.layers[name] = layer_output 24 | # This output is now the input for the next layer. 25 | self.feed(layer_output) 26 | # Return self for chained calls. 
27 | return self 28 | 29 | return layer_decorated 30 | 31 | 32 | class Smoother(object): 33 | def __init__(self, inputs, filter_size, sigma): 34 | self.inputs = inputs 35 | self.terminals = [] 36 | self.layers = dict(inputs) 37 | self.filter_size = filter_size 38 | self.sigma = sigma 39 | self.setup() 40 | 41 | def setup(self): 42 | self.feed('data').conv(name='smoothing') 43 | 44 | def get_unique_name(self, prefix): 45 | ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 46 | return '%s_%d' % (prefix, ident) 47 | 48 | def feed(self, *args): 49 | if len(args) == 0: 50 | raise ValueError("len(args) must not be zero") 51 | 52 | self.terminals = [] 53 | for fed_layer in args: 54 | if isinstance(fed_layer, str): 55 | try: 56 | fed_layer = self.layers[fed_layer] 57 | except KeyError: 58 | raise KeyError('Unknown layer name fed: %s' % fed_layer) 59 | self.terminals.append(fed_layer) 60 | return self 61 | 62 | def gauss_kernel(self, kernlen=21, nsig=3, channels=1): 63 | interval = (2*nsig+1.)/(kernlen) 64 | x = np.linspace(-nsig-interval/2., nsig+interval/2., kernlen+1) 65 | kern1d = np.diff(st.norm.cdf(x)) 66 | kernel_raw = np.sqrt(np.outer(kern1d, kern1d)) 67 | kernel = kernel_raw/kernel_raw.sum() 68 | out_filter = np.array(kernel, dtype=np.float32) 69 | out_filter = out_filter.reshape((kernlen, kernlen, 1, 1)) 70 | out_filter = np.repeat(out_filter, channels, axis=2) 71 | return out_filter 72 | 73 | def make_gauss_var(self, name, size, sigma, c_i): 74 | # with tf.device("/cpu:0"): 75 | kernel = self.gauss_kernel(size, sigma, c_i) 76 | var = tf.Variable(tf.convert_to_tensor(kernel), name=name) 77 | return var 78 | 79 | def get_output(self): 80 | '''Returns the smoother output.''' 81 | return self.terminals[-1] 82 | 83 | @layer 84 | def conv(self, 85 | input, 86 | name, 87 | padding='SAME'): 88 | # Get the number of channels in the input 89 | c_i = input.get_shape().as_list()[3] 90 | 91 | # Convolution for a given input and kernel 92 | def convolve(i, k): 93 | return tf.nn.depthwise_conv2d(i, k, [1, 1, 1, 1], padding=padding) 94 | with tf.variable_scope(name): 95 | kernel = self.make_gauss_var('gauss_weight', self.filter_size, self.sigma, c_i) 96 | output = convolve(input, kernel) 97 | return output 98 | -------------------------------------------------------------------------------- /docs/deploy-max-to-ibm-cloud-with-kubernetes-button.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/MAX-Human-Pose-Estimator/7e1205e5edf04d258dc1d655aea02fc7c0b26ac6/docs/deploy-max-to-ibm-cloud-with-kubernetes-button.png -------------------------------------------------------------------------------- /docs/pose-lines.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/MAX-Human-Pose-Estimator/7e1205e5edf04d258dc1d655aea02fc7c0b26ac6/docs/pose-lines.png -------------------------------------------------------------------------------- /docs/swagger-screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/MAX-Human-Pose-Estimator/7e1205e5edf04d258dc1d655aea02fc7c0b26ac6/docs/swagger-screenshot.png -------------------------------------------------------------------------------- /max-human-pose-estimator.yaml: -------------------------------------------------------------------------------- 1 | 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: max-human-pose-estimator 6 
| spec: 7 | selector: 8 | app: max-human-pose-estimator 9 | ports: 10 | - port: 5000 11 | type: NodePort 12 | --- 13 | apiVersion: apps/v1 14 | kind: Deployment 15 | metadata: 16 | name: max-human-pose-estimator 17 | labels: 18 | app: max-human-pose-estimator 19 | spec: 20 | selector: 21 | matchLabels: 22 | app: max-human-pose-estimator 23 | replicas: 1 24 | template: 25 | metadata: 26 | labels: 27 | app: max-human-pose-estimator 28 | spec: 29 | containers: 30 | - name: max-human-pose-estimator 31 | image: quay.io/codait/max-human-pose-estimator:latest 32 | ports: 33 | - containerPort: 5000 34 | -------------------------------------------------------------------------------- /requirements-test.txt: -------------------------------------------------------------------------------- 1 | flake8==3.8.4 2 | pytest==6.1.2 3 | requests==2.25.0 4 | bandit==1.6.2 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | argparse==1.4.0 2 | dill==0.2.9 3 | fire==0.1.3 4 | matplotlib==3.0.3 5 | opencv-python==4.1.0.25 6 | protobuf==3.7.1 7 | psutil==5.6.6 8 | requests==2.21.0 9 | scikit-image==0.15.0 10 | scipy==1.2.1 11 | slidingwindow==0.0.13 12 | tensorflow==1.15.4 13 | tqdm==4.31.1 14 | git+https://github.com/ppwwyyxx/tensorpack.git 15 | numpy==1.17.0 16 | -------------------------------------------------------------------------------- /samples/IBM.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/MAX-Human-Pose-Estimator/7e1205e5edf04d258dc1d655aea02fc7c0b26ac6/samples/IBM.jpeg -------------------------------------------------------------------------------- /samples/Pilots.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/MAX-Human-Pose-Estimator/7e1205e5edf04d258dc1d655aea02fc7c0b26ac6/samples/Pilots.jpg -------------------------------------------------------------------------------- /samples/Pilots.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/MAX-Human-Pose-Estimator/7e1205e5edf04d258dc1d655aea02fc7c0b26ac6/samples/Pilots.png -------------------------------------------------------------------------------- /samples/Pilots.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/MAX-Human-Pose-Estimator/7e1205e5edf04d258dc1d655aea02fc7c0b26ac6/samples/Pilots.tiff -------------------------------------------------------------------------------- /samples/README.md: -------------------------------------------------------------------------------- 1 | # Sample Details 2 | 3 | ## Test Images 4 | 5 | The following table details licenses applicable to certain test images. Any test images not in the table are from the [CODAIT team](codait.org) and released under a [CC0 License](https://creativecommons.org/publicdomain/zero/1.0/). 
6 | 7 | | Asset | License | Link | 8 | | ------------- | -------- | -------- | 9 | | [Pilots](Pilots.jpg) | Custom | [Public domain](https://en.wikipedia.org/wiki/Public_domain) | -------------------------------------------------------------------------------- /sha512sums.txt: -------------------------------------------------------------------------------- 1 | 80093da251fd8640f2bd72d32a0e047dc3fcb85e0f925dc1ea600f973044145bf854010dd4696e4e86bd1509e3f68190d223f352893464ff218459674d3e4526 assets/human-pose-estimator-tensorflow.pb 2 | -------------------------------------------------------------------------------- /tests/test.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2018-2019 IBM Corp. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | import requests 18 | import pytest 19 | 20 | 21 | def test_swagger(): 22 | 23 | model_endpoint = 'http://localhost:5000/swagger.json' 24 | 25 | r = requests.get(url=model_endpoint) 26 | assert r.status_code == 200 27 | assert r.headers['Content-Type'] == 'application/json' 28 | 29 | json = r.json() 30 | assert 'swagger' in json 31 | assert json.get('info') and json.get('info').get('title') == 'MAX Human Pose Estimator' 32 | 33 | 34 | def test_metadata(): 35 | 36 | model_endpoint = 'http://localhost:5000/model/metadata' 37 | 38 | r = requests.get(url=model_endpoint) 39 | assert r.status_code == 200 40 | 41 | metadata = r.json() 42 | assert metadata['id'] == 'max human pose estimator' 43 | assert metadata['name'] == 'MAX Human Pose Estimator TensorFlow Model' 44 | assert metadata['description'] == 'TensorFlow model trained on COCO data to detect human poses' 45 | assert metadata['license'] == 'Apache License 2.0' 46 | 47 | 48 | def _check_response(r): 49 | assert r.status_code == 200 50 | response = r.json() 51 | 52 | assert response['status'] == 'ok' 53 | assert len(response['predictions']) == 3 54 | assert response['predictions'][0]['human_id'] == 0 55 | assert len(response['predictions'][0]['pose_lines']) > 0 56 | assert len(response['predictions'][0]['body_parts']) > 0 57 | 58 | 59 | def test_predict(): 60 | 61 | model_endpoint = 'http://localhost:5000/model/predict' 62 | formats = ['jpg', 'png', 'tiff'] 63 | img_path = 'samples/Pilots.{}' 64 | 65 | for f in formats: 66 | p = img_path.format(f) 67 | with open(p, 'rb') as file: 68 | file_form = {'file': (p, file, 'image/{}'.format(f))} 69 | r = requests.post(url=model_endpoint, files=file_form) 70 | _check_response(r) 71 | 72 | # Test by the image without faces 73 | img2_path = 'samples/IBM.jpeg' 74 | 75 | with open(img2_path, 'rb') as file: 76 | file_form = {'file': (img2_path, file, 'image/jpeg')} 77 | r = requests.post(url=model_endpoint, files=file_form) 78 | 79 | assert r.status_code == 200 80 | response = r.json() 81 | 82 | assert response['status'] == 'ok' 83 | assert len(response['predictions']) == 0 84 | 85 | # Test by the text data 86 | img3_path = 'README.md' 87 | 
88 | with open(img3_path, 'rb') as file: 89 | file_form = {'file': (img3_path, file, 'image/jpeg')} 90 | r = requests.post(url=model_endpoint, files=file_form) 91 | 92 | assert r.status_code == 400 93 | 94 | 95 | if __name__ == '__main__': 96 | pytest.main([__file__]) 97 | --------------------------------------------------------------------------------
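As a quick end-to-end check of the service these tests exercise (assuming the container is running locally on port 5000, as in the Travis configuration), the prediction endpoint can be called directly:

```python
import requests

# Mirrors tests/test.py: POST a sample image to the running model server.
with open('samples/Pilots.jpg', 'rb') as f:
    r = requests.post('http://localhost:5000/model/predict',
                      files={'file': ('Pilots.jpg', f, 'image/jpg')})

body = r.json()
print(body['status'])  # expected: 'ok'
for human in body['predictions']:
    print(human['human_id'], len(human['pose_lines']), len(human['body_parts']))
```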