├── .gitattributes ├── .github └── workflows │ └── main.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── DS_DATA.md ├── LICENSE ├── README.md ├── SECURITY.md ├── SUPPORT.md ├── farl ├── __init__.py ├── datasets │ ├── __init__.py │ ├── aflw.py │ ├── celebamask_hq.py │ ├── ibug300w.py │ ├── lapa.py │ ├── prepare.py │ └── wflw.py ├── experiments │ ├── __init__.py │ ├── face_alignment │ │ ├── __init__.py │ │ ├── augmenters │ │ │ ├── aflw19_test.yaml │ │ │ ├── aflw19_test_post.yaml │ │ │ ├── test.yaml │ │ │ ├── test_post.yaml │ │ │ └── train.yaml │ │ ├── eval_data │ │ │ ├── aflw19_test.yaml │ │ │ ├── ibug300w_test.yaml │ │ │ └── wflw_test.yaml │ │ ├── network.py │ │ ├── networks │ │ │ └── farl.yaml │ │ ├── optimizers │ │ │ ├── freeze_backbone.yaml │ │ │ └── refine_backbone.yaml │ │ ├── scorer.py │ │ ├── scorers │ │ │ ├── aflw19.yaml │ │ │ ├── ibug300w.yaml │ │ │ └── wflw.yaml │ │ ├── task.py │ │ ├── train_aflw19_farl-b-50m-ep16_448_refinebb.yaml │ │ ├── train_aflw19_farl-b-ep16_448_refinebb.yaml │ │ ├── train_aflw19_farl-b-ep64_448_refinebb.yaml │ │ ├── train_ibug300w_farl-b-50m-ep16_448_refinebb.yaml │ │ ├── train_ibug300w_farl-b-ep16_448_refinebb.yaml │ │ ├── train_ibug300w_farl-b-ep64_448_refinebb.yaml │ │ ├── train_wflw_farl-b-50m-ep16_448_refinebb.yaml │ │ ├── train_wflw_farl-b-ep16_448_refinebb.yaml │ │ ├── train_wflw_farl-b-ep64_448_refinebb.yaml │ │ └── trainers │ │ │ ├── aflw19_farl.yaml │ │ │ ├── ibug300w_farl.yaml │ │ │ └── wflw_farl.yaml │ └── face_parsing │ │ ├── __init__.py │ │ ├── augmenters │ │ ├── celebm │ │ │ ├── test.yaml │ │ │ ├── test_post.yaml │ │ │ └── train.yaml │ │ └── lapa │ │ │ ├── test.yaml │ │ │ ├── test_post.yaml │ │ │ └── train.yaml │ │ ├── network.py │ │ ├── networks │ │ └── farl.yaml │ │ ├── optimizers │ │ ├── freeze_backbone.yaml │ │ └── refine_backbone.yaml │ │ ├── scorer.py │ │ ├── scorers │ │ ├── celebm.yaml │ │ └── lapa.yaml │ │ ├── task.py │ │ ├── train_celebm_farl-b-50m-ep16_448_refinebb.yaml │ │ ├── 
train_celebm_farl-b-ep16_448_refinebb.yaml │ │ ├── train_celebm_farl-b-ep64_448_refinebb.yaml │ │ ├── train_lapa_farl-b-50m-ep16_448_refinebb.yaml │ │ ├── train_lapa_farl-b-ep16_448_refinebb.yaml │ │ ├── train_lapa_farl-b-ep64_448_refinebb.yaml │ │ └── trainers │ │ ├── celebm_farl.yaml │ │ └── lapa_farl.yaml └── network │ ├── __init__.py │ ├── common.py │ ├── ext │ └── p2i_ops │ │ ├── .clang-format │ │ ├── .gitignore │ │ ├── README.md │ │ ├── __init__.py │ │ ├── common.h │ │ ├── ext.cpp │ │ ├── p2i_max.cu │ │ ├── p2i_max.h │ │ ├── p2i_sum.cu │ │ ├── p2i_sum.h │ │ ├── sample.ipynb │ │ └── utility.h │ ├── farl │ ├── __init__.py │ └── model.py │ ├── geometry.py │ ├── mmseg.py │ ├── transformers.py │ └── viz.py ├── figures ├── framework.jpg └── framework2.jpg ├── logs ├── paper │ ├── face_alignment.train_aflw19_farl-b-ep16_448_refinebb │ │ ├── eval.aflw19_test_0.tsv │ │ └── eval.aflw19_test_frontal_0.tsv │ ├── face_alignment.train_ibug300w_farl-b-ep16_448_refinebb │ │ ├── eval.ibug300w_test_challenging_0.tsv │ │ ├── eval.ibug300w_test_common_0.tsv │ │ └── eval.ibug300w_test_full_0.tsv │ ├── face_alignment.train_wflw_farl-b-ep16_448_refinebb │ │ ├── eval.wflw_test_all_0.tsv │ │ ├── eval.wflw_test_blur_0.tsv │ │ ├── eval.wflw_test_expression_0.tsv │ │ ├── eval.wflw_test_illumination_0.tsv │ │ ├── eval.wflw_test_largepose_0.tsv │ │ ├── eval.wflw_test_makeup_0.tsv │ │ └── eval.wflw_test_occlusion_0.tsv │ ├── face_parsing.train_celebm_farl-b-ep16-448_refinebb │ │ └── eval.celebm_test_0.tsv │ └── face_parsing.train_lapa_farl-b-ep16_448_refinebb │ │ └── eval.lapa_test_0.tsv └── reproduce │ ├── face_alignment.train_aflw19_farl-b-50m-ep16_448_refinebb │ ├── eval.aflw19_test_0.tsv │ └── eval.aflw19_test_frontal_0.tsv │ ├── face_alignment.train_aflw19_farl-b-ep16_448_refinebb │ ├── eval.aflw19_test_0.tsv │ └── eval.aflw19_test_frontal_0.tsv │ ├── face_alignment.train_aflw19_farl-b-ep64_448_refinebb │ ├── eval.aflw19_test_0.tsv │ └── eval.aflw19_test_frontal_0.tsv │ ├── 
face_alignment.train_ibug300w_farl-b-50m-ep16_448_refinebb │ ├── eval.ibug300w_test_challenging_0.tsv │ ├── eval.ibug300w_test_common_0.tsv │ └── eval.ibug300w_test_full_0.tsv │ ├── face_alignment.train_ibug300w_farl-b-ep16_448_refinebb │ ├── eval.ibug300w_test_challenging_0.tsv │ ├── eval.ibug300w_test_common_0.tsv │ └── eval.ibug300w_test_full_0.tsv │ ├── face_alignment.train_ibug300w_farl-b-ep64_448_refinebb │ ├── eval.ibug300w_test_challenging_0.tsv │ ├── eval.ibug300w_test_common_0.tsv │ └── eval.ibug300w_test_full_0.tsv │ ├── face_alignment.train_wflw_farl-b-50m-ep16_448_refinebb │ ├── eval.wflw_test_all_0.tsv │ ├── eval.wflw_test_blur_0.tsv │ ├── eval.wflw_test_expression_0.tsv │ ├── eval.wflw_test_illumination_0.tsv │ ├── eval.wflw_test_largepose_0.tsv │ ├── eval.wflw_test_makeup_0.tsv │ └── eval.wflw_test_occlusion_0.tsv │ ├── face_alignment.train_wflw_farl-b-ep16_448_refinebb │ ├── eval.wflw_test_all_0.tsv │ ├── eval.wflw_test_blur_0.tsv │ ├── eval.wflw_test_expression_0.tsv │ ├── eval.wflw_test_illumination_0.tsv │ ├── eval.wflw_test_largepose_0.tsv │ ├── eval.wflw_test_makeup_0.tsv │ └── eval.wflw_test_occlusion_0.tsv │ ├── face_alignment.train_wflw_farl-b-ep64_448_refinebb │ ├── eval.wflw_test_all_0.tsv │ ├── eval.wflw_test_blur_0.tsv │ ├── eval.wflw_test_expression_0.tsv │ ├── eval.wflw_test_illumination_0.tsv │ ├── eval.wflw_test_largepose_0.tsv │ ├── eval.wflw_test_makeup_0.tsv │ └── eval.wflw_test_occlusion_0.tsv │ ├── face_parsing.train_celebm_farl-b-50m-ep16_448_refinebb │ └── eval.celebm_test_0.tsv │ ├── face_parsing.train_celebm_farl-b-ep16_448_refinebb │ └── eval.celebm_test_0.tsv │ ├── face_parsing.train_celebm_farl-b-ep64_448_refinebb │ └── eval.celebm_test_0.tsv │ ├── face_parsing.train_lapa_farl-b-50m-ep16_448_refinebb │ └── eval.lapa_test_0.tsv │ ├── face_parsing.train_lapa_farl-b-ep16_448_refinebb │ └── eval.lapa_test_0.tsv │ └── face_parsing.train_lapa_farl-b-ep64_448_refinebb │ └── eval.lapa_test_0.tsv └── requirement.txt 
/.gitattributes: -------------------------------------------------------------------------------- 1 | *.jpg filter=lfs diff=lfs merge=lfs -text 2 | *.ipynb filter=lfs diff=lfs merge=lfs -text 3 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: "Code Scanning - Action" 2 | 3 | on: 4 | push: 5 | branches: [main, pre-release] 6 | pull_request: 7 | branches: [main, pre-release] 8 | schedule: 9 | # ┌───────────── minute (0 - 59) 10 | # │ ┌───────────── hour (0 - 23) 11 | # │ │ ┌───────────── day of the month (1 - 31) 12 | # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC) 13 | # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT) 14 | # │ │ │ │ │ 15 | # │ │ │ │ │ 16 | # │ │ │ │ │ 17 | # * * * * * 18 | - cron: '30 1 * * 0' 19 | 20 | jobs: 21 | CodeQL-Build: 22 | # CodeQL runs on ubuntu-latest, windows-latest, and macos-latest 23 | runs-on: ubuntu-latest 24 | 25 | permissions: 26 | # required for all workflows 27 | security-events: write 28 | 29 | # only required for workflows in private repositories 30 | actions: read 31 | contents: read 32 | 33 | steps: 34 | - name: Checkout repository 35 | uses: actions/checkout@v2 36 | 37 | # Initializes the CodeQL tools for scanning. 38 | - name: Initialize CodeQL 39 | uses: github/codeql-action/init@v1 40 | # Override language selection by uncommenting this and choosing your languages 41 | # with: 42 | # languages: go, javascript, csharp, python, cpp, java 43 | 44 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 45 | # If this step fails, then you should remove it and run the build manually (see below). 46 | - name: Autobuild 47 | uses: github/codeql-action/autobuild@v1 48 | 49 | # ℹ️ Command-line programs to run using the OS shell. 
50 | # 📚 https://git.io/JvXDl 51 | 52 | # ✏️ If the Autobuild fails above, remove it and uncomment the following 53 | # three lines and modify them (or add more) to build your code if your 54 | # project uses a compiled language 55 | 56 | #- run: | 57 | # make bootstrap 58 | # make release 59 | 60 | - name: Perform CodeQL Analysis 61 | uses: github/codeql-action/analyze@v1 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | _debug/ 131 | blob/ 132 | blob -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /DS_DATA.md: -------------------------------------------------------------------------------- 1 | # Prepare Downstream Data 2 | 3 | First, create directory `./blob/data` and download all the datasets. 4 | 5 | ### LaPa 6 | 7 | * Download LaPa.tar.gz from https://github.com/JDAI-CV/lapa-dataset. 8 | * Uncompress to `./blob/data/LaPa`, make sure `./blob/data/LaPa/{test, train, val}/` all exist. 9 | 10 | ### CelebAMask-HQ 11 | 12 | * Download CelebAMask-HQ.zip from https://github.com/switchablenorms/CelebAMask-HQ. 
13 | * Uncompress to `./blob/data/CelebAMask-HQ`, make sure `./blob/data/CelebAMask-HQ/{CelebA-HQ-img, CelebAMask-HQ-mask-anno}/` all exist. 14 | 15 | ### AFLW-19 16 | 17 | * Download the annotations from http://mmlab.ie.cuhk.edu.hk/projects/compositional/AFLWinfo_release.mat to `./blob/data/AFLW-19/AFLWinfo_release.mat`. 18 | * Download the images following instructions given by https://www.tugraz.at/institute/icg/research/team-bischof/lrs/downloads/aflw/#download. Uncompress the aflw-images-{0,2,3}.tar.gz files to `./blob/data/AFLW-19/`, make sure `./blob/data/AFLW-19/data/flickr/{0, 2, 3}/` exists. 19 | 20 | 21 | ### IBUG300W & WFLW 22 | 23 | * Download the IBUG300W and WFLW annotations from https://github.com/HRNet/HRNet-Facial-Landmark-Detection#data. 24 | * Download IBUG300W images from 25 | * https://ibug.doc.ic.ac.uk/download/annotations/ibug.zip 26 | * https://ibug.doc.ic.ac.uk/download/annotations/afw.zip 27 | * https://ibug.doc.ic.ac.uk/download/annotations/helen.zip 28 | * https://ibug.doc.ic.ac.uk/download/annotations/lfpw.zip 29 | * Download WFLW images from https://wywu.github.io/projects/LAB/WFLW.html. 
30 | * Uncompress these files, make sure these paths exist: 31 | * IBUG300W images: `./blob/data/IBUG300W/{ibug, afw, helen, lfpw}/` 32 | * IBUG300W annotations (from HRNet): `./blob/data/IBUG300W/face_landmarks_300w_{train, valid_challenge, valid_common}.csv` 33 | * WFLW images: `./blob/data/WFLW/WFLW_images/` 34 | * WFLW annotations (from HRNet): `./blob/data/WFLW/face_landmarks_wflw_{train, test, test_{blur, expression, illumination, largepose, makeup, occlusion}}.csv` 35 | 36 | 37 | The tree of `./blob/data` should look like: 38 | 39 | ``` 40 | blob/data/ 41 | │ 42 | ├── LaPa/ 43 | │ ├── test/ 44 | │ ├── train/ 45 | │ └── val/ 46 | │ 47 | ├── CelebAMask-HQ/ 48 | │ ├── CelebA-HQ-img/ 49 | │ ├── CelebAMask-HQ-mask-anno/ 50 | │ ├── list_eval_partition.txt 51 | │ └── CelebA-HQ-to-CelebA-mapping.txt 52 | │ 53 | ├── AFLW-19/ 54 | │ ├── AFLWinfo_release.mat 55 | │ └── data/ 56 | │ └── flickr/ 57 | │ 58 | ├── IBUG300W/ 59 | │ ├── ibug/ 60 | │ ├── afw/ 61 | │ ├── helen/ 62 | │ ├── lfpw/ 63 | │ ├── face_landmarks_300w_train.csv 64 | │ ├── face_landmarks_300w_valid_challenge.csv 65 | │ └── face_landmarks_300w_valid_common.csv 66 | │ 67 | └── WFLW/ 68 | ├── WFLW_images/ 69 | ├── face_landmarks_wflw_test_blur.csv 70 | ├── face_landmarks_wflw_test_expression.csv 71 | ├── face_landmarks_wflw_test_largepose.csv 72 | ├── face_landmarks_wflw_test_occlusion.csv 73 | ├── face_landmarks_wflw_test.csv 74 | ├── face_landmarks_wflw_test_illumination.csv 75 | ├── face_landmarks_wflw_test_makeup.csv 76 | └── face_landmarks_wflw_train.csv 77 | 78 | ``` 79 | 80 | Now let's repack all these datasets into uniform formats for efficient reading. 
Just run with 81 | 82 | ```bash 83 | python -m farl.datasets.prepare ./blob/data 84 | ``` 85 | 86 | Finally, we should have the following files under `./blob/data`: 87 | 88 | ``` 89 | LaPa.train.zip 90 | LaPa.test.zip 91 | 92 | CelebAMaskHQ.train.zip 93 | CelebAMaskHQ.test.zip 94 | 95 | AFLW-19.train.zip 96 | AFLW-19.test.zip 97 | AFLW-19.test_frontal.zip 98 | 99 | IBUG300W.train.zip 100 | IBUG300W.test_common.zip 101 | IBUG300W.test_challenging.zip 102 | 103 | WFLW.train.zip 104 | WFLW.test_all.zip 105 | WFLW.test_blur.zip 106 | WFLW.test_expression.zip 107 | WFLW.test_illumination.zip 108 | WFLW.test_largepose.zip 109 | WFLW.test_makeup.zip 110 | WFLW.test_occlusion.zip 111 | ``` 112 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. 
Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 40 | 41 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # Support 2 | 3 | ## How to file issues and get help 4 | 5 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 6 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 7 | feature request as a new Issue. 8 | 9 | For help and questions about using this project, please contact [Hao Yang](https://haya.pro) ([haya@microsoft.com](mailto:haya@microsoft.com)). 
class AFLW_19(Dataset):
    """AFLW-19 face alignment dataset (19 landmarks per face).

    Args:
        root: Directory containing 'AFLWinfo_release.mat' and the
            'data/flickr' image folders.
        split: Split.ALL, Split.TRAIN or Split.TEST.
        subset: Only used with the TEST split: 'full' keeps all test
            samples, 'frontal' keeps only samples whose 19 landmarks
            are all visible.
    """

    def __init__(self, root, split=Split.ALL, subset: str = 'full'):
        self.images_root = os.path.join(root, 'data', 'flickr')
        info = scipy.io.loadmat(os.path.join(
            root, 'AFLWinfo_release.mat'))
        self.bbox = info['bbox']  # 24386x4 left, right, top, bottom
        self.data = info['data']  # 24386x38 x1,x2...,xn,y1,y2...,yn
        self.mask = info['mask_new']  # 24386x19 per-landmark visibility
        self.name_list = [s[0][0] for s in info['nameList']]

        # 'ra' is a 1-based permutation giving the official sample order;
        # the first 20000 entries form the train split, the rest the test split.
        ra = np.reshape(info['ra'].astype(np.int32), [-1]) - 1
        assert ra.min() == 0
        assert ra.max() == self.bbox.shape[0] - 1
        if split == Split.ALL:
            self.indices = ra
        elif split == Split.TRAIN:
            self.indices = ra[:20000]
        elif split == Split.TEST:
            if subset == 'full':
                self.indices = ra[20000:]
            elif subset == 'frontal':
                all_visible = np.all(self.mask == 1, axis=1)  # 24386
                self.indices = np.array(
                    [ind for ind in ra[20000:] if all_visible[ind]])
            else:
                # Previously an unknown subset silently left self.indices
                # unset, deferring the failure to the first __len__/__getitem__
                # call. Fail fast with a clear message instead.
                raise ValueError(
                    f'Invalid subset {subset!r} for AFLW-19 test split '
                    "(should be 'full' or 'frontal')")
        else:
            raise ValueError(f'Unsupported split {split} for AFLW-19')

    def __len__(self):
        """Number of samples in the selected split/subset."""
        return len(self.indices)

    def __getitem__(self, index):
        """Return one sample as a dict.

        Keys: 'image' (RGB array), 'box' (y1, x1, y2, x2 float32),
        'landmarks' (19x2, xy order), 'visibility' (19,).
        """
        ind = self.indices[index]
        image_path = os.path.join(
            self.images_root, self.name_list[ind])
        assert os.path.exists(image_path)
        image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
        # Stored as x1..xn,y1..yn -> reshape to [2, 19], transpose to 19x2 xy.
        landmarks = np.reshape(self.data[ind], [2, 19]).transpose()

        left, right, top, bottom = self.bbox[ind]
        box_y1x1y2x2 = np.array([top, left, bottom, right], dtype=np.float32)

        visibility = self.mask[ind]
        return {
            'image': image,
            'box': box_y1x1y2x2,
            'landmarks': landmarks,
            'visibility': visibility
        }

    def sample_name(self, index):
        """Samples are identified simply by their integer index."""
        return str(index)
@functools.lru_cache()
def _cached_imread(fname, flags=None):
    """cv2.imread memoized on (fname, flags).

    NOTE(review): the cache is unbounded, so every mask image read stays
    in memory for the process lifetime — acceptable for one-shot dataset
    freezing, but verify memory use before reusing elsewhere.
    """
    return cv2.imread(fname, flags=flags)


class CelebAMaskHQ(Dataset):
    """CelebAMask-HQ face parsing dataset.

    Args:
        root: Directory containing 'CelebA-HQ-img', 'CelebAMask-HQ-mask-anno',
            'CelebA-HQ-to-CelebA-mapping.txt' and 'list_eval_partition.txt'.
        split: Which split to load; Split.ALL uses every image on disk.
        label_type: 'human', 'aux' or 'all' — selects which mask
            categories are merged into the label map.
    """

    def __init__(self, root, split, label_type='all'):
        assert os.path.isdir(root)
        self.root = root
        self.split = split
        self.names = []

        if split != Split.ALL:
            # Map CelebA-HQ ids to/from the original CelebA file names so
            # the official CelebA partition file can be reused for HQ images.
            hq_to_orig_mapping = dict()
            orig_to_hq_mapping = dict()
            mapping_file = os.path.join(
                root, 'CelebA-HQ-to-CelebA-mapping.txt')
            assert os.path.exists(mapping_file)
            # Use a context manager so the file handle is closed promptly
            # (the original left it to the garbage collector).
            with open(mapping_file, 'r') as fd:
                for s in fd:
                    if '.jpg' not in s:
                        continue
                    idx, _, orig_file = s.split()
                    hq_to_orig_mapping[int(idx)] = orig_file
                    orig_to_hq_mapping[orig_file] = int(idx)

            # load partition: group 0 = train, 1 = val, 2 = test
            partition_file = os.path.join(root, 'list_eval_partition.txt')
            assert os.path.exists(partition_file)
            with open(partition_file, 'r') as fd:
                for s in fd:
                    if '.jpg' not in s:
                        continue
                    orig_file, group = s.split()
                    group = int(group)
                    if orig_file not in orig_to_hq_mapping:
                        continue
                    hq_id = orig_to_hq_mapping[orig_file]
                    if split == Split.TRAIN and group == 0:
                        self.names.append(str(hq_id))
                    elif split == Split.VAL and group == 1:
                        self.names.append(str(hq_id))
                    elif split == Split.TEST and group == 2:
                        self.names.append(str(hq_id))
                    elif split == Split.TOY:
                        self.names.append(str(hq_id))
                        if len(self.names) >= 10:
                            break
        else:
            self.names = [
                n[:-(len('.jpg'))]
                for n in os.listdir(os.path.join(self.root, 'CelebA-HQ-img'))
                if n.endswith('.jpg')
            ]

        # For each label_type: 'suffix' lists the mask-file name suffixes in
        # paint order (later entries overwrite earlier ones), 'names' the
        # resulting class names (index 0 is the implicit background/normal).
        self.label_setting = {
            'human': {
                'suffix': [
                    'neck', 'skin', 'cloth', 'l_ear', 'r_ear', 'l_brow', 'r_brow',
                    'l_eye', 'r_eye', 'nose', 'mouth', 'l_lip', 'u_lip', 'hair'
                ],
                'names': [
                    'bg', 'neck', 'face', 'cloth', 'rr', 'lr', 'rb', 'lb', 're',
                    'le', 'nose', 'imouth', 'llip', 'ulip', 'hair'
                ]
            },
            'aux': {
                'suffix': [
                    'eye_g', 'hat', 'ear_r', 'neck_l',
                ],
                'names': [
                    'normal', 'glass', 'hat', 'earr', 'neckl'
                ]
            },
            'all': {
                'suffix': [
                    'neck', 'skin', 'cloth', 'l_ear', 'r_ear', 'l_brow', 'r_brow',
                    'l_eye', 'r_eye', 'nose', 'mouth', 'l_lip', 'u_lip', 'hair',
                    'eye_g', 'hat', 'ear_r', 'neck_l',
                ],
                'names': [
                    'bg', 'neck', 'face', 'cloth', 'rr', 'lr', 'rb', 'lb', 're',
                    'le', 'nose', 'imouth', 'llip', 'ulip', 'hair',
                    'glass', 'hat', 'earr', 'neckl'
                ]
            }
        }[label_type]

    def make_label(self, index, ordered_label_suffix):
        """Merge the per-category 512x512 mask PNGs of one sample into a
        single uint8 label map; category i gets label value i + 1."""
        label = np.zeros((512, 512), np.uint8)
        name = self.names[index]
        name_id = int(name)
        name5 = '%05d' % name_id
        # Mask annotations are sharded into folders of 2000 samples each.
        p = os.path.join(self.root, 'CelebAMask-HQ-mask-anno',
                         str(name_id // 2000), name5)
        for i, label_suffix in enumerate(ordered_label_suffix):
            label_value = i + 1
            label_fname = os.path.join(p + '_' + label_suffix + '.png')
            if os.path.exists(label_fname):
                mask = _cached_imread(label_fname, cv2.IMREAD_GRAYSCALE)
                label = np.where(mask > 0,
                                 np.ones_like(label) * label_value, label)
        return label

    def __getitem__(self, index):
        """Return {'image': 512x512 RGB array, 'label': 512x512 uint8 map}."""
        name = self.names[index]
        image = cv2.resize(
            cv2.imread(os.path.join(self.root, 'CelebA-HQ-img',
                                    name + '.jpg'))[:, :, ::-1],
            (512, 512),
            interpolation=cv2.INTER_LINEAR)

        data = {'image': image}
        label = self.make_label(index, self.label_setting['suffix'])
        # Plain key (the original used an f-string with no placeholders).
        data['label'] = label

        return data

    def __len__(self):
        return len(self.names)

    def sample_name(self, index):
        """Samples are identified by their CelebA-HQ id string."""
        return self.names[index]

    @property
    def label_names(self) -> List[str]:
        """Class names matching the label values produced by make_label."""
        return self.label_setting['names']
class IBUG300W(Dataset):
    """300W (IBUG) face alignment dataset with 68 landmarks per face.

    Annotations are the HRNet-format csv files; each row holds the image
    path, a face-box description (scale, center) and 68 xy landmarks.
    """

    def __init__(self, root: str, split: Split, subset: Optional[str] = None):
        self.root = root
        self.anno = []

        # Resolve which annotation csv backs the requested split/subset.
        if split == Split.TRAIN:
            anno_file = 'face_landmarks_300w_train.csv'
        elif split == Split.TEST:
            subset_files = {
                'Common': 'face_landmarks_300w_valid_common.csv',
                'Challenging': 'face_landmarks_300w_valid_challenge.csv',
            }
            if subset not in subset_files:
                raise RuntimeError(
                    f'Invalid subset {subset} for IBUG300W test set (should be "Common" or "Challenging")')
            anno_file = subset_files[subset]
        else:
            raise RuntimeError(f'Unsupported split {split} for IBUG300W')

        # One image in the official archive has a mis-named file on disk;
        # remap the csv path onto the actual (space-containing) file name.
        error_im_paths = {
            'ibug/image_092_01.jpg': 'ibug/image_092 _01.jpg'
        }

        self.info_list = []
        with open(os.path.join(self.root, anno_file), 'r') as fd:
            fd.readline()  # skip the first line
            for raw in fd:
                raw = raw.strip()
                if not raw or raw.startswith('#'):
                    continue
                fields = raw.split(',')
                im_path, scale, center_w, center_h = fields[:4]
                landmark_values = fields[4:]

                im_path = error_im_paths.get(im_path, im_path)
                sample_name = os.path.splitext(im_path)[0].replace('/', '_')

                im_path = os.path.join(self.root, im_path)
                assert os.path.exists(im_path)

                # All annotation coordinates are shifted by a fixed -2.0
                # offset (annotation-format convention; preserved as-is).
                self.info_list.append({
                    'sample_name': sample_name,
                    'im_path': im_path,
                    'landmarks': np.reshape(
                        np.array([float(v) - 2.0 for v in landmark_values],
                                 dtype=np.float32), [68, 2]),
                    'box_info': (float(scale),
                                 float(center_w) - 2.0,
                                 float(center_h) - 2.0)
                })

    def __len__(self):
        """Number of annotated samples."""
        return len(self.info_list)

    def __getitem__(self, index):
        """Return {'image': RGB array, 'box': (y1, x1, y2, x2) float32,
        'landmarks': 68x2 float32} for one sample."""
        info = self.info_list[index]
        image = cv2.cvtColor(cv2.imread(info['im_path']), cv2.COLOR_BGR2RGB)
        scale, center_w, center_h = info['box_info']
        # The face box is a square of side 200*scale centered on the face.
        half = 100.0 * scale
        top, left = center_h - half, center_w - half
        bottom, right = center_h + half, center_w + half

        return {
            'image': image,
            'box': np.array([top, left, bottom, right], dtype=np.float32),
            'landmarks': info['landmarks']
        }

    def sample_name(self, index):
        """Unique per-sample name derived from the image path."""
        return self.info_list[index]['sample_name']
class LaPa(Dataset):
    """LaPa face parsing dataset.

    Args:
        root (str): Directory containing 'train', 'val' and 'test' subdirs,
            each with 'images', 'labels' and 'landmarks' folders.
        split: Which split(s) to load; TOY loads at most 10 test samples.
    """

    def __init__(self, root, split=Split.ALL):
        assert os.path.isdir(root)
        self.root = root

        subfolders = []
        if split == Split.TRAIN:
            subfolders = ['train']
        elif split == Split.VAL:
            subfolders = ['val']
        elif split in {Split.TEST, Split.TOY}:
            subfolders = ['test']
        elif split == Split.ALL:
            subfolders = ['train', 'val', 'test']

        self.info = []
        for subf in subfolders:
            for name in os.listdir(os.path.join(self.root, subf, 'images')):
                if not name.endswith('.jpg'):
                    continue
                # splitext (instead of split('.')[0]) keeps any interior dots
                # in the stem; identical result for normal LaPa file names.
                name = os.path.splitext(name)[0]
                image_path = os.path.join(
                    self.root, subf, 'images', f'{name}.jpg')
                label_path = os.path.join(
                    self.root, subf, 'labels', f'{name}.png')
                landmark_path = os.path.join(
                    self.root, subf, 'landmarks', f'{name}.txt')
                assert os.path.exists(image_path)
                assert os.path.exists(label_path)
                assert os.path.exists(landmark_path)
                # Landmark files hold the point count (106) followed by 106
                # xy pairs. Use a context manager so the handle is closed
                # promptly (the original leaked it until GC).
                with open(landmark_path, 'r') as fd:
                    landmarks = [float(v) for v in fd.read().split()]
                assert landmarks[0] == 106 and len(landmarks) == 106 * 2 + 1
                landmarks = np.reshape(
                    np.array(landmarks[1:], np.float32), [106, 2])
                sample_name = f'{subf}.{name}'
                self.info.append(
                    {'image_path': image_path, 'label_path': label_path,
                     'landmarks': landmarks, 'sample_name': sample_name})
                if split == Split.TOY and len(self.info) >= 10:
                    break

    def __getitem__(self, index):
        """Return {'image': RGB array, 'label': uint8 parsing map,
        'landmarks': 106x2 float32} for one sample."""
        info = self.info[index]
        image = cv2.imread(info['image_path'])[:, :, ::-1]  # BGR -> RGB
        label = cv2.imread(info['label_path'], cv2.IMREAD_GRAYSCALE)
        landmarks = info['landmarks']
        return {'image': image, 'label': label, 'landmarks': landmarks}

    def __len__(self):
        return len(self.info)

    def sample_name(self, index):
        """Unique name of form '<subfolder>.<stem>'."""
        return self.info[index]['sample_name']

    @property
    def label_names(self):
        """Class names matching the values in the label map."""
        return ['background', 'face_lr_rr', 'lb', 'rb', 'le', 're', 'nose', 'ul', 'im', 'll', 'hair']

    @staticmethod
    def draw_landmarks(im, landmarks, color, thickness=5, eye_radius=3):
        """Draw the 106-point landmark layout on image `im` (in place).

        Polyline index ranges follow the LaPa 106-point grouping (face
        contour, brows, nose, eyes, lips); points 74 and 83 are the pupil
        centers drawn as circles.
        """
        landmarks = landmarks.astype(np.int32)
        cv2.polylines(im, [landmarks[0:33]], False,
                      color, thickness, cv2.LINE_AA)
        cv2.polylines(im, [landmarks[33:42]], True,
                      color, thickness, cv2.LINE_AA)
        cv2.polylines(im, [landmarks[42:51]], True,
                      color, thickness, cv2.LINE_AA)
        cv2.polylines(im, [landmarks[51:55]], False,
                      color, thickness, cv2.LINE_AA)
        cv2.polylines(im, [landmarks[55:66]], False,
                      color, thickness, cv2.LINE_AA)
        cv2.polylines(im, [landmarks[66:74]], True,
                      color, thickness, cv2.LINE_AA)
        cv2.circle(im, (landmarks[74, 0], landmarks[74, 1]),
                   eye_radius, color, thickness, cv2.LINE_AA)
        cv2.polylines(im, [landmarks[75:83]], True,
                      color, thickness, cv2.LINE_AA)
        cv2.circle(im, (landmarks[83, 0], landmarks[83, 1]),
                   eye_radius, color, thickness, cv2.LINE_AA)
        cv2.polylines(im, [landmarks[84:96]], True,
                      color, thickness, cv2.LINE_AA)
        cv2.polylines(im, [landmarks[96:-2]], True,
                      color, thickness, cv2.LINE_AA)
        return im
def freeze_aflw19(data_root):
    """Crop-and-freeze AFLW-19 (train / test / frontal test) into zip archives."""
    print('Freezing AFLW19 ...')
    ds_train = AFLW_19(os.path.join(data_root, 'AFLW-19'), split=TRAIN)
    ds_test = AFLW_19(os.path.join(data_root, 'AFLW-19'), split=TEST)
    ds_test_frontal = AFLW_19(os.path.join(
        data_root, 'AFLW-19'), split=TEST, subset='frontal')

    print(f'train: {len(ds_train)}, test: {len(ds_test)}, '
          f'test_frontal: {len(ds_test_frontal)}')

    # compute a 512x512 crop matrix from the dataset-provided box, then keep
    # only the tags needed downstream
    aug_512 = [
        aug.With(('box', None), 'crop_matrix', aug.UpdateCropAndResizeMatrix(
            (512, 512), align_corners=False)),
        # NOTE(review): 'label' is listed although this landmark dataset
        # yields no 'label' tag — presumably Filter ignores missing tags;
        # confirm against blueprint.ml.augmenters.Filter.
        aug.Filter(['image', 'label', 'landmarks', 'crop_matrix', 'box'])
    ]

    ds_train_aug = ds_train.augment(aug_512)
    ds_test_aug = ds_test.augment(aug_512)
    ds_test_frontal_aug = ds_test_frontal.augment(aug_512)

    freeze(ds_test_aug, os.path.join(data_root, 'AFLW-19.test.zip'),
           {'image': storage.IMAGE_JPG}, with_prog=True)
    freeze(ds_train_aug, os.path.join(data_root, 'AFLW-19.train.zip'),
           {'image': storage.IMAGE_JPG}, with_prog=True)
    freeze(ds_test_frontal_aug, os.path.join(data_root, 'AFLW-19.test_frontal.zip'),
           {'image': storage.IMAGE_JPG},
           with_prog=True)


def freeze_ibug300w(data_root):
    """Crop-and-freeze 300-W (train / Common / Challenging test) into zip archives."""
    print('Freezing IBUG300W ...')
    ds_train = IBUG300W(os.path.join(data_root, 'IBUG300W'), split=TRAIN)
    ds_test_common = IBUG300W(os.path.join(
        data_root, 'IBUG300W'), split=TEST, subset='Common')
    ds_test_challenging = IBUG300W(os.path.join(
        data_root, 'IBUG300W'), split=TEST, subset='Challenging')

    print(f'train: {len(ds_train)}, test_common: {len(ds_test_common)}, '
          f'test_challenging: {len(ds_test_challenging)}')

    # same 512x512 crop pipeline as the other landmark datasets (no 'box'
    # kept here, unlike AFLW-19)
    aug_512 = [
        aug.With(('box', None), 'crop_matrix', aug.UpdateCropAndResizeMatrix(
            (512, 512), align_corners=False)),
        aug.Filter(['image', 'label', 'landmarks', 'crop_matrix'])
    ]

    ds_train_aug = ds_train.augment(aug_512)
    ds_test_common_aug = ds_test_common.augment(aug_512)
    ds_test_challenging_aug = ds_test_challenging.augment(aug_512)

    freeze(ds_test_common_aug, os.path.join(data_root, 'IBUG300W.test_common.zip'),
           {'image': storage.IMAGE_JPG}, with_prog=True)
    freeze(ds_test_challenging_aug, os.path.join(data_root, 'IBUG300W.test_challenging.zip'),
           {'image': storage.IMAGE_JPG}, with_prog=True)
    freeze(ds_train_aug, os.path.join(data_root, 'IBUG300W.train.zip'),
           {'image': storage.IMAGE_JPG}, with_prog=True)


def freeze_wflw(data_root):
    """Crop-and-freeze WFLW (train + the seven test subsets) into zip archives."""
    print('Freezing WFLW ...')
    ds_train = WFLW(os.path.join(data_root, 'WFLW'), split=TRAIN)
    print(f'train: {len(ds_train)}')
    ds_tests = dict()
    for subset in ['all', 'blur', 'expression', 'illumination', 'largepose', 'makeup', 'occlusion']:
        ds_tests[subset] = WFLW(os.path.join(
            data_root, 'WFLW'), split=TEST, subset=subset)
        print(f'test_{subset}: {len(ds_tests[subset])}')

    # same 512x512 crop pipeline as the other landmark datasets
    aug_512 = [
        aug.With(('box', None), 'crop_matrix', aug.UpdateCropAndResizeMatrix(
            (512, 512), align_corners=False)),
        aug.Filter(['image', 'label', 'landmarks', 'crop_matrix'])
    ]

    ds_train_aug = ds_train.augment(aug_512)
    ds_tests_aug = {subset: ds_test.augment(
        aug_512) for subset, ds_test in ds_tests.items()}

    freeze(ds_train_aug, os.path.join(data_root, 'WFLW.train.zip'),
           {'image': storage.IMAGE_JPG}, with_prog=True)
    for subset, ds_test_aug in ds_tests_aug.items():
        freeze(ds_test_aug, os.path.join(data_root, f'WFLW.test_{subset}.zip'),
               {'image': storage.IMAGE_JPG}, with_prog=True)


if __name__ == '__main__':
    # usage: python -m farl.datasets.prepare <data_root> [--dataset NAME]
    parser = argparse.ArgumentParser()
    parser.add_argument('data_root', type=str)
    parser.add_argument('--dataset', default='all', type=str)

    args = parser.parse_args()

    if args.dataset in {'all', 'lapa'}:
        freeze_lapa(args.data_root)
    if args.dataset in {'all', 'celebamaskhq'}:
        freeze_celebamaskhq(args.data_root)
    if args.dataset in {'all', 'aflw19'}:
        freeze_aflw19(args.data_root)
    if args.dataset in {'all', 'ibug300w'}:
        freeze_ibug300w(args.data_root)
    if args.dataset in {'all', 'wflw'}:
        freeze_wflw(args.data_root)
class WFLW(Dataset):
    """WFLW 98-point facial landmark dataset.

    Args:
        root (str): Directory containing 'WFLW_images' and the HRNet-style
            'face_landmarks_wflw_*.csv' annotation files.
        split (Split): Split.TRAIN or Split.TEST; WFLW provides no other
            splits (see raise below).
        subset (str): Test-time subset: 'all', 'blur', 'expression',
            'illumination', 'largepose', 'makeup' or 'occlusion'.
    """

    def __init__(self, root, split=Split.ALL, subset='all'):
        self.root = root

        anno_file = None
        if split == Split.TRAIN:
            anno_file = 'face_landmarks_wflw_train.csv'
        elif split == Split.TEST:
            if subset == 'all':
                anno_file = 'face_landmarks_wflw_test.csv'
            else:
                anno_file = f'face_landmarks_wflw_test_{subset}.csv'
        if anno_file is None:
            # FIX: the original fell through with anno_file=None (including
            # for the default split=Split.ALL) and crashed inside
            # os.path.join with an opaque TypeError; fail clearly instead.
            raise ValueError(
                f'WFLW provides no annotation file for split {split!r}; '
                f'use Split.TRAIN or Split.TEST')

        self.info_list = []
        with open(os.path.join(self.root, anno_file), 'r') as fd:
            fd.readline()  # skip the CSV header line
            for line in fd:
                line = line.strip()
                if len(line) == 0:
                    continue
                if line.startswith('#'):
                    continue
                # row layout: im_path, scale, center_w, center_h, x0, y0, ...
                im_path, scale, center_w, center_h, * \
                    landmarks = line.split(',')

                landmarks = np.reshape(
                    np.array([float(v) for v in landmarks], dtype=np.float32), [98, 2])
                cx, cy = np.mean(landmarks, axis=0)

                # disambiguate multiple annotated faces from the same image
                # by appending the landmark centroid to the sample name
                sample_name = os.path.splitext(im_path)[0].replace(
                    '/', '.') + ('_%.3f_%.3f' % (cx, cy))
                im_path = os.path.join(self.root, 'WFLW_images', im_path)

                assert os.path.exists(im_path)

                self.info_list.append({
                    'sample_name': sample_name,
                    'im_path': im_path,
                    'landmarks': landmarks,
                    'box_info': (float(scale), float(center_w), float(center_h))
                })

    def __len__(self):
        return len(self.info_list)

    def __getitem__(self, index):
        info = self.info_list[index]
        image = cv2.cvtColor(cv2.imread(info['im_path']), cv2.COLOR_BGR2RGB)
        scale, center_w, center_h = info['box_info']
        # the annotation stores a scale relative to a 200px reference box
        box_half_size = 100.0 * scale

        return {
            'image': image,
            # box layout is (y1, x1, y2, x2) in original-image coordinates
            'box': np.array([center_h-box_half_size, center_w-box_half_size,
                             center_h+box_half_size, center_w+box_half_size],
                            dtype=np.float32),
            'landmarks': info['landmarks']
        }

    def sample_name(self, index):
        return self.info_list[index]['sample_name']
3 | 4 | - class: With 5 | tags_str: image -> image 6 | aug: 7 | class: Normalize255 8 | # update crop_matrix 9 | - class: AttachConstData 10 | tag_name: shape 11 | const_data: [512, 512] 12 | - class: With 13 | tags_str: shape, crop_matrix -> shape, crop_matrix 14 | aug: 15 | class: UpdateTransformMatrix 16 | target_shape: [512, 512] 17 | scale_mu: $$scale_mu 18 | ret_shape: true 19 | # warp image & label using the crop_matrix 20 | - class: With 21 | tags_str: crop_matrix -> transform_map 22 | aug: 23 | class: GetTransformMap 24 | warped_shape: [512, 512] 25 | warp_factor: $$warp_factor 26 | - class: With 27 | tags_str: image, transform_map -> warped_image 28 | aug: 29 | class: TransformByMap 30 | interpolation: bilinear 31 | # make some original data non-stackable 32 | - class: With 33 | tags_str: image, landmarks, crop_matrix, box, sample_name -> original_data 34 | aug: 35 | class: MakeNonStackable 36 | - class: Filter 37 | tags: [warped_image, original_data] -------------------------------------------------------------------------------- /farl/experiments/face_alignment/augmenters/aflw19_test_post.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | # +valid: bool 5 | # +pred_warped_landmarks: npoints x 2 6 | - class: With 7 | tags_str: original_data -> image, landmarks, crop_matrix, box, sample_name 8 | aug: 9 | class: UnwrapNonStackable 10 | - class: With 11 | tags_str: pred_warped_landmarks, crop_matrix -> pred_landmarks 12 | aug: 13 | - class: TransformPoints2DInverted 14 | warped_shape: [512, 512] 15 | warp_factor: $$warp_factor 16 | - class: Filter 17 | tags: [landmarks, pred_landmarks, box, sample_name, valid] -------------------------------------------------------------------------------- /farl/experiments/face_alignment/augmenters/test.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | - class: With 5 | tags_str: image -> image 6 | aug: 7 | class: Normalize255 8 | # update crop_matrix 9 | - class: AttachConstData 10 | tag_name: shape 11 | const_data: [512, 512] 12 | - class: With 13 | tags_str: shape, crop_matrix -> shape, crop_matrix 14 | aug: 15 | class: UpdateTransformMatrix 16 | target_shape: [512, 512] 17 | scale_mu: $$scale_mu 18 | ret_shape: true 19 | # warp image & label using the crop_matrix 20 | - class: With 21 | tags_str: crop_matrix -> transform_map 22 | aug: 23 | class: GetTransformMap 24 | warped_shape: [512, 512] 25 | warp_factor: $$warp_factor 26 | - class: With 27 | tags_str: image, transform_map -> warped_image 28 | aug: 29 | class: TransformByMap 30 | interpolation: bilinear 31 | # make some original data non-stackable 32 | - class: With 33 | tags_str: image, landmarks, crop_matrix, sample_name -> original_data 34 | aug: 35 | class: MakeNonStackable 36 | - class: Filter 37 | tags: [warped_image, original_data] -------------------------------------------------------------------------------- /farl/experiments/face_alignment/augmenters/test_post.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 
Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | # +valid: bool 5 | # +pred_warped_landmarks: npoints x 2 6 | - class: With 7 | tags_str: original_data -> image, landmarks, crop_matrix, sample_name 8 | aug: 9 | class: UnwrapNonStackable 10 | - class: With 11 | tags_str: pred_warped_landmarks, crop_matrix -> pred_landmarks 12 | aug: 13 | - class: TransformPoints2DInverted 14 | warped_shape: [512, 512] 15 | warp_factor: $$warp_factor 16 | - class: Filter 17 | tags: [landmarks, pred_landmarks, sample_name, valid] -------------------------------------------------------------------------------- /farl/experiments/face_alignment/augmenters/train.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | - class: With 5 | tags_str: image -> image 6 | aug: 7 | class: Normalize255 8 | # update crop_matrix 9 | - class: AttachConstData 10 | tag_name: shape 11 | const_data: [512, 512] 12 | - class: With 13 | tags_str: shape, crop_matrix -> shape, crop_matrix 14 | aug: 15 | class: UpdateRandomTransformMatrix 16 | target_shape: [512, 512] 17 | shift_sigma: $$shift_sigma 18 | rot_sigma: $$rot_sigma 19 | scale_sigma: $$scale_sigma 20 | scale_mu: $$scale_mu 21 | rot_normal: false 22 | scale_normal: false 23 | shift_normal: false 24 | ret_shape: true 25 | # warp image & label using the crop_matrix 26 | - class: With 27 | tags_str: crop_matrix -> transform_map 28 | aug: 29 | class: GetTransformMap 30 | warped_shape: [512, 512] 31 | warp_factor: $$warp_factor 32 | - class: With 33 | tags_str: image, transform_map -> warped_image 34 | aug: 35 | class: TransformByMap 36 | interpolation: bilinear 37 | - class: With 38 | tags_str: landmarks, crop_matrix -> warped_landmarks 39 | aug: 40 | class: TransformPoints2D 41 | warped_shape: [512, 512] 42 | warp_factor: $$warp_factor 43 | # color augmentation on image 44 | - class: With 45 | tags_str: warped_image 
-> warped_image 46 | aug: 47 | - class: RandomOcclusion 48 | - class: Maybe 49 | prob: 0.5 50 | then_branch: 51 | class: NoiseFusion 52 | - class: RandomGray 53 | - class: RandomGamma 54 | - class: RandomBlur 55 | - class: Filter 56 | tags: [warped_image, warped_landmarks] -------------------------------------------------------------------------------- /farl/experiments/face_alignment/eval_data/aflw19_test.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | class: blueprint.ml.DataProcessor 5 | data_src: 6 | class: blueprint.ml.DataSource 7 | data_path: $BLOB('data/AFLW-19.{subset}.zip'.format(subset=ARGS['subset'])) # image, crop_matrix, landmarks, box 8 | subset_ids: $ARGS.get('subset_ids', None) 9 | augmentations: $PARSE('../augmenters/aflw19_test.yaml', scale_mu=ARGS['scale_mu'], warp_factor=ARGS['warp_factor']) 10 | post_augmentations: $PARSE('../augmenters/aflw19_test_post.yaml', warp_factor=ARGS['warp_factor']) 11 | batch_size: $$batch_size 12 | randomize: false -------------------------------------------------------------------------------- /farl/experiments/face_alignment/eval_data/ibug300w_test.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
class FaceAlignmentTransformer(nn.Module):
    """Landmark regressor: backbone features -> heatmaps -> 2D coordinates.

    Input:
        image (torch.Tensor): Float32 tensor with shape [b, 3, h, w],
            normalized to [0, 1].

    Returns:
        landmark (torch.Tensor): Float32 tensor with shape [b, npoints, 2],
            coordinates normalized to [0, 1].
        aux_outputs (dict):
            'heatmap' (torch.Tensor): Float32 tensor [b, npoints, S, S],
                pre-activation.
            'heatmap_acted' (torch.Tensor): The activated heatmap.
    """

    def __init__(self, backbone: nn.Module, heatmap_head: nn.Module,
                 heatmap_act: Optional[str] = 'relu'):
        super().__init__()
        self.backbone = backbone
        self.heatmap_head = heatmap_head
        self.heatmap_act = Activation(heatmap_act)
        # move the whole network onto the GPU in float32
        self.cuda().float()

    def forward(self, image):
        feats, _ = self.backbone(image)
        raw_heatmap = self.heatmap_head(feats)  # b x npoints x s x s
        acted_heatmap = self.heatmap_act(raw_heatmap)
        coords = heatmap2points(acted_heatmap)  # b x npoints x 2
        return coords, {'heatmap': raw_heatmap, 'heatmap_acted': acted_heatmap}
2 | # Licensed under the MIT License. 3 | 4 | # args: [lr_factor] 5 | 6 | optimizer_type: torch.optim.AdamW 7 | optimizer_args: 8 | lr: $0.001 * ARGS.get('lr_factor', 1.0) 9 | betas: [0.9, 0.999] 10 | weight_decay: 0.00001 11 | lr_scheduler_type: torch.optim.lr_scheduler.MultiStepLR 12 | lr_scheduler_args: 13 | milestones: [200] 14 | gamma: 0.1 15 | lr_scheduler_call: epochwise 16 | network_settings: 17 | - params: main.backbone.fpns 18 | lr: $0.01 * ARGS.get('lr_factor', 1.0) 19 | - params: main.heatmap_head 20 | lr: $0.01 * ARGS.get('lr_factor', 1.0) -------------------------------------------------------------------------------- /farl/experiments/face_alignment/optimizers/refine_backbone.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | # args: [lr_factor] 5 | 6 | optimizer_type: torch.optim.AdamW 7 | optimizer_args: 8 | lr: $0.001 * ARGS.get('lr_factor', 1.0) 9 | betas: [0.9, 0.999] 10 | weight_decay: 0.00001 11 | lr_scheduler_type: torch.optim.lr_scheduler.MultiStepLR 12 | lr_scheduler_args: 13 | milestones: [200] 14 | gamma: 0.1 15 | lr_scheduler_call: epochwise 16 | network_settings: 17 | - params: main.backbone.visual 18 | lr: $0.0001 * ARGS.get('lr_factor', 1.0) 19 | - params: main.backbone.fpns 20 | lr: $0.01 * ARGS.get('lr_factor', 1.0) 21 | - params: main.heatmap_head 22 | lr: $0.01 * ARGS.get('lr_factor', 1.0) -------------------------------------------------------------------------------- /farl/experiments/face_alignment/scorer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
class NormalizeInfo:
    """Interface: compute the per-sample distance used to normalize an NME."""

    def get_unit_dist(self, data) -> float:
        raise NotImplementedError()


class NormalizeByLandmarks(NormalizeInfo):
    """Normalize by the distance between the centers of two landmark groups."""

    def __init__(self, landmark_tag: str, left_id: Union[int, List[int]], right_id: Union[int, List[int]]):
        self.landmark_tag = landmark_tag
        # accept single indices as well as groups of indices
        self.left_id = [left_id] if isinstance(left_id, int) else left_id
        self.right_id = [right_id] if isinstance(right_id, int) else right_id

    def get_unit_dist(self, data) -> float:
        pts = data[self.landmark_tag]
        left_center = pts[self.left_id, :].mean(0)
        right_center = pts[self.right_id, :].mean(0)
        return np.linalg.norm(left_center - right_center, axis=-1)


class NormalizeByBox(NormalizeInfo):
    """Normalize by sqrt(area) of a (y1, x1, y2, x2) box."""

    def __init__(self, box_tag: str):
        self.box_tag = box_tag

    def get_unit_dist(self, data) -> float:
        top, left, bottom, right = data[self.box_tag]
        height = bottom - top
        width = right - left
        return math.sqrt(height * width)


class NormalizeByBoxDiag(NormalizeInfo):
    """Normalize by the diagonal length of a (y1, x1, y2, x2) box."""

    def __init__(self, box_tag: str):
        self.box_tag = box_tag

    def get_unit_dist(self, data) -> float:
        top, left, bottom, right = data[self.box_tag]
        height = bottom - top
        width = right - left
        return math.sqrt(width * width + height * height)
67 | """ 68 | 69 | def __init__(self, landmark_tag: str, pred_landmark_tag: str, 70 | normalize_infos: Mapping[str, NormalizeInfo]) -> None: 71 | self.landmark_tag = landmark_tag 72 | self.pred_landmark_tag = pred_landmark_tag 73 | self.normalize_infos = normalize_infos 74 | 75 | def init_evaluation(self): 76 | self.nmes_sum = defaultdict(float) # norm_name: str -> float 77 | self.count = defaultdict(int) # norm_name: str -> int 78 | 79 | def evaluate(self, data: Mapping[str, np.ndarray]): 80 | landmark = data[self.landmark_tag] 81 | pred_landmark = data[self.pred_landmark_tag] 82 | 83 | if landmark.shape != pred_landmark.shape: 84 | raise RuntimeError( 85 | f'The landmark shape {landmark.shape} mismatches ' 86 | f'the pred_landmark shape {pred_landmark.shape}') 87 | 88 | for norm_name, norm_info in self.normalize_infos.items(): 89 | # compute unit distance for nme normalization 90 | unit_dist = norm_info.get_unit_dist(data) 91 | 92 | # compute normalized nme for this sample 93 | # [npoints] -> scalar 94 | nme = (np.linalg.norm( 95 | landmark - pred_landmark, axis=-1) / unit_dist).mean() 96 | self.nmes_sum[norm_name] += nme 97 | 98 | self.count[norm_name] += 1 99 | 100 | def finalize_evaluation(self) -> Mapping[str, float]: 101 | # gather all nmes_sum 102 | names_array: List[str] = list(self.nmes_sum.keys()) 103 | 104 | nmes_sum = torch.tensor( 105 | [self.nmes_sum[name] for name in names_array], 106 | dtype=torch.float32, device='cuda') 107 | if dist.is_initialized(): 108 | dist.all_reduce(nmes_sum) 109 | 110 | count_sum = torch.tensor( 111 | [self.count[name] for name in names_array], 112 | dtype=torch.int64, device='cuda') 113 | if dist.is_initialized(): 114 | dist.all_reduce(count_sum) 115 | 116 | scores = dict() 117 | 118 | # compute nme scores 119 | for name, nmes_sum_val, count_val in zip(names_array, nmes_sum, count_sum): 120 | scores[name] = nmes_sum_val.item() / count_val.item() 121 | 122 | # compute final nme 123 | return scores 124 | 125 | 126 | class 
class AUC_FR(Scorer):
    """Compute AUC and FR (Failure Rate) from the CED curve of per-sample NMEs.

    Output scores with name `'auc_{suffix_name}'` and `'fr_{suffix_name}'`.

    Args:
        landmark_tag / pred_landmark_tag: Data keys of the GT and predicted
            landmarks.
        normalize_info: Distance used to normalize each sample's NME.
        threshold: CED integration upper bound (NME above it counts as failure).
        suffix_name: Suffix appended to the emitted score names.
        step: CED sampling step along the NME axis.
        gather_part_size: Chunk size for the distributed gather.
    """

    def __init__(self, landmark_tag: str, pred_landmark_tag: str,
                 normalize_info: NormalizeInfo,
                 threshold: float, suffix_name: str, step: float = 0.0001,
                 gather_part_size: Optional[int] = 5) -> None:
        self.landmark_tag = landmark_tag
        self.pred_landmark_tag = pred_landmark_tag
        self.normalize_info = normalize_info
        self.threshold = threshold
        self.suffix_name = suffix_name
        self.step = step
        self.gather_part_size = gather_part_size

    def init_evaluation(self):
        self.nmes = []

    def evaluate(self, data: Mapping[str, np.ndarray]):
        """Accumulate the normalized NME of one sample."""
        landmark = data[self.landmark_tag]
        pred_landmark = data[self.pred_landmark_tag]

        if landmark.shape != pred_landmark.shape:
            raise RuntimeError(
                f'The landmark shape {landmark.shape} mismatches '
                f'the pred_landmark shape {pred_landmark.shape}')

        # compute unit distance for nme normalization
        unit_dist = self.normalize_info.get_unit_dist(data)

        # compute normalized nme for this sample
        nme = (np.linalg.norm(
            landmark - pred_landmark, axis=-1) / unit_dist).mean()
        self.nmes.append(nme)

    def finalize_evaluation(self) -> Mapping[str, float]:
        """Gather NMEs across ranks, build the CED curve and integrate it."""
        # FIX: scipy.integrate.simps was removed in SciPy >= 1.14 (renamed
        # `simpson`); import locally with a fallback so this works on both
        # old and new SciPy.  The module-level `from scipy.integrate import
        # simps` should be migrated the same way.
        try:
            from scipy.integrate import simpson as _simpson
        except ImportError:  # older SciPy
            from scipy.integrate import simps as _simpson

        # gather all nmes
        if dist.is_initialized():
            nmes = all_gather_by_part(self.nmes, self.gather_part_size)
        else:
            nmes = self.nmes
        nmes = torch.tensor(nmes)

        nmes = nmes.sort(dim=0).values.cpu().numpy()

        # from https://github.com/HRNet/HRNet-Facial-Landmark-Detection/issues/6#issuecomment-503898737
        count = len(nmes)
        xaxis = list(np.arange(0., self.threshold + self.step, self.step))
        # CED: fraction of samples whose NME does not exceed x
        # (FIX: dropped the pointless list wrap around the boolean array)
        ced = [float(np.count_nonzero(nmes <= x)) / count for x in xaxis]
        auc = _simpson(ced, x=xaxis) / self.threshold
        fr = 1. - ced[-1]  # failure rate: fraction of samples above threshold

        return {f'auc_{self.suffix_name}': auc, f'fr_{self.suffix_name}': fr}
3 | 4 | class: NME 5 | landmark_tag: landmarks 6 | pred_landmark_tag: pred_landmarks 7 | normalize_infos: 8 | inter_ocular: 9 | class: NormalizeByLandmarks 10 | landmark_tag: landmarks 11 | left_id: 36 12 | right_id: 45 13 | inter_pupil: 14 | class: NormalizeByLandmarks 15 | landmark_tag: landmarks 16 | left_id: [36, 37, 38, 39, 40, 41] 17 | right_id: [42, 43, 44, 45, 46, 47] -------------------------------------------------------------------------------- /farl/experiments/face_alignment/scorers/wflw.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | class: blueprint.ml.MultipleScorers 5 | scorers: 6 | - class: NME 7 | landmark_tag: landmarks 8 | pred_landmark_tag: pred_landmarks 9 | normalize_infos: 10 | inter_ocular: 11 | class: NormalizeByLandmarks 12 | landmark_tag: landmarks 13 | left_id: 60 14 | right_id: 72 15 | inter_pupil: 16 | class: NormalizeByLandmarks 17 | landmark_tag: landmarks 18 | left_id: 96 19 | right_id: 97 20 | 21 | - class: AUC_FR 22 | landmark_tag: landmarks 23 | pred_landmark_tag: pred_landmarks 24 | normalize_info: 25 | class: NormalizeByLandmarks 26 | landmark_tag: landmarks 27 | left_id: 60 28 | right_id: 72 29 | threshold: 0.1 30 | suffix_name: inter_ocular_10 31 | -------------------------------------------------------------------------------- /farl/experiments/face_alignment/task.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
from typing import Mapping, Any, Tuple, Optional

import torch
from torch import nn
import torch.nn.functional as F

from blueprint.ml import Task, ForwardFlags

from ...network import normalize_points, denormalize_points, points2heatmap
from ...network.viz import visualize_in_row


class FaceAlignment(Task):
    """Face alignment task.

    Regresses normalized landmark coordinates (and, optionally, landmark
    heatmaps) from an input image batch, and computes the configured
    training losses.

    Args:
        network_name (str): Name of the network used for training
            (resolved in `setup_networks`).
        network_name_eval (Optional[str]): Name of the network used at
            eval time (e.g. an EMA copy). Defaults to `network_name`.
        image_tag (str): Batch key of the input image, shaped b x h x w x c.
        landmark_tag (str): Batch key of the ground-truth landmarks
            (pixel coordinates; normalized internally).
        pred_landmark_tag (str): Output key for the predicted landmarks.
        loss_weights (Optional[Mapping[str, float]]): Weight per loss name.
            Supported names: 'coord_l1_loss', 'heatmap_l1_loss',
            'heatmap_l2_loss', 'heatmap_ce_loss'. Losses with weight 0.0
            are skipped. Defaults to {'coord_l1_loss': 1.0}.
        heatmap_size (int): Side length that both predicted and rendered
            ground-truth heatmaps are resized/rendered to.
        heatmap_radius (float): Radius used when rendering ground-truth
            heatmaps via `points2heatmap`.
        heatmap_interpolate_mode (str): Interpolation mode used to resize
            the predicted heatmaps.
    """

    def __init__(self,
                 network_name: str = 'main',
                 network_name_eval: Optional[str] = None,
                 image_tag: str = 'image', landmark_tag: str = 'landmark',
                 pred_landmark_tag: str = 'pred_landmark',
                 loss_weights: Optional[Mapping[str, float]] = None,
                 heatmap_size: int = 128,
                 heatmap_radius: float = 16.0,
                 heatmap_interpolate_mode: str = 'bilinear') -> None:

        super().__init__()
        self.network_name = network_name
        # Fall back to the training network when no eval network is named.
        self.network_name_eval = network_name_eval
        if self.network_name_eval is None:
            self.network_name_eval = self.network_name
        self.image_tag = image_tag
        self.landmark_tag = landmark_tag
        self.pred_landmark_tag = pred_landmark_tag
        # Use a None sentinel instead of a mutable default argument so the
        # default dict is not shared across instances.
        if loss_weights is None:
            loss_weights = {'coord_l1_loss': 1.0}
        self.loss_weights = loss_weights
        self.heatmap_size = heatmap_size
        self.heatmap_radius = heatmap_radius
        self.heatmap_interpolate_mode = heatmap_interpolate_mode

    def setup_networks(self, networks: Mapping[str, nn.Module]):
        """Resolve the training and evaluation networks by name."""
        self.regression_net = networks[self.network_name]
        self.regression_net_eval = networks[self.network_name_eval]

    def forward(self, data: Mapping[str, torch.Tensor], flags: ForwardFlags
                ) -> Tuple[
                    Optional[torch.Tensor],
                    Mapping[str, torch.Tensor],
                    Mapping[str, torch.Tensor],
                    Mapping[str, torch.Tensor]]:
        """Run one forward pass.

        Args:
            data: Batch mapping; must contain `self.image_tag` and, when
                losses are requested, `self.landmark_tag`.
            flags: Controls which of losses / outputs / images are computed.

        Returns:
            Tuple of (loss, losses, outputs, images). `loss` is None and the
            mappings are empty unless requested through `flags`.
        """
        # b x c x h x w
        image = data[self.image_tag].cuda().permute(0, 3, 1, 2).contiguous()
        _, _, h, w = image.shape

        # b x n x 2 — use the EMA/eval network outside of training.
        if self.training:
            net = self.regression_net
        else:
            net = self.regression_net_eval
        pred_landmark, aux_outputs = net(image)

        # Lazily-filled cache shared between the loss closures and the
        # outputs/images sections below.
        cache = dict()
        if flags.with_losses:
            landmark = normalize_points(
                data[self.landmark_tag].to(image), h, w)

            # compute all losses
            def _compute_named_loss(name: str) -> torch.Tensor:
                if name == 'coord_l1_loss':
                    # Mean per-landmark Euclidean distance, per sample.
                    return (landmark - pred_landmark).norm(dim=-1).mean([1])

                if name.startswith('heatmap'):
                    # Resize predictions and render the ground truth only
                    # once, even if several heatmap losses are enabled.
                    if 'pred_heatmap' not in cache:
                        cache['pred_heatmap'] = F.interpolate(
                            aux_outputs['heatmap'], (self.heatmap_size,
                                                     self.heatmap_size),
                            mode=self.heatmap_interpolate_mode, align_corners=False)
                    if 'pred_heatmap_acted' not in cache:
                        cache['pred_heatmap_acted'] = F.interpolate(
                            aux_outputs['heatmap_acted'], (self.heatmap_size,
                                                           self.heatmap_size),
                            mode=self.heatmap_interpolate_mode, align_corners=False)
                    if 'heatmap' not in cache:
                        # render gt heatmap
                        with torch.no_grad():
                            cache['heatmap'] = points2heatmap(
                                landmark, (self.heatmap_size, self.heatmap_size), self.heatmap_radius)

                    if name == 'heatmap_l1_loss':
                        return (cache['pred_heatmap_acted'] - cache['heatmap']).abs().mean([1, 2, 3])
                    if name == 'heatmap_l2_loss':
                        return (cache['pred_heatmap'] - cache['heatmap']).pow(2).mean([1, 2, 3])
                    if name == 'heatmap_ce_loss':
                        bce_loss = F.binary_cross_entropy_with_logits(
                            cache['pred_heatmap'], cache['heatmap'], reduction='none')
                        return bce_loss.mean([1, 2, 3])

                raise RuntimeError(f'Unknown loss name: {name}.')

            # Zero-weighted losses are skipped entirely (never computed).
            losses = {name: _compute_named_loss(
                name) for name, weight in self.loss_weights.items() if weight != 0.0}
            loss = sum([l * self.loss_weights[name]
                        for name, l in losses.items()]).mean()
        else:
            loss, losses = None, dict()

        if flags.with_outputs:
            # Predicted landmarks are reported in pixel coordinates.
            outputs = {self.pred_landmark_tag: denormalize_points(
                pred_landmark, h, w)}
            if 'heatmap' in cache:
                outputs['heatmap'] = cache['heatmap']
            if 'pred_heatmap' in cache:
                outputs['pred_heatmap'] = cache['pred_heatmap']
            if 'pred_heatmap_acted' in cache:
                outputs['pred_heatmap_acted'] = cache['pred_heatmap_acted']
        else:
            outputs = dict()

        if flags.with_images:
            images = {
                self.pred_landmark_tag: visualize_in_row(((pred_landmark, image), 'points'))}
            if 'heatmap' in cache:
                images['heatmap'] = visualize_in_row(
                    (cache['heatmap'], 'BNHW'))
                images['heatmap_sum'] = visualize_in_row(
                    (cache['heatmap'].sum(1), 'BHW'))

            if 'pred_heatmap_acted' in cache:
                images['pred_heatmap_acted'] = visualize_in_row(
                    (cache['pred_heatmap_acted'], 'BNHW'))
                images['pred_heatmap_acted_sum'] = visualize_in_row(
                    (cache['pred_heatmap_acted'].sum(1), 'BHW'))
        else:
            images = dict()

        return loss, losses, outputs, images
3 | 4 | package: farl.experiments.face_alignment 5 | 6 | class: blueprint.ml.DistributedGPURun 7 | local_run: 8 | $PARSE('./trainers/aflw19_farl.yaml', 9 | cfg_file=FILE, 10 | train_data_ratio=None, 11 | batch_size=5, 12 | model_type='base', 13 | model_path=BLOB('checkpoint/FaRL-Base-Patch16-LAIONFace20M-ep16.pth'), 14 | input_resolution=448, 15 | head_channel=768, 16 | optimizer_name='refine_backbone', 17 | enable_amp=False) -------------------------------------------------------------------------------- /farl/experiments/face_alignment/train_aflw19_farl-b-ep64_448_refinebb.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | package: farl.experiments.face_alignment 5 | 6 | class: blueprint.ml.DistributedGPURun 7 | local_run: 8 | $PARSE('./trainers/aflw19_farl.yaml', 9 | cfg_file=FILE, 10 | train_data_ratio=None, 11 | batch_size=5, 12 | model_type='base', 13 | model_path=BLOB('checkpoint/FaRL-Base-Patch16-LAIONFace20M-ep64.pth'), 14 | input_resolution=448, 15 | head_channel=768, 16 | optimizer_name='refine_backbone', 17 | enable_amp=False) -------------------------------------------------------------------------------- /farl/experiments/face_alignment/train_ibug300w_farl-b-50m-ep16_448_refinebb.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | package: farl.experiments.face_alignment 5 | 6 | class: blueprint.ml.DistributedGPURun 7 | local_run: 8 | $PARSE('./trainers/ibug300w_farl.yaml', 9 | cfg_file=FILE, 10 | train_data_ratio=None, 11 | batch_size=5, 12 | model_type='base', 13 | model_path=BLOB('checkpoint/FaRL-Base-Patch16-LAIONFace50M-ep16.pth'), 14 | input_resolution=448, 15 | head_channel=768, 16 | optimizer_name='refine_backbone', 17 | enable_amp=False) -------------------------------------------------------------------------------- /farl/experiments/face_alignment/train_ibug300w_farl-b-ep16_448_refinebb.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | package: farl.experiments.face_alignment 5 | 6 | class: blueprint.ml.DistributedGPURun 7 | local_run: 8 | $PARSE('./trainers/ibug300w_farl.yaml', 9 | cfg_file=FILE, 10 | train_data_ratio=None, 11 | batch_size=5, 12 | model_type='base', 13 | model_path=BLOB('checkpoint/FaRL-Base-Patch16-LAIONFace20M-ep16.pth'), 14 | input_resolution=448, 15 | head_channel=768, 16 | optimizer_name='refine_backbone', 17 | enable_amp=False) -------------------------------------------------------------------------------- /farl/experiments/face_alignment/train_ibug300w_farl-b-ep64_448_refinebb.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | package: farl.experiments.face_alignment 5 | 6 | class: blueprint.ml.DistributedGPURun 7 | local_run: 8 | $PARSE('./trainers/ibug300w_farl.yaml', 9 | cfg_file=FILE, 10 | train_data_ratio=None, 11 | batch_size=5, 12 | model_type='base', 13 | model_path=BLOB('checkpoint/FaRL-Base-Patch16-LAIONFace20M-ep64.pth'), 14 | input_resolution=448, 15 | head_channel=768, 16 | optimizer_name='refine_backbone', 17 | enable_amp=False) -------------------------------------------------------------------------------- /farl/experiments/face_alignment/train_wflw_farl-b-50m-ep16_448_refinebb.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | package: farl.experiments.face_alignment 5 | 6 | class: blueprint.ml.DistributedGPURun 7 | local_run: 8 | $PARSE('./trainers/wflw_farl.yaml', 9 | cfg_file=FILE, 10 | train_data_ratio=None, 11 | batch_size=5, 12 | model_type='base', 13 | model_path=BLOB('checkpoint/FaRL-Base-Patch16-LAIONFace50M-ep16.pth'), 14 | input_resolution=448, 15 | head_channel=768, 16 | optimizer_name='refine_backbone', 17 | enable_amp=False) -------------------------------------------------------------------------------- /farl/experiments/face_alignment/train_wflw_farl-b-ep16_448_refinebb.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | package: farl.experiments.face_alignment 5 | 6 | class: blueprint.ml.DistributedGPURun 7 | local_run: 8 | $PARSE('./trainers/wflw_farl.yaml', 9 | cfg_file=FILE, 10 | train_data_ratio=None, 11 | batch_size=5, 12 | model_type='base', 13 | model_path=BLOB('checkpoint/FaRL-Base-Patch16-LAIONFace20M-ep16.pth'), 14 | input_resolution=448, 15 | head_channel=768, 16 | optimizer_name='refine_backbone', 17 | enable_amp=False) -------------------------------------------------------------------------------- /farl/experiments/face_alignment/train_wflw_farl-b-ep64_448_refinebb.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | package: farl.experiments.face_alignment 5 | 6 | class: blueprint.ml.DistributedGPURun 7 | local_run: 8 | $PARSE('./trainers/wflw_farl.yaml', 9 | cfg_file=FILE, 10 | train_data_ratio=None, 11 | batch_size=5, 12 | model_type='base', 13 | model_path=BLOB('checkpoint/FaRL-Base-Patch16-LAIONFace20M-ep64.pth'), 14 | input_resolution=448, 15 | head_channel=768, 16 | optimizer_name='refine_backbone', 17 | enable_amp=False) -------------------------------------------------------------------------------- /farl/experiments/face_alignment/trainers/aflw19_farl.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | # args: cfg_file, 5 | # [train_data_ratio], [train_subset_ids], [test_subset_ids], 6 | # batch_size, 7 | # model_type, model_path, 8 | # input_resolution, head_channel, 9 | # optimizer_name, [lr_factor], enable_amp 10 | 11 | package: farl.experiments.face_alignment 12 | 13 | class: blueprint.ml.Trainer 14 | 15 | states_dir: $STATES(ARGS['cfg_file']) 16 | outputs_dir: $OUTPUTS(ARGS['cfg_file']) 17 | 18 | train_dataprocessor: 19 | class: blueprint.ml.DataProcessor 20 | data_src: 21 | class: blueprint.ml.DataSource 22 | data_path: $BLOB('data/AFLW-19.train.zip') 23 | random_ratio: $ARGS.get('train_data_ratio', None) 24 | subset_ids: $ARGS.get('train_subset_ids', None) 25 | augmentations: 26 | $PARSE('../augmenters/train.yaml', 27 | shift_sigma=0.05, rot_sigma=0.174, scale_sigma=0.1, 28 | scale_mu=1.0, warp_factor=0.0) 29 | batch_size: $$batch_size 30 | randomize: true 31 | 32 | eval_dataprocessors: 33 | aflw19_test: 34 | $PARSE('../eval_data/aflw19_test.yaml', 35 | subset='test', scale_mu=1.0, warp_factor=0.0, batch_size=ARGS['batch_size'], 36 | subset_ids=ARGS.get('test_subset_ids', None)) 37 | aflw19_test_frontal: 38 | $PARSE('../eval_data/aflw19_test.yaml', 39 | subset='test_frontal', scale_mu=1.0, warp_factor=0.0, batch_size=ARGS['batch_size'], 40 | subset_ids=ARGS.get('test_subset_ids', None)) 41 | 42 | networks: 43 | main: 44 | $PARSE('../networks/farl.yaml', 45 | model_type=ARGS['model_type'], 46 | model_path=ARGS['model_path'], 47 | input_resolution=ARGS['input_resolution'], 48 | head_channel=ARGS.get('head_channel', 768), 49 | num_landmarks=19) 50 | main_ema: 51 | class: blueprint.ml.EMAConfig 52 | network_name: main 53 | decay: 0.999 54 | 55 | task_module: 56 | class: FaceAlignment 57 | network_name: main 58 | network_name_eval: main_ema 59 | image_tag: warped_image 60 | landmark_tag: warped_landmarks 61 | pred_landmark_tag: pred_warped_landmarks 62 | loss_weights: 63 | coord_l1_loss: 1.0 64 | heatmap_ce_loss: 1.0 65 | heatmap_size: 128 66 | 
heatmap_radius: 5.0 67 | 68 | scorer: $PARSE('../scorers/aflw19.yaml') 69 | 70 | optimizer_cfg: 71 | $PARSE('../optimizers/{optimizer_name}.yaml'.format( 72 | optimizer_name=ARGS['optimizer_name']), 73 | lr_factor=ARGS.get('lr_factor', 1.0)) 74 | 75 | logger: 76 | class: blueprint.ml.Loggers 77 | loggers: 78 | - class: blueprint.ml.StandardLogger 79 | - class: blueprint.ml.TSVDataLogger 80 | - class: blueprint.ml.TensorBoardLogger 81 | 82 | enable_amp: $$enable_amp 83 | max_epoches: 150 84 | states_save_interval: 5 85 | eval_interval: 1 -------------------------------------------------------------------------------- /farl/experiments/face_alignment/trainers/ibug300w_farl.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | # args: cfg_file, 5 | # [train_data_ratio], [train_subset_ids], [test_subset_ids], 6 | # batch_size, 7 | # model_type, model_path, 8 | # input_resolution, head_channel, 9 | # optimizer_name, [lr_factor], enable_amp 10 | 11 | package: farl.experiments.face_alignment 12 | 13 | class: blueprint.ml.Trainer 14 | 15 | states_dir: $STATES(ARGS['cfg_file']) 16 | outputs_dir: $OUTPUTS(ARGS['cfg_file']) 17 | 18 | train_dataprocessor: 19 | class: blueprint.ml.DataProcessor 20 | data_src: 21 | class: blueprint.ml.DataSource 22 | data_path: $BLOB('data/IBUG300W.train.zip') 23 | random_ratio: $ARGS.get('train_data_ratio', None) 24 | subset_ids: $ARGS.get('train_subset_ids', None) 25 | augmentations: 26 | $PARSE('../augmenters/train.yaml', 27 | shift_sigma=0.05, rot_sigma=0.174, scale_sigma=0.1, 28 | scale_mu=0.8, warp_factor=0.0) 29 | batch_size: $$batch_size 30 | randomize: true 31 | 32 | eval_dataprocessors: 33 | ibug300w_test_common: 34 | $PARSE('../eval_data/ibug300w_test.yaml', 35 | subset='common', scale_mu=0.8, warp_factor=0.0, batch_size=ARGS['batch_size'], 36 | subset_ids=ARGS.get('test_subset_ids', None)) 37 | 
ibug300w_test_challenging: 38 | $PARSE('../eval_data/ibug300w_test.yaml', 39 | subset='challenging', scale_mu=0.8, warp_factor=0.0, batch_size=ARGS['batch_size'], 40 | subset_ids=ARGS.get('test_subset_ids', None)) 41 | ibug300w_test_full: 42 | class: blueprint.ml.DataProcessor 43 | data_src: 44 | class: blueprint.ml.DataSource 45 | data_path: 46 | - $BLOB('data/IBUG300W.test_common.zip') # image, crop_matrix, landmarks 47 | - $BLOB('data/IBUG300W.test_challenging.zip') # image, crop_matrix, landmarks 48 | subset_ids: $ARGS.get('test_subset_ids', None) 49 | augmentations: 50 | $PARSE('../augmenters/test.yaml', scale_mu=0.8, warp_factor=0.0) 51 | post_augmentations: 52 | $PARSE('../augmenters/test_post.yaml', warp_factor=0.0) 53 | batch_size: $$batch_size 54 | randomize: false 55 | 56 | networks: 57 | main: 58 | $PARSE('../networks/farl.yaml', 59 | model_type=ARGS['model_type'], 60 | model_path=ARGS['model_path'], 61 | input_resolution=ARGS['input_resolution'], 62 | head_channel=ARGS.get('head_channel', 768), 63 | num_landmarks=68) 64 | main_ema: 65 | class: blueprint.ml.EMAConfig 66 | network_name: main 67 | decay: 0.999 68 | 69 | task_module: 70 | class: FaceAlignment 71 | network_name: main 72 | network_name_eval: main_ema 73 | image_tag: warped_image 74 | landmark_tag: warped_landmarks 75 | pred_landmark_tag: pred_warped_landmarks 76 | loss_weights: 77 | coord_l1_loss: 1.0 78 | heatmap_ce_loss: 1.0 79 | heatmap_size: 128 80 | heatmap_radius: 5.0 81 | 82 | scorer: $PARSE('../scorers/ibug300w.yaml') 83 | 84 | optimizer_cfg: 85 | $PARSE('../optimizers/{optimizer_name}.yaml'.format( 86 | optimizer_name=ARGS['optimizer_name']), 87 | lr_factor=ARGS.get('lr_factor', 1.0)) 88 | 89 | logger: 90 | class: blueprint.ml.Loggers 91 | loggers: 92 | - class: blueprint.ml.StandardLogger 93 | - class: blueprint.ml.TSVDataLogger 94 | - class: blueprint.ml.TensorBoardLogger 95 | 96 | enable_amp: $$enable_amp 97 | max_epoches: 250 98 | states_save_interval: 20 99 | eval_interval: 1 
-------------------------------------------------------------------------------- /farl/experiments/face_alignment/trainers/wflw_farl.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | # args: cfg_file, 5 | # [train_data_ratio], [train_subset_ids], [test_subset_ids], 6 | # batch_size, 7 | # model_type, model_path, 8 | # input_resolution, head_channel, 9 | # optimizer_name, [lr_factor], enable_amp 10 | 11 | package: farl.experiments.face_alignment 12 | 13 | class: blueprint.ml.Trainer 14 | 15 | states_dir: $STATES(ARGS['cfg_file']) 16 | outputs_dir: $OUTPUTS(ARGS['cfg_file']) 17 | 18 | train_dataprocessor: 19 | class: blueprint.ml.DataProcessor 20 | data_src: 21 | class: blueprint.ml.DataSource 22 | data_path: $BLOB('data/WFLW.train.zip') 23 | random_ratio: $ARGS.get('train_data_ratio', None) 24 | subset_ids: $ARGS.get('train_subset_ids', None) 25 | augmentations: 26 | $PARSE('../augmenters/train.yaml', 27 | shift_sigma=0.05, rot_sigma=0.174, scale_sigma=0.1, 28 | scale_mu=0.8, warp_factor=0.0) 29 | batch_size: $$batch_size 30 | randomize: true 31 | 32 | eval_dataprocessors: 33 | wflw_test_all: 34 | $PARSE('../eval_data/wflw_test.yaml', 35 | subset='all', scale_mu=0.8, warp_factor=0.0, batch_size=ARGS['batch_size'], 36 | subset_ids=ARGS.get('test_subset_ids', None)) 37 | wflw_test_blur: 38 | $PARSE('../eval_data/wflw_test.yaml', 39 | subset='blur', scale_mu=0.8, warp_factor=0.0, batch_size=ARGS['batch_size'], 40 | subset_ids=ARGS.get('test_subset_ids', None)) 41 | wflw_test_expression: 42 | $PARSE('../eval_data/wflw_test.yaml', 43 | subset='expression', scale_mu=0.8, warp_factor=0.0, batch_size=ARGS['batch_size'], 44 | subset_ids=ARGS.get('test_subset_ids', None)) 45 | wflw_test_illumination: 46 | $PARSE('../eval_data/wflw_test.yaml', 47 | subset='illumination', scale_mu=0.8, warp_factor=0.0, batch_size=ARGS['batch_size'], 48 | 
subset_ids=ARGS.get('test_subset_ids', None)) 49 | wflw_test_largepose: 50 | $PARSE('../eval_data/wflw_test.yaml', 51 | subset='largepose', scale_mu=0.8, warp_factor=0.0, batch_size=ARGS['batch_size'], 52 | subset_ids=ARGS.get('test_subset_ids', None)) 53 | wflw_test_makeup: 54 | $PARSE('../eval_data/wflw_test.yaml', 55 | subset='makeup', scale_mu=0.8, warp_factor=0.0, batch_size=ARGS['batch_size'], 56 | subset_ids=ARGS.get('test_subset_ids', None)) 57 | wflw_test_occlusion: 58 | $PARSE('../eval_data/wflw_test.yaml', 59 | subset='occlusion', scale_mu=0.8, warp_factor=0.0, batch_size=ARGS['batch_size'], 60 | subset_ids=ARGS.get('test_subset_ids', None)) 61 | 62 | networks: 63 | main: 64 | $PARSE('../networks/farl.yaml', 65 | model_type=ARGS['model_type'], 66 | model_path=ARGS['model_path'], 67 | input_resolution=ARGS['input_resolution'], 68 | head_channel=ARGS.get('head_channel', 768), 69 | num_landmarks=98) 70 | main_ema: 71 | class: blueprint.ml.EMAConfig 72 | network_name: main 73 | decay: 0.999 74 | 75 | task_module: 76 | class: FaceAlignment 77 | network_name: main 78 | network_name_eval: main_ema 79 | image_tag: warped_image 80 | landmark_tag: warped_landmarks 81 | pred_landmark_tag: pred_warped_landmarks 82 | loss_weights: 83 | coord_l1_loss: 1.0 84 | heatmap_ce_loss: 1.0 85 | heatmap_size: 128 86 | heatmap_radius: 5.0 87 | 88 | scorer: $PARSE('../scorers/wflw.yaml') 89 | 90 | optimizer_cfg: 91 | $PARSE('../optimizers/{optimizer_name}.yaml'.format( 92 | optimizer_name=ARGS['optimizer_name']), 93 | lr_factor=ARGS.get('lr_factor', 1.0)) 94 | 95 | logger: 96 | class: blueprint.ml.Loggers 97 | loggers: 98 | - class: blueprint.ml.StandardLogger 99 | - class: blueprint.ml.TSVDataLogger 100 | - class: blueprint.ml.TensorBoardLogger 101 | 102 | enable_amp: $$enable_amp 103 | max_epoches: 150 104 | states_save_interval: 20 105 | eval_interval: 1 -------------------------------------------------------------------------------- /farl/experiments/face_parsing/__init__.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import numpy as np 5 | import torch 6 | 7 | from blueprint.ml.augmenters import ( 8 | With, UpdateRandomTransformMatrix, 9 | GetTransformMap, GetInvertedTransformMap, 10 | GetShape, TransformByMap, ArgMax, 11 | MakeNonStackable, UnwrapNonStackable, 12 | AttachConstData, FullLike, Filter, 13 | RandomGray, RandomGamma, RandomBlur, 14 | Normalize255, TransformImagePerspective) 15 | 16 | from ...network import FaRLVisualFeatures 17 | from .network import FaceParsingTransformer 18 | from .task import FaceParsing 19 | from .scorer import F1Score 20 | -------------------------------------------------------------------------------- /farl/experiments/face_parsing/augmenters/celebm/test.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | - class: With 5 | tags_str: image -> image 6 | aug: 7 | class: Normalize255 8 | 9 | # warp image & label using the align_matrix 10 | - class: With 11 | tags_str: align_matrix -> transform_map 12 | aug: 13 | class: GetTransformMap 14 | warped_shape: [512, 512] 15 | warp_factor: 0.0 16 | - class: With 17 | tags_str: image, transform_map -> warped_image 18 | aug: 19 | class: TransformByMap 20 | interpolation: bilinear 21 | 22 | # make some original data non-stackable 23 | - class: With 24 | tags_str: image, label, align_matrix -> original_data 25 | aug: 26 | class: MakeNonStackable 27 | 28 | - class: Filter 29 | tags: [warped_image, original_data] -------------------------------------------------------------------------------- /farl/experiments/face_parsing/augmenters/celebm/test_post.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | # +valid: bool 5 | # +pred_warped_logits: h x w x c 6 | - class: With 7 | tags_str: original_data -> image, label, align_matrix 8 | aug: 9 | class: UnwrapNonStackable 10 | - class: With 11 | tags_str: image -> image_shape 12 | aug: 13 | class: GetShape 14 | - class: With 15 | tags_str: align_matrix, image_shape -> transform_map_inv 16 | aug: 17 | class: GetInvertedTransformMap 18 | warped_shape: [512, 512] 19 | warp_factor: 0.0 20 | - class: With 21 | tags_str: pred_warped_logits, transform_map_inv -> pred_label 22 | aug: 23 | - class: TransformByMap 24 | interpolation: bilinear 25 | - class: ArgMax 26 | axis: -1 27 | - class: Filter 28 | tags: [label, pred_label, valid] -------------------------------------------------------------------------------- /farl/experiments/face_parsing/augmenters/celebm/train.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | - class: With 5 | tags_str: image -> image 6 | aug: 7 | class: Normalize255 8 | 9 | # update align_matrix 10 | - class: AttachConstData 11 | tag_name: shape 12 | const_data: [512, 512] 13 | - class: With 14 | tags_str: shape, align_matrix -> shape, align_matrix 15 | aug: 16 | class: UpdateRandomTransformMatrix 17 | target_shape: [512, 512] 18 | shift_sigma: 0.01 19 | rot_sigma: 0.314 20 | scale_sigma: 0.1 21 | shift_normal: false 22 | ret_shape: true 23 | 24 | # warp image & label using the align_matrix 25 | - class: With 26 | tags_str: align_matrix -> transform_map 27 | aug: 28 | class: GetTransformMap 29 | warped_shape: [512, 512] 30 | warp_factor: 0.0 31 | - class: With 32 | tags_str: image, transform_map -> warped_image 33 | aug: 34 | class: TransformByMap 35 | interpolation: bilinear 36 | - class: With 37 | tags_str: label, transform_map -> warped_label 38 | aug: 39 | class: TransformByMap 40 | interpolation: nearest 41 | outlier_value: 0 42 | - class: With 43 | tags_str: label 
-> valid_mask 44 | aug: 45 | class: FullLike 46 | fill_value: 1 47 | - class: With 48 | tags_str: valid_mask, transform_map -> warped_valid_mask 49 | aug: 50 | class: TransformByMap 51 | interpolation: nearest 52 | outlier_value: 0 53 | 54 | # color augmentation on image 55 | - class: With 56 | tags_str: warped_image -> warped_image 57 | aug: 58 | - class: RandomGray 59 | - class: RandomGamma 60 | - class: RandomBlur 61 | 62 | - class: Filter 63 | tags: [warped_image, warped_label, warped_valid_mask] -------------------------------------------------------------------------------- /farl/experiments/face_parsing/augmenters/lapa/test.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | # args: warp_factor 5 | 6 | - class: With 7 | tags_str: image -> image 8 | aug: 9 | class: Normalize255 10 | 11 | # warp image & label using the align_matrix 12 | - class: With 13 | tags_str: align_matrix -> transform_map 14 | aug: 15 | class: GetTransformMap 16 | warped_shape: [512, 512] 17 | warp_factor: $$warp_factor 18 | - class: With 19 | tags_str: image, transform_map -> warped_image 20 | aug: 21 | class: TransformByMap 22 | interpolation: bilinear 23 | 24 | # make some original data non-stackable 25 | - class: With 26 | tags_str: image, label, align_matrix -> original_data 27 | aug: 28 | class: MakeNonStackable 29 | 30 | - class: Filter 31 | tags: [warped_image, original_data] -------------------------------------------------------------------------------- /farl/experiments/face_parsing/augmenters/lapa/test_post.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | # args: warp_factor 5 | 6 | # +valid: bool 7 | # +pred_warped_logits: h x w x c 8 | - class: With 9 | tags_str: original_data -> image, label, align_matrix 10 | aug: 11 | class: UnwrapNonStackable 12 | - class: With 13 | tags_str: image -> image_shape 14 | aug: 15 | class: GetShape 16 | - class: With 17 | tags_str: align_matrix, image_shape -> transform_map_inv 18 | aug: 19 | class: GetInvertedTransformMap 20 | warped_shape: [512, 512] 21 | warp_factor: $$warp_factor 22 | - class: With 23 | tags_str: pred_warped_logits, transform_map_inv -> pred_label 24 | aug: 25 | - class: TransformByMap 26 | interpolation: bilinear 27 | - class: ArgMax 28 | axis: -1 29 | - class: Filter 30 | tags: [label, pred_label, valid] -------------------------------------------------------------------------------- /farl/experiments/face_parsing/augmenters/lapa/train.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | # args: warp_factor 5 | 6 | - class: With 7 | tags_str: image -> image 8 | aug: 9 | class: Normalize255 10 | 11 | # update align_matrix 12 | - class: AttachConstData 13 | tag_name: shape 14 | const_data: [512, 512] 15 | - class: With 16 | tags_str: shape, align_matrix -> shape, align_matrix 17 | aug: 18 | class: UpdateRandomTransformMatrix 19 | target_shape: [512, 512] 20 | shift_sigma: 0.01 21 | rot_sigma: 0.314 22 | scale_sigma: 0.1 23 | shift_normal: false 24 | ret_shape: true 25 | 26 | # warp image & label using the align_matrix 27 | - class: With 28 | tags_str: align_matrix -> transform_map 29 | aug: 30 | class: GetTransformMap 31 | warped_shape: [512, 512] 32 | warp_factor: $$warp_factor 33 | - class: With 34 | tags_str: image, transform_map -> warped_image 35 | aug: 36 | class: TransformByMap 37 | interpolation: bilinear 38 | - class: With 39 | tags_str: label, transform_map -> warped_label 40 | aug: 41 | class: TransformByMap 42 | interpolation: nearest 43 | outlier_value: 0 44 | - class: With 45 | tags_str: label -> valid_mask 46 | aug: 47 | class: FullLike 48 | fill_value: 1 49 | - class: With 50 | tags_str: valid_mask, transform_map -> warped_valid_mask 51 | aug: 52 | class: TransformByMap 53 | interpolation: nearest 54 | outlier_value: 0 55 | 56 | # color augmentation on image 57 | - class: With 58 | tags_str: warped_image -> warped_image 59 | aug: 60 | - class: RandomGray 61 | - class: RandomGamma 62 | - class: RandomBlur 63 | 64 | - class: Filter 65 | tags: [warped_image, warped_label, warped_valid_mask] -------------------------------------------------------------------------------- /farl/experiments/face_parsing/network.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
from typing import List, Tuple

import torch.nn as nn
import torch.nn.functional as F


class FaceParsingTransformer(nn.Module):
    """Backbone + head wrapper for face parsing.

    Given a float32 image batch of shape [b, 3, h, w] normalized to [0, 1],
    produces per-pixel class logits resized to `out_size`.

    Args:
        backbone (nn.Module): Feature extractor; its forward returns
            (features, aux) and only the features are used here.
        head (nn.Module): Segmentation head mapping features to logits.
        out_size (Tuple[int, int]): Spatial size (h, w) of the output logits.

    Returns (from forward):
        logits (torch.Tensor): [b, nclasses, out_size[0], out_size[1]].
        aux_outputs (dict): Always empty.
    """

    def __init__(self, backbone: nn.Module, head: nn.Module, out_size: Tuple[int, int]):
        super().__init__()
        self.backbone = backbone
        self.head = head
        self.out_size = out_size
        # Move the whole module to GPU in float32, as the trainer expects.
        self.cuda().float()

    def forward(self, image):
        # The backbone also returns auxiliary data, which is ignored here.
        features, _aux = self.backbone(image)
        raw_logits = self.head(features)
        # Upsample logits to the fixed output resolution.
        resized = F.interpolate(
            raw_logits, size=self.out_size,
            mode='bilinear', align_corners=False)
        return resized, {}
3 | 4 | # args: model_type, model_path, input_resolution, head_channel, num_labels 5 | 6 | class: FaceParsingTransformer 7 | backbone: 8 | class: farl.network.FaRLVisualFeatures 9 | model_type: $$model_type 10 | model_path: $$model_path 11 | output_indices: $ARGS.get('output_indices', None) 12 | forced_input_resolution: $$input_resolution 13 | head: 14 | class: farl.network.MMSEG_UPerHead 15 | in_channels: $[FaRLVisualFeatures.get_output_channel(ARGS['model_type'])]*4 16 | channels: $$head_channel 17 | num_classes: $$num_labels 18 | out_size: [512, 512] -------------------------------------------------------------------------------- /farl/experiments/face_parsing/optimizers/freeze_backbone.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | optimizer_type: torch.optim.AdamW 5 | optimizer_args: 6 | lr: 0.001 7 | betas: [0.9, 0.999] 8 | weight_decay: 0.00001 9 | lr_scheduler_type: torch.optim.lr_scheduler.MultiStepLR 10 | lr_scheduler_args: 11 | milestones: [200] 12 | gamma: 0.1 13 | lr_scheduler_call: epochwise 14 | network_settings: 15 | - params: main.backbone.fpns 16 | lr: 0.001 17 | - params: main.head 18 | lr: 0.001 -------------------------------------------------------------------------------- /farl/experiments/face_parsing/optimizers/refine_backbone.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | optimizer_type: torch.optim.AdamW 5 | optimizer_args: 6 | lr: 0.001 7 | betas: [0.9, 0.999] 8 | weight_decay: 0.00001 9 | lr_scheduler_type: torch.optim.lr_scheduler.MultiStepLR 10 | lr_scheduler_args: 11 | milestones: [200] 12 | gamma: 0.1 13 | lr_scheduler_call: epochwise 14 | network_settings: 15 | - params: main.backbone.visual 16 | lr: 0.0001 17 | - params: main.backbone.fpns 18 | lr: 0.001 19 | - params: main.head 20 | lr: 0.001 -------------------------------------------------------------------------------- /farl/experiments/face_parsing/scorer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from typing import Mapping, Optional, List 5 | 6 | import numpy as np 7 | import torch 8 | import torch.distributed as dist 9 | 10 | from blueprint.ml import Scorer 11 | 12 | 13 | class F1Score(Scorer): 14 | """Compute F1 score among label and pred_label. 15 | 16 | Args: 17 | label_tag (str): The tag for groundtruth label, which is a np.ndarray with dtype=int. 18 | pred_label_tag (str): The tag for predicted label, which is a np.ndarray with dtype=int 19 | and same shape with groundtruth label. 20 | label_names (List[str]): Names of the label values. 21 | num_labels (int): The number of valid label values. 
22 | """ 23 | 24 | def __init__(self, label_tag: str = 'label', pred_label_tag: str = 'pred_label', 25 | label_names: Optional[List[str]] = None, num_labels: Optional[int] = None, 26 | compute_fg_mean: bool = False, bg_label_name: str = 'background') -> None: 27 | self.label_tag = label_tag 28 | self.pred_label_tag = pred_label_tag 29 | if label_names is None and num_labels is None: 30 | raise RuntimeError( 31 | 'The label_names and the num_labels should never both be None.') 32 | if label_names is None: 33 | label_names = [f'label.{i}' for i in range(num_labels)] 34 | if num_labels is None: 35 | num_labels = len(label_names) 36 | self.label_names = label_names 37 | self.num_labels = num_labels 38 | self.compute_fg_mean = compute_fg_mean 39 | self.bg_label_name = bg_label_name 40 | 41 | def init_evaluation(self): 42 | self.hists_sum = np.zeros( 43 | [self.num_labels, self.num_labels], dtype=np.int64) 44 | self.count = 0 45 | self.num_pixels = 0 46 | 47 | def evaluate(self, data: Mapping[str, np.ndarray]): 48 | label = data[self.label_tag] 49 | pred_label = data[self.pred_label_tag] 50 | if label.shape != pred_label.shape: 51 | raise RuntimeError( 52 | f'The label shape {label.shape} mismatches the pred_label shape {pred_label.shape}') 53 | 54 | hist = __class__._collect_hist( 55 | label, pred_label, self.num_labels, self.num_labels) 56 | self.hists_sum += hist 57 | self.count += 1 58 | self.num_pixels += label.shape[0] * label.shape[1] 59 | 60 | def finalize_evaluation(self) -> Mapping[str, float]: 61 | # gather all hists_sum 62 | hists_sum = torch.from_numpy(self.hists_sum).contiguous().cuda() 63 | if dist.is_initialized(): 64 | dist.all_reduce(hists_sum) 65 | count_sum = torch.tensor(self.count, dtype=torch.int64, device='cuda') 66 | if dist.is_initialized(): 67 | dist.all_reduce(count_sum) 68 | num_pixels = torch.tensor( 69 | self.num_pixels, dtype=torch.int64, device='cuda') 70 | if dist.is_initialized(): 71 | dist.all_reduce(num_pixels) 72 | 73 | assert 
hists_sum.sum() == num_pixels 74 | 75 | # compute F1 score 76 | A = hists_sum.sum(0).to(dtype=torch.float64) 77 | B = hists_sum.sum(1).to(dtype=torch.float64) 78 | intersected = hists_sum.diagonal().to(dtype=torch.float64) 79 | f1 = 2 * intersected / (A + B) 80 | 81 | f1s = {self.label_names[i]: f1[i].item() 82 | for i in range(self.num_labels)} 83 | if self.compute_fg_mean: 84 | f1s_fg = [f1[i].item() for i in range(self.num_labels) 85 | if self.label_names[i] != self.bg_label_name] 86 | f1s['fg_mean'] = sum(f1s_fg) / len(f1s_fg) 87 | return f1s 88 | 89 | @staticmethod 90 | def _collect_hist(a: np.ndarray, b: np.ndarray, na: int, nb: int) -> np.ndarray: 91 | """ 92 | fast histogram calculation 93 | 94 | Args: 95 | a, b: Non negative label ids, a.shape == b.shape, a in [0, ... na-1], b in [0, ..., nb-1] 96 | 97 | Returns: 98 | hist (np.ndarray): The histogram matrix with shape [na, nb]. 99 | """ 100 | assert np.all((a >= 0) & (a < na) & (b >= 0) & (b < nb)) 101 | hist = np.bincount( 102 | nb * a.reshape([-1]).astype(np.int64) + 103 | b.reshape([-1]).astype(np.int64), 104 | minlength=na * nb).reshape(na, nb) 105 | assert np.sum(hist) == a.size 106 | return hist 107 | -------------------------------------------------------------------------------- /farl/experiments/face_parsing/scorers/celebm.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | class: F1Score 5 | label_tag: label 6 | pred_label_tag: pred_label 7 | label_names: 8 | ['background', 'neck', 'face', 'cloth', 'rr', 'lr', 'rb', 'lb', 're', 9 | 'le', 'nose', 'imouth', 'llip', 'ulip', 'hair', 10 | 'glass', 'hat', 'earr', 'neckl'] 11 | compute_fg_mean: true 12 | bg_label_name: background -------------------------------------------------------------------------------- /farl/experiments/face_parsing/scorers/lapa.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | class: F1Score 5 | label_tag: label 6 | pred_label_tag: pred_label 7 | label_names: 8 | - background 9 | - face_lr_rr 10 | - lb 11 | - rb 12 | - le 13 | - re 14 | - nose 15 | - ul 16 | - im 17 | - ll 18 | - hair 19 | compute_fg_mean: true 20 | bg_label_name: background -------------------------------------------------------------------------------- /farl/experiments/face_parsing/task.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from typing import Mapping, Any, Tuple, Optional 5 | 6 | import torch 7 | from torch import nn 8 | import torch.nn.functional as F 9 | 10 | from blueprint.ml import Task, ForwardFlags 11 | 12 | from ...network.viz import visualize_in_row 13 | 14 | 15 | class FaceParsing(Task): 16 | """Standard segmentation with crossentropy loss supervision. 17 | 18 | Requires the corresponding network inputs an image and outputs the segmentation logits. 

    Returns:
        pred_logit: [b, h, w, num_classes]
    """

    def __init__(self, network_name: str = 'main',
                 network_name_eval: Optional[str] = None,
                 image_tag: str = 'image', label_tag: str = 'label',
                 pred_logit_tag: str = 'pred_logit') -> None:
        super().__init__()
        self.network_name = network_name
        # Fall back to the training network when no separate eval network
        # (e.g. an EMA copy) is named.
        self.network_name_eval = network_name_eval
        if self.network_name_eval is None:
            self.network_name_eval = self.network_name
        self.image_tag = image_tag
        self.label_tag = label_tag
        self.pred_logit_tag = pred_logit_tag

    def setup_networks(self, networks: Mapping[str, nn.Module]):
        # Resolve the named networks; both names may point to the same module.
        self.segmentation_net = networks[self.network_name]
        self.segmentation_net_eval = networks[self.network_name_eval]

    def forward(self, data: Mapping[str, torch.Tensor], flags: ForwardFlags
                ) -> Tuple[
                    Optional[torch.Tensor],
                    Mapping[str, torch.Tensor],
                    Mapping[str, torch.Tensor],
                    Mapping[str, torch.Tensor]]:

        # b x c x h x w
        # Input images arrive channel-last [b, h, w, c]; permute to channel-first
        # before feeding the segmentation network.
        if self.training:
            pred_logit, aux_outputs = self.segmentation_net(
                data[self.image_tag].cuda().permute(0, 3, 1, 2).contiguous())
        else:
            pred_logit, aux_outputs = self.segmentation_net_eval(
                data[self.image_tag].cuda().permute(0, 3, 1, 2).contiguous())

        if flags.with_losses:
            gt_label = data[self.label_tag].to(
                device=pred_logit.device, dtype=torch.int64)  # batch, h, w

            batch, channels, h, w = pred_logit.shape

            assert gt_label.shape == (batch, h, w)

            # Flatten spatial dims so cross_entropy sees one row per pixel.
            pred_logit_vec = pred_logit.permute(
                0, 2, 3, 1).reshape(-1, channels)  # (batchxhxw), channels

            gt_label_vec = gt_label.view(-1)  # (batchxhxw)

            ce_loss_vec = F.cross_entropy(
                pred_logit_vec, target=gt_label_vec, reduction='none')  # (batchxhxw)

            # Per-sample mean over spatial dims; the scalar loss is the batch mean.
            ce_loss = ce_loss_vec.view(batch, h, w).mean([1, 2])

            loss, losses = ce_loss.mean(), {'ce_loss': ce_loss}
        else:
            loss, losses = None, dict()

        if flags.with_outputs:
            # Expose logits channel-last [b, h, w, num_classes] alongside aux outputs.
            outputs = {**aux_outputs,
                       self.pred_logit_tag: pred_logit.permute(0, 2, 3, 1)}
        else:
            outputs = dict()

        if flags.with_images:
            # Visualize class probabilities (softmax over the class dim).
            images = {self.pred_logit_tag: visualize_in_row(
                (pred_logit.softmax(dim=1), 'BNHW', 0.0, 1.0))}
        else:
            images = dict()

        return loss, losses, outputs, images
3 | 4 | package: farl.experiments.face_parsing 5 | 6 | class: blueprint.ml.DistributedGPURun 7 | local_run: 8 | $PARSE('./trainers/celebm_farl.yaml', 9 | cfg_file=FILE, 10 | train_data_ratio=None, 11 | batch_size=5, 12 | model_type='base', 13 | model_path=BLOB('checkpoint/FaRL-Base-Patch16-LAIONFace20M-ep16.pth'), 14 | input_resolution=448, 15 | head_channel=768, 16 | optimizer_name='refine_backbone', 17 | enable_amp=False) -------------------------------------------------------------------------------- /farl/experiments/face_parsing/train_celebm_farl-b-ep64_448_refinebb.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | package: farl.experiments.face_parsing 5 | 6 | class: blueprint.ml.DistributedGPURun 7 | local_run: 8 | $PARSE('./trainers/celebm_farl.yaml', 9 | cfg_file=FILE, 10 | train_data_ratio=None, 11 | batch_size=5, 12 | model_type='base', 13 | model_path=BLOB('checkpoint/FaRL-Base-Patch16-LAIONFace20M-ep64.pth'), 14 | input_resolution=448, 15 | head_channel=768, 16 | optimizer_name='refine_backbone', 17 | enable_amp=False) -------------------------------------------------------------------------------- /farl/experiments/face_parsing/train_lapa_farl-b-50m-ep16_448_refinebb.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | package: farl.experiments.face_parsing 5 | 6 | class: blueprint.ml.DistributedGPURun 7 | local_run: 8 | $PARSE('./trainers/lapa_farl.yaml', 9 | cfg_file=FILE, 10 | train_data_ratio=None, 11 | batch_size=5, 12 | model_type='base', 13 | model_path=BLOB('checkpoint/FaRL-Base-Patch16-LAIONFace50M-ep16.pth'), 14 | input_resolution=448, 15 | head_channel=768, 16 | optimizer_name='refine_backbone', 17 | enable_amp=False) -------------------------------------------------------------------------------- /farl/experiments/face_parsing/train_lapa_farl-b-ep16_448_refinebb.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | package: farl.experiments.face_parsing 5 | 6 | class: blueprint.ml.DistributedGPURun 7 | local_run: 8 | $PARSE('./trainers/lapa_farl.yaml', 9 | cfg_file=FILE, 10 | train_data_ratio=None, 11 | batch_size=5, 12 | model_type='base', 13 | model_path=BLOB('checkpoint/FaRL-Base-Patch16-LAIONFace20M-ep16.pth'), 14 | input_resolution=448, 15 | head_channel=768, 16 | optimizer_name='refine_backbone', 17 | enable_amp=False) -------------------------------------------------------------------------------- /farl/experiments/face_parsing/train_lapa_farl-b-ep64_448_refinebb.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | package: farl.experiments.face_parsing 5 | 6 | class: blueprint.ml.DistributedGPURun 7 | local_run: 8 | $PARSE('./trainers/lapa_farl.yaml', 9 | cfg_file=FILE, 10 | train_data_ratio=None, 11 | batch_size=5, 12 | model_type='base', 13 | model_path=BLOB('checkpoint/FaRL-Base-Patch16-LAIONFace20M-ep64.pth'), 14 | input_resolution=448, 15 | head_channel=768, 16 | optimizer_name='refine_backbone', 17 | enable_amp=False) -------------------------------------------------------------------------------- /farl/experiments/face_parsing/trainers/celebm_farl.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | # args: cfg_file, 5 | # [train_data_ratio], warp_factor, batch_size, 6 | # model_type, model_path, 7 | # input_resolution, head_channel, 8 | # optimizer_name, enable_amp 9 | 10 | class: blueprint.ml.Trainer 11 | 12 | states_dir: $STATES(ARGS['cfg_file']) 13 | outputs_dir: $OUTPUTS(ARGS['cfg_file']) 14 | 15 | train_dataprocessor: 16 | class: blueprint.ml.DataProcessor 17 | data_src: 18 | class: blueprint.ml.DataSource 19 | data_path: $BLOB('data/CelebAMaskHQ.train.zip') 20 | random_ratio: $ARGS.get('train_data_ratio', None) 21 | subset_ids: $ARGS.get('train_subset_ids', None) 22 | augmentations: $PARSE('../augmenters/celebm/train.yaml') 23 | batch_size: $$batch_size 24 | randomize: true 25 | 26 | eval_dataprocessors: 27 | celebm_test: 28 | class: blueprint.ml.DataProcessor 29 | data_src: 30 | class: blueprint.ml.DataSource 31 | data_path: $BLOB('data/CelebAMaskHQ.test.zip') 32 | augmentations: $PARSE('../augmenters/celebm/test.yaml') 33 | post_augmentations: $PARSE('../augmenters/celebm/test_post.yaml') 34 | batch_size: $$batch_size 35 | randomize: false 36 | 37 | networks: 38 | main: 39 | $PARSE('../networks/farl.yaml', 40 | model_type=ARGS['model_type'], 41 | model_path=ARGS['model_path'], 42 | input_resolution=ARGS['input_resolution'], 43 | 
head_channel=ARGS.get('head_channel', 768), 44 | num_labels=19) 45 | main_ema: 46 | class: blueprint.ml.EMAConfig 47 | network_name: main 48 | decay: 0.999 49 | 50 | task_module: 51 | class: FaceParsing 52 | network_name: main 53 | network_name_eval: main_ema 54 | image_tag: warped_image 55 | label_tag: warped_label 56 | pred_logit_tag: pred_warped_logits 57 | 58 | scorer: $PARSE('../scorers/celebm.yaml') 59 | 60 | optimizer_cfg: 61 | $PARSE('../optimizers/{optimizer_name}.yaml'.format( 62 | optimizer_name=ARGS['optimizer_name'])) 63 | 64 | logger: 65 | class: blueprint.ml.Loggers 66 | loggers: 67 | - class: blueprint.ml.StandardLogger 68 | - class: blueprint.ml.TSVDataLogger 69 | - class: blueprint.ml.TensorBoardLogger 70 | 71 | enable_amp: $$enable_amp 72 | max_epoches: 300 73 | states_save_interval: 20 74 | eval_interval: 1 -------------------------------------------------------------------------------- /farl/experiments/face_parsing/trainers/lapa_farl.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | # args: cfg_file, 5 | # [train_data_ratio], [warp_factor], batch_size, 6 | # model_type, model_path, 7 | # input_resolution, head_channel, 8 | # optimizer_name, enable_amp 9 | 10 | class: blueprint.ml.Trainer 11 | 12 | states_dir: $STATES(ARGS['cfg_file']) 13 | outputs_dir: $OUTPUTS(ARGS['cfg_file']) 14 | 15 | train_dataprocessor: 16 | class: blueprint.ml.DataProcessor 17 | data_src: 18 | class: blueprint.ml.DataSource 19 | data_path: $BLOB('data/LaPa.train.zip') 20 | random_ratio: $ARGS.get('train_data_ratio', None) 21 | subset_ids: $ARGS.get('train_subset_ids', None) 22 | augmentations: 23 | $PARSE('../augmenters/lapa/train.yaml', 24 | warp_factor=ARGS.get('warp_factor', 0.8)) 25 | batch_size: $$batch_size 26 | randomize: true 27 | 28 | eval_dataprocessors: 29 | lapa_test: 30 | class: blueprint.ml.DataProcessor 31 | data_src: 32 | class: blueprint.ml.DataSource 33 | data_path: $BLOB('data/LaPa.test.zip') 34 | augmentations: 35 | $PARSE('../augmenters/lapa/test.yaml', 36 | warp_factor=ARGS.get('warp_factor', 0.8)) 37 | post_augmentations: 38 | $PARSE('../augmenters/lapa/test_post.yaml', 39 | warp_factor=ARGS.get('warp_factor', 0.8)) 40 | batch_size: $$batch_size 41 | randomize: false 42 | 43 | networks: 44 | main: 45 | $PARSE('../networks/farl.yaml', 46 | model_type=ARGS['model_type'], 47 | model_path=ARGS['model_path'], 48 | input_resolution=ARGS['input_resolution'], 49 | head_channel=ARGS.get('head_channel', 768), 50 | num_labels=11) 51 | main_ema: 52 | class: blueprint.ml.EMAConfig 53 | network_name: main 54 | decay: 0.999 55 | 56 | task_module: 57 | class: FaceParsing 58 | network_name: main 59 | network_name_eval: main_ema 60 | image_tag: warped_image 61 | label_tag: warped_label 62 | pred_logit_tag: pred_warped_logits 63 | 64 | scorer: $PARSE('../scorers/lapa.yaml') 65 | 66 | optimizer_cfg: 67 | $PARSE('../optimizers/{optimizer_name}.yaml'.format( 68 | optimizer_name=ARGS['optimizer_name'])) 69 | 70 | logger: 71 | class: blueprint.ml.Loggers 72 | 
loggers: 73 | - class: blueprint.ml.StandardLogger 74 | - class: blueprint.ml.TSVDataLogger 75 | - class: blueprint.ml.TensorBoardLogger 76 | 77 | enable_amp: $$enable_amp 78 | max_epoches: 300 79 | states_save_interval: 20 80 | eval_interval: 1 -------------------------------------------------------------------------------- /farl/network/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from .common import (load_checkpoint, Activation, MLP, Residual) 5 | from .geometry import (normalize_points, denormalize_points, 6 | points2heatmap, heatmap2points) 7 | from .mmseg import MMSEG_UPerHead 8 | from .transformers import FaRLVisualFeatures 9 | -------------------------------------------------------------------------------- /farl/network/common.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | from typing import List, Optional, Tuple, Optional 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from blueprint.ml import Logger 10 | 11 | 12 | def load_checkpoint(net: nn.Module, checkpoint_path: str, network_name: str): 13 | states = torch.load(open(checkpoint_path, 'rb'), map_location={ 14 | 'cuda:0': f'cuda:{torch.cuda.current_device()}'}) 15 | network_states = states['networks'] 16 | net.load_state_dict(network_states[network_name]) 17 | return net 18 | 19 | 20 | class Activation(nn.Module): 21 | def __init__(self, name: Optional[str], **kwargs): 22 | super().__init__() 23 | if name == 'relu': 24 | self.fn = F.relu 25 | elif name == 'softplus': 26 | self.fn = F.softplus 27 | elif name == 'gelu': 28 | self.fn = F.gelu 29 | elif name == 'sigmoid': 30 | self.fn = torch.sigmoid 31 | elif name == 'sigmoid_x': 32 | self.epsilon = kwargs.get('epsilon', 1e-3) 33 | self.fn = lambda x: torch.clamp( 34 | x.sigmoid() * (1.0 + self.epsilon*2.0) - self.epsilon, 35 | min=0.0, max=1.0) 36 | elif name == None: 37 | self.fn = lambda x: x 38 | else: 39 | raise RuntimeError(f'Unknown activation name: {name}') 40 | 41 | def forward(self, x): 42 | return self.fn(x) 43 | 44 | 45 | class MLP(nn.Module): 46 | def __init__(self, channels: List[int], act: Optional[str]): 47 | super().__init__() 48 | assert len(channels) > 1 49 | layers = [] 50 | for i in range(len(channels)-1): 51 | layers.append(nn.Linear(channels[i], channels[i+1])) 52 | if i+1 < len(channels): 53 | layers.append(Activation(act)) 54 | self.layers = nn.Sequential(*layers) 55 | 56 | def forward(self, x): 57 | return self.layers(x) 58 | 59 | 60 | class Residual(nn.Module): 61 | def __init__(self, net: nn.Module, res_weight_init: Optional[float] = 0.0): 62 | super().__init__() 63 | self.net = net 64 | if res_weight_init is not None: 65 | self.res_weight = nn.Parameter(torch.tensor(res_weight_init)) 66 | else: 67 | self.res_weight = None 68 | 69 | def forward(self, x): 70 | 
if self.res_weight is not None: 71 | return self.res_weight * self.net(x) + x 72 | else: 73 | return self.net(x) + x 74 | 75 | 76 | class SE(nn.Module): 77 | def __init__(self, channel: int, r: int = 1): 78 | super().__init__() 79 | self.branch = nn.Sequential( 80 | nn.Conv2d(channel, channel//r, (1, 1)), 81 | nn.ReLU(), 82 | nn.Conv2d(channel//r, channel, (1, 1)), 83 | nn.Sigmoid() 84 | ) 85 | 86 | def forward(self, x): 87 | # x: b x channel x h x w 88 | v = x.mean([2, 3], keepdim=True) # b x channel x 1 x 1 89 | v = self.branch(v) # b x channel x 1 x 1 90 | return x * v 91 | 92 | 93 | def verbose_execution(model: nn.Module, logger: Optional[Logger]): 94 | _print = print if logger is None else logger.log_info 95 | for name, layer in model.named_children(): 96 | layer._layer_name_ = name 97 | layer.register_forward_hook( 98 | lambda layer, _, output: _print( 99 | f"{layer._layer_name_}: shape={output.shape}, mean={output.mean().item()}," 100 | f" max={output.max().values.item()}, min={output.min().values.item()}") 101 | ) 102 | return model 103 | -------------------------------------------------------------------------------- /farl/network/ext/p2i_ops/.clang-format: -------------------------------------------------------------------------------- 1 | Language: Cpp 2 | BasedOnStyle: LLVM -------------------------------------------------------------------------------- /farl/network/ext/p2i_ops/.gitignore: -------------------------------------------------------------------------------- 1 | __temp__/ -------------------------------------------------------------------------------- /farl/network/ext/p2i_ops/README.md: -------------------------------------------------------------------------------- 1 | # P2I 2 | 3 | `p2i` is a simple yet effective point rendering operator for PyTorch. It is fully differentiable. It supports gradients to be back-propagated to not only point colors/features, but also point coordinates. The current implementation of `p2i` requires CUDA. 
4 | 5 | ## Citation 6 | 7 | If you find this operator useful, please consider citing 8 | 9 | ``` 10 | @article{zheng2021farl, 11 | title={General Facial Representation Learning in a Visual-Linguistic Manner}, 12 | author={Zheng, Yinglin and Yang, Hao and Zhang, Ting and Bao, Jianmin and Chen, Dongdong and Huang, Yangyu and Yuan, Lu and Chen, Dong and Zeng, Ming and Wen, Fang}, 13 | journal={arXiv preprint arXiv:2112.03109}, 14 | year={2021} 15 | } 16 | 17 | @inproceedings{xie2021style, 18 | title={Style-based Point Generator with Adversarial Rendering for Point Cloud Completion}, 19 | author={Xie, Chulin and Wang, Chuxin and Zhang, Bo and Yang, Hao and Chen, Dong and Wen, Fang}, 20 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, 21 | pages={4619--4628}, 22 | year={2021} 23 | } 24 | ``` 25 | 26 | ## Contact 27 | 28 | Please raise issues or contact [Hao Yang](https://haya.pro) (`haya@microsoft.com`) for any questions about this implementation. -------------------------------------------------------------------------------- /farl/network/ext/p2i_ops/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
import os
from typing import Tuple, List, Union

import torch
from torch.autograd import Function
from torch.utils.cpp_extension import load


module_path = os.path.dirname(os.path.abspath(__file__))
# Bug fix: the original condition was inverted (`if 'CUDA_HOME' in os.environ`),
# which clobbered a user-provided CUDA_HOME with the default path and did
# nothing when the variable was actually missing. Only supply the default
# location when CUDA_HOME is unset.
if 'CUDA_HOME' not in os.environ:
    os.environ['CUDA_HOME'] = '/usr/local/cuda'

# JIT-compile the CUDA extension at import time (requires nvcc + CUDA).
ext = load(
    'ext',
    sources=[
        os.path.join(module_path, 'ext.cpp'),
        os.path.join(module_path, 'p2i_sum.cu'),
        os.path.join(module_path, 'p2i_max.cu'),
    ],
    extra_cuda_cflags=['--expt-extended-lambda',
                       '-O3', '-use_fast_math']
)
assert ext is not None


# p2i


class P2ISumFunction(Function):
    """Autograd wrapper around the CUDA p2i 'sum' rasterizer."""

    @staticmethod
    def forward(ctx, points, point_features, batch_inds, background,
                kernel_kind, kernel_radius):
        ctx.save_for_backward(points, point_features, batch_inds)
        ctx.kernel_kind = kernel_kind
        ctx.kernel_radius = kernel_radius

        out = ext.p2i_sum_forward_gpu(
            points.contiguous(),
            point_features.contiguous(),
            batch_inds.contiguous(),
            background.contiguous(), kernel_kind, kernel_radius)

        return (out,)

    @staticmethod
    def backward(ctx, out_grad):
        points, point_features, batch_inds = ctx.saved_tensors
        kernel_kind = ctx.kernel_kind
        kernel_radius = ctx.kernel_radius

        points_grad, point_features_grad = ext.p2i_sum_backward_gpu(
            out_grad.contiguous(), points.contiguous(),
            point_features.contiguous(), batch_inds.contiguous(),
            kernel_kind, kernel_radius)

        # Sum-reduction is linear in the background, so its gradient is the
        # output gradient unchanged.
        background_grad = out_grad
        return (points_grad, point_features_grad, None,
                background_grad, None, None)


class P2IMaxFunction(Function):
    """Autograd wrapper around the CUDA p2i 'max' rasterizer."""

    @staticmethod
    def forward(ctx, points, point_features, batch_inds, background,
                kernel_kind, kernel_radius):

        out, out_point_ids = ext.p2i_max_forward_gpu(
            points.contiguous(),
            point_features.contiguous(),
            batch_inds.contiguous(),
            background.contiguous(), kernel_kind, kernel_radius)

        ctx.save_for_backward(points, point_features, out_point_ids)
        ctx.kernel_kind = kernel_kind
        ctx.kernel_radius = kernel_radius

        # The winning point ids are integer indices, not differentiable outputs.
        ctx.mark_non_differentiable(out_point_ids)

        return (out, out_point_ids)

    @staticmethod
    def backward(ctx, out_grad, _):
        points, point_features, out_point_ids = ctx.saved_tensors
        kernel_kind = ctx.kernel_kind
        kernel_radius = ctx.kernel_radius

        points_grad, point_features_grad, background_grad = ext.p2i_max_backward_gpu(
            out_grad.contiguous(), out_point_ids, points.contiguous(),
            point_features.contiguous(),
            kernel_kind, kernel_radius)

        return (points_grad, point_features_grad, None,
                background_grad, None, None)


_p2i_kernel_kind_dict = {'cos': 0, 'gaussian_awing': 1}


def p2i(points: torch.Tensor, point_features: torch.Tensor,
        batch_inds: torch.Tensor, background: torch.Tensor,
        kernel_radius: float, kernel_kind_str: str = 'cos', reduce: str = 'sum',
        with_auxilary_output: bool = False
        ) -> Union[torch.Tensor, Tuple[torch.Tensor, ...]]:
    """Paint point cloud features on to 2D feature maps.

    Args:
        points (torch.Tensor): float, [npoints x (X,Y)]. X, Y are all absolute coordinates.
        point_features (torch.Tensor): float, [npoints x channels]
        batch_inds (torch.Tensor): int32, [npoints]
        background (torch.Tensor): float, [batch x channels x out_h x out_w]
        kernel_radius (float):
        kernel_kind_str (str): {'cos'}
        reduce (str): {'sum', 'max'}
        with_auxilary_output (bool): If True with reduce='max', also return the
            per-pixel winning point ids.

    Returns:
        - torch.Tensor: float, [batch x channels x out_h x out_w]

    Raises:
        RuntimeError: If `reduce` is neither 'sum' nor 'max'.
    """
    kernel_kind = _p2i_kernel_kind_dict[kernel_kind_str]

    assert points.size(0) == point_features.size(0)
    assert batch_inds.size(0) == points.size(0)
    assert background.size(1) == point_features.size(1)

    # Swap (X, Y) -> (row, col) ordering expected by the CUDA kernels.
    points = points[:, [1, 0]]

    if reduce == 'sum':
        assert kernel_kind == 0  # other kinds not implemented yet for p2i_sum
        result = P2ISumFunction.apply(points, point_features, batch_inds, background,
                                      kernel_kind, kernel_radius)
    elif reduce == 'max':
        result = P2IMaxFunction.apply(points, point_features, batch_inds, background,
                                      kernel_kind, kernel_radius)
    else:
        raise RuntimeError(f'Invalid reduce value: {reduce}')
    if with_auxilary_output:
        return result
    return result[0]
3 | 4 | #pragma once 5 | 6 | #include 7 | 8 | #ifdef _OPENMP 9 | #include 10 | #endif 11 | 12 | #ifdef __CUDACC__ 13 | #include 14 | #include 15 | #endif 16 | 17 | #ifdef _MSC_VER 18 | #define FORCE_INLINE __forceinline 19 | #define RESTRICT __restrict 20 | #pragma warning(disable : 4068) 21 | #else 22 | #define FORCE_INLINE __attribute__((always_inline)) 23 | #define RESTRICT __restrict__ 24 | #endif 25 | 26 | #ifdef __CUDACC__ 27 | #define XINLINE __device__ __host__ 28 | #define XGLOBAL __global__ 29 | #define XDEVICE __device__ 30 | #define XSHARED __shared__ 31 | #else 32 | #define XINLINE 33 | #define XGLOBAL 34 | #define XDEVICE 35 | #define XSHARED 36 | #endif 37 | 38 | namespace haya_ext { 39 | #ifdef __CUDACC__ 40 | // Macro for checking cuda errors following a cuda launch or api call 41 | #define cudaCheckError() \ 42 | do { \ 43 | cudaError_t e = cudaGetLastError(); \ 44 | if (e != cudaSuccess) { \ 45 | char buffer[512] = {'\0'}; \ 46 | sprintf(buffer, "Cuda failure %s:%d: '%s(%s)'", __FILE__, __LINE__, \ 47 | cudaGetErrorName(e), cudaGetErrorString(e)); \ 48 | AT_ERROR(buffer); \ 49 | } \ 50 | } while (0) 51 | #else 52 | #define cudaCheckError() 53 | #endif 54 | 55 | struct cpu_device {}; 56 | struct gpu_device {}; 57 | 58 | template struct kernel; 59 | 60 | template <> struct kernel { 61 | template 62 | inline static FORCE_INLINE void launch(OP op, const int N, Args... 
args) { 63 | #ifdef _OPENMP 64 | const int omp_cores = omp_get_thread_num(); 65 | if (omp_cores <= 1) { 66 | // Zero means not to use OMP, but don't interfere with external OMP 67 | // behavior 68 | for (int i = 0; i < N; ++i) { 69 | op(i, args...); 70 | } 71 | } else { 72 | #pragma omp parallel for num_threads(omp_cores) 73 | for (int i = 0; i < N; ++i) { 74 | op(i, args...); 75 | } 76 | } 77 | #else 78 | for (int i = 0; i < N; ++i) { 79 | op(i, args...); 80 | } 81 | #endif 82 | } 83 | }; 84 | 85 | #if defined(NO_CUDA) // try launching gpu kernel from a no cuda build 86 | template <> struct kernel { 87 | template 88 | inline static FORCE_INLINE void launch(OP op, const int N, Args... args) { 89 | AT_ERROR("failed to launch cuda kernel in a NO CUDA build"); 90 | } 91 | }; 92 | #elif defined(__CUDACC__) // launching gpu kernel within nvcc compilation 93 | namespace detail { 94 | constexpr int kMaxThreadsPerBlock = 1024; 95 | constexpr int kMaxGridNum = 65535; 96 | constexpr int kBaseThreadBits = 8; 97 | constexpr int kBaseThreadNum = 1 << kBaseThreadBits; 98 | constexpr int kBaseGridNum = 1024; 99 | 100 | template 101 | XGLOBAL void _generic_kernel(OP op, int N, Args... args) { 102 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; 103 | i += blockDim.x * gridDim.x) { 104 | op(i, args...); 105 | } 106 | } 107 | } // namespace detail 108 | 109 | template <> struct kernel { 110 | template 111 | inline static FORCE_INLINE void launch(OP op, const int N, Args... args) { 112 | static_assert(std::is_class::value, 113 | "You should pass a functor (including lambda) to " 114 | "kernel::launch. Passing a function pointer " 115 | "will cause cuda error in runtime."); 116 | const dim3 blocks = 117 | (N + detail::kBaseThreadNum - 1) / detail::kBaseThreadNum; 118 | detail::_generic_kernel 119 | <<>>(op, N, args...); 120 | } 121 | template 122 | inline static FORCE_INLINE void launch_max_threads(OP op, const int N, 123 | Args... 
args) { 124 | static_assert(std::is_class::value, 125 | "You should pass a functor (including lambda) to " 126 | "kernel::launch. Passing a function pointer " 127 | "will cause cuda error in runtime."); 128 | const dim3 blocks = 129 | (N + detail::kMaxThreadsPerBlock - 1) / detail::kMaxThreadsPerBlock; 130 | detail::_generic_kernel 131 | <<>>(op, N, args...); 132 | } 133 | }; 134 | #else // try launching gpu kernel without nvcc compilation, this should not 135 | // compile 136 | namespace detail { 137 | template struct always_false { 138 | static constexpr bool value = false; 139 | }; 140 | } // namespace detail 141 | template <> struct kernel { 142 | template 143 | inline static FORCE_INLINE void launch(OP op, const int N, Args... args) { 144 | static_assert(detail::always_false::value, 145 | "trying to instantiate gpu kernel under non cuda context"); 146 | } 147 | }; 148 | #endif 149 | } // namespace haya_ext 150 | -------------------------------------------------------------------------------- /farl/network/ext/p2i_ops/ext.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | #include 5 | 6 | #include "p2i_max.h" 7 | #include "p2i_sum.h" 8 | 9 | using namespace haya_ext; 10 | 11 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 12 | m.def("p2i_sum_forward_gpu", &p2i_sum_forward_gpu, "p2i sum forward (CUDA)"); 13 | m.def("p2i_sum_backward_gpu", &p2i_sum_backward_gpu, 14 | "p2i sum backward (CUDA)"); 15 | 16 | m.def("p2i_max_forward_gpu", &p2i_max_forward_gpu, "p2i max forward (CUDA)"); 17 | m.def("p2i_max_backward_gpu", &p2i_max_backward_gpu, 18 | "p2i max backward (CUDA)"); 19 | } -------------------------------------------------------------------------------- /farl/network/ext/p2i_ops/p2i_max.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 
2 | // Licensed under the MIT License. 3 | 4 | #include "p2i_max.h" 5 | 6 | namespace haya_ext { 7 | std::vector 8 | p2i_max_forward_gpu(const at::Tensor &points, const at::Tensor &point_features, 9 | const at::Tensor &batch_inds, const at::Tensor &background, 10 | int kernel_kind, double kernel_radius) { 11 | return p2i_max_op::forward(points, point_features, batch_inds, 12 | background, kernel_kind, 13 | kernel_radius); 14 | } 15 | 16 | std::vector p2i_max_backward_gpu(const at::Tensor &out_grad, 17 | const at::Tensor &out_point_ids, 18 | const at::Tensor &points, 19 | const at::Tensor &point_features, 20 | int kernel_kind, 21 | double kernel_radius) { 22 | return p2i_max_op::backward(out_grad, out_point_ids, points, 23 | point_features, kernel_kind, 24 | kernel_radius); 25 | } 26 | } // namespace haya_ext -------------------------------------------------------------------------------- /farl/network/ext/p2i_ops/p2i_max.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | 4 | #pragma once 5 | 6 | #include "utility.h" 7 | 8 | namespace haya_ext { 9 | 10 | template struct p2i_max_forward_kernel { 11 | XDEVICE void 12 | operator()(int id, 13 | const scalar_t *RESTRICT points, // npoints x 2 14 | const scalar_t *RESTRICT point_features, // npoints x channels 15 | const int32_t *RESTRICT batch_inds, // npoints 16 | scalar_t *RESTRICT out, // batch x channels x out_h x out_w 17 | int32_t *RESTRICT out_point_ids, // batch x channels x out_h x 18 | // out_w, stores point_ids 19 | int32_t *RESTRICT out_lock, // batch x channels x out_h x out_w 20 | int batch, int npoints, int channels, int kernel_kind, 21 | scalar_t kernel_radius, int out_h, int out_w) const { 22 | 23 | // npoints x channels 24 | if (id >= npoints * channels) { 25 | return; 26 | } 27 | const int point_feature_offset = id; 28 | const int channel_id = id % channels; 29 | id = id / channels; 30 | const int point_id = id % npoints; 31 | const int32_t batch_id = batch_inds[point_id]; 32 | if (batch_id < 0 || batch_id >= batch) { 33 | return; 34 | } 35 | 36 | const scalar_t point_y = points[point_id * 2 + 0]; 37 | const scalar_t point_x = points[point_id * 2 + 1]; 38 | 39 | for_each_pixel_near_point( 40 | point_y, point_x, out_h, out_w, kernel_radius, 41 | [=] XDEVICE(int y, int x, scalar_t dy, scalar_t dx, scalar_t r) { 42 | scalar_t weight = 0; 43 | switch (kernel_kind) { 44 | case 0: // cos 45 | weight = cos(r * M_PI / kernel_radius) * 0.5 + 0.5; 46 | break; 47 | case 1: // gaussian_awing 48 | weight = exp(-r * r * 16 / 2 / kernel_radius / kernel_radius); 49 | break; 50 | } 51 | 52 | const scalar_t weighted_value = 53 | point_features[point_feature_offset] * weight; 54 | 55 | // lock, compare and replace 56 | const int index = 57 | ((batch_id * channels + channel_id) * out_h + y) * out_w + x; 58 | bool locked = false; 59 | do { 60 | if (locked = atomic_cas(&out_lock[index], 0, 1) == 0) { 61 | const scalar_t current_value = 62 | atomic_add(&out[index], static_cast(0)); 63 | 
if (current_value < weighted_value) { 64 | atomic_exch(&(out[index]), weighted_value); 65 | atomic_exch(&(out_point_ids[index]), point_id); 66 | } 67 | atomic_exch(&out_lock[index], 0); 68 | } 69 | } while (!locked); 70 | }); 71 | } 72 | }; 73 | 74 | template struct p2i_max_backward_kernel { 75 | XDEVICE void operator()( 76 | int id, 77 | const scalar_t *RESTRICT out_grad, // batch x channels x out_h x out_w 78 | int32_t *RESTRICT out_point_ids, // batch x channels x out_h x 79 | // out_w, stores point_ids 80 | const scalar_t *RESTRICT points, // npoints x 2 81 | const scalar_t *RESTRICT point_features, // npoints x channels 82 | scalar_t *RESTRICT points_grad, // npoints x 2 83 | scalar_t *RESTRICT point_features_grad, // npoints x channels 84 | scalar_t *RESTRICT background_grad, // batch x channels x out_h x out_w 85 | int batch, int npoints, int channels, int kernel_kind, 86 | scalar_t kernel_radius, int out_h, int out_w) const { 87 | 88 | // batch x channels x out_h x out_w 89 | const int index = id; 90 | if (id >= batch * channels * out_h * out_w) { 91 | return; 92 | } 93 | const int x = id % out_w; 94 | id /= out_w; 95 | const int y = id % out_h; 96 | id /= out_h; 97 | 98 | const int channel_id = id % channels; 99 | id /= channels; 100 | const int batch_id = id % batch; 101 | 102 | const scalar_t out_grad_value = out_grad[index]; 103 | 104 | const int point_id = out_point_ids[index]; 105 | if (point_id < 0) { // background here, no grads to points or point_features 106 | atomic_add(&(background_grad[index]), out_grad_value); 107 | return; 108 | } 109 | 110 | const int point_y_offset = point_id * 2 + 0; 111 | const int point_x_offset = point_id * 2 + 1; 112 | const scalar_t point_y = points[point_y_offset]; 113 | const scalar_t point_x = points[point_x_offset]; 114 | 115 | const scalar_t dx = x - point_x, dy = y - point_y; 116 | const scalar_t r = sqrt(dx * dx + dy * dy); 117 | 118 | scalar_t weight = 0; 119 | switch (kernel_kind) { 120 | case 0: // cos 121 | 
weight = cos(r * M_PI / kernel_radius) * 0.5 + 0.5; 122 | break; 123 | case 1: // gaussian_awing (sigma=0.25) 124 | weight = exp(-(dx*dx + dy+dy) * 16.0 / 2.0 / kernel_radius / kernel_radius); 125 | break; 126 | } 127 | 128 | const int point_feature_offset = point_id * channels + channel_id; 129 | const scalar_t point_feature_value = point_features[point_feature_offset]; 130 | 131 | // grad of point feature 132 | atomic_add(&(point_features_grad[point_feature_offset]), 133 | out_grad_value * weight); 134 | 135 | // grad of weight 136 | const scalar_t weight_grad_value = out_grad_value * point_feature_value; 137 | 138 | // grad of point_y, point_x 139 | scalar_t point_y_grad = 0, point_x_grad = 0; 140 | switch (kernel_kind) { 141 | case 0: { // cos 142 | // weight = cos(r * M_PI / kernel_radius) * 0.5 + 0.5; 143 | const scalar_t k = weight_grad_value * sin(r * M_PI / kernel_radius) * 144 | 0.5 * M_PI / kernel_radius / 145 | max(r, static_cast(1e-10)); 146 | point_y_grad = k * dy; 147 | point_x_grad = k * dx; 148 | break; 149 | } 150 | case 1: { // gaussian_awing (sigma=0.25) 151 | // weight = exp(-r * r * 16 / 2 / kernel_radius / kernel_radius); 152 | const scalar_t c = 153 | static_cast(16.0f) / 2 / kernel_radius / kernel_radius; 154 | const scalar_t k = weight_grad_value * exp(-c * r * r) * (-2 * c); 155 | point_y_grad = k * dy; 156 | point_x_grad = k * dx; 157 | break; 158 | } 159 | } 160 | atomic_add(&(points_grad[point_y_offset]), point_y_grad); 161 | atomic_add(&(points_grad[point_x_offset]), point_x_grad); 162 | } 163 | }; 164 | 165 | struct p2i_max_op { 166 | template 167 | static std::vector 168 | forward(const at::Tensor &points, const at::Tensor &point_features, 169 | const at::Tensor &batch_inds, const at::Tensor &background, 170 | int kernel_kind, double kernel_radius) { 171 | // inputs: 172 | // - points: float, [npoints x 2] 173 | // - point_features: float, [npoints x channels] 174 | // - batch_inds: int32, [npoints] 175 | // - background: float, 
[batch x channels x out_h x out_w] 176 | // returns: 177 | // - output: float, [batch x channels x out_h x out_w] 178 | // - out_point_ids: int32, [batch x channels x out_h x out_w] 179 | 180 | auto npoints = points.size(0); 181 | auto channels = point_features.size(1); 182 | 183 | auto batch = background.size(0); 184 | auto out_h = background.size(2); 185 | auto out_w = background.size(3); 186 | 187 | at::Tensor out = background.clone(); 188 | at::Tensor out_lock = at::zeros({batch, channels, out_h, out_w}, 189 | points.options().dtype(at::kInt)); 190 | at::Tensor out_point_ids = at::full({batch, channels, out_h, out_w}, -1, 191 | background.options().dtype(at::kInt)); 192 | 193 | auto N = npoints * channels; 194 | 195 | AT_DISPATCH_FLOATING_TYPES( 196 | points.type(), "p2i_max_op::forward", ([&] { 197 | kernel::launch( 198 | p2i_max_forward_kernel(), N, points.data(), 199 | point_features.data(), batch_inds.data(), 200 | out.data(), out_point_ids.data(), 201 | out_lock.data(), batch, npoints, channels, kernel_kind, 202 | static_cast(kernel_radius), out_h, out_w); 203 | })); 204 | 205 | cudaCheckError(); 206 | return {out, out_point_ids}; 207 | } 208 | 209 | template 210 | static std::vector 211 | backward(const at::Tensor &out_grad, const at::Tensor &out_point_ids, 212 | const at::Tensor &points, const at::Tensor &point_features, 213 | int kernel_kind, double kernel_radius) { 214 | // inputs: 215 | // - out_grad: float, [batch x channels x out_h x out_w] 216 | // - out_point_ids: int32, [batch x channels x out_h x out_w] 217 | // - points: float, [npoints x 2] 218 | // - point_features: float, [npoints x channels] 219 | // returns: 220 | // - points_grad: float, [npoints x 2] 221 | // - point_features_grad: float, [npoints x channels] 222 | // - background_grad: float, [batch x channels x out_h x out_w] 223 | 224 | auto npoints = points.size(0); 225 | auto channels = point_features.size(1); 226 | 227 | auto batch = out_grad.size(0); 228 | auto out_h = 
out_grad.size(2); 229 | auto out_w = out_grad.size(3); 230 | 231 | at::Tensor points_grad = at::zeros_like(points); 232 | at::Tensor point_features_grad = at::zeros_like(point_features); 233 | at::Tensor background_grad = at::zeros_like(out_grad); 234 | 235 | auto N = batch * channels * out_h * out_w; 236 | AT_DISPATCH_FLOATING_TYPES( 237 | points.type(), "p2i_max_op::backward", ([&] { 238 | kernel::launch( 239 | p2i_max_backward_kernel(), N, out_grad.data(), 240 | out_point_ids.data(), points.data(), 241 | point_features.data(), points_grad.data(), 242 | point_features_grad.data(), 243 | background_grad.data(), batch, npoints, channels, 244 | kernel_kind, static_cast(kernel_radius), out_h, out_w); 245 | })); 246 | 247 | cudaCheckError(); 248 | return {points_grad, point_features_grad, background_grad}; 249 | } 250 | }; 251 | 252 | std::vector 253 | p2i_max_forward_gpu(const at::Tensor &points, const at::Tensor &point_features, 254 | const at::Tensor &batch_inds, const at::Tensor &background, 255 | int kernel_kind, double kernel_radius); 256 | 257 | std::vector p2i_max_backward_gpu(const at::Tensor &out_grad, 258 | const at::Tensor &out_point_ids, 259 | const at::Tensor &points, 260 | const at::Tensor &point_features, 261 | int kernel_kind, 262 | double kernel_radius); 263 | } // namespace haya_ext -------------------------------------------------------------------------------- /farl/network/ext/p2i_ops/p2i_sum.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | 4 | #include "p2i_sum.h" 5 | 6 | namespace haya_ext { 7 | at::Tensor p2i_sum_forward_gpu(const at::Tensor &points, 8 | const at::Tensor &point_features, 9 | const at::Tensor &batch_inds, 10 | const at::Tensor &background, int kernel_kind, 11 | double kernel_radius) { 12 | return p2i_sum_op::forward(points, point_features, batch_inds, 13 | background, kernel_kind, 14 | kernel_radius); 15 | } 16 | 17 | std::vector p2i_sum_backward_gpu(const at::Tensor &out_grad, 18 | const at::Tensor &points, 19 | const at::Tensor &point_features, 20 | const at::Tensor &batch_inds, 21 | int kernel_kind, 22 | double kernel_radius) { 23 | return p2i_sum_op::backward( 24 | out_grad, points, point_features, batch_inds, kernel_kind, kernel_radius); 25 | } 26 | } // namespace haya_ext -------------------------------------------------------------------------------- /farl/network/ext/p2i_ops/p2i_sum.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | 4 | #pragma once 5 | 6 | #include "utility.h" 7 | 8 | namespace haya_ext { 9 | 10 | template struct p2i_sum_forward_kernel { 11 | XDEVICE void 12 | operator()(int id, 13 | const scalar_t *RESTRICT points, // npoints x 2 14 | const scalar_t *RESTRICT point_features, // npoints x channels 15 | const int32_t *RESTRICT batch_inds, // npoints 16 | scalar_t *RESTRICT out, // batch x channels x out_h x out_w 17 | int batch, int npoints, int channels, int kernel_kind, 18 | scalar_t kernel_radius, int out_h, int out_w) const { 19 | 20 | // npoints x channels 21 | if (id >= npoints * channels) { 22 | return; 23 | } 24 | const int point_feature_offset = id; 25 | const int channel_id = id % channels; 26 | id = id / channels; 27 | const int point_id = id % npoints; 28 | const int32_t batch_id = batch_inds[point_id]; 29 | if (batch_id < 0 || batch_id >= batch) { 30 | return; 31 | } 32 | 33 | const scalar_t point_y = points[point_id * 2 + 0]; 34 | const scalar_t point_x = points[point_id * 2 + 1]; 35 | 36 | for_each_pixel_near_point( 37 | point_y, point_x, out_h, out_w, kernel_radius, 38 | [=] XDEVICE(int y, int x, scalar_t dy, scalar_t dx, scalar_t r) { 39 | // lock, compare and replace 40 | const int index = 41 | ((batch_id * channels + channel_id) * out_h + y) * out_w + x; 42 | 43 | scalar_t weight = 0; 44 | switch (kernel_kind) { 45 | case 0: // cos 46 | weight = cos(r * M_PI / kernel_radius) * 0.5 + 0.5; 47 | break; 48 | } 49 | 50 | const scalar_t feature_value = point_features[point_feature_offset]; 51 | atomic_add(&(out[index]), weight * feature_value); 52 | }); 53 | } 54 | }; 55 | 56 | template struct p2i_sum_backward_kernel { 57 | XDEVICE void operator()( 58 | int id, 59 | const scalar_t *RESTRICT out_grad, // batch x channels x out_h x out_w 60 | const scalar_t *RESTRICT points, // npoints x 2 61 | const scalar_t *RESTRICT point_features, // npoints x channels 62 | const int32_t *RESTRICT batch_inds, // npoints 63 | scalar_t *RESTRICT points_grad, // npoints x 2 64 
| scalar_t *RESTRICT point_features_grad, // npoints x channels 65 | int batch, int npoints, int channels, int kernel_kind, 66 | scalar_t kernel_radius, int out_h, int out_w) const { 67 | 68 | // npoints x channels 69 | if (id >= npoints * channels) { 70 | return; 71 | } 72 | const int point_feature_offset = id; 73 | const int channel_id = id % channels; 74 | id = id / channels; 75 | const int point_id = id % npoints; 76 | const int32_t batch_id = batch_inds[point_id]; 77 | if (batch_id < 0 || batch_id >= batch) { 78 | return; 79 | } 80 | 81 | const int point_y_offset = point_id * 2 + 0; 82 | const int point_x_offset = point_id * 2 + 1; 83 | const scalar_t point_y = points[point_y_offset]; 84 | const scalar_t point_x = points[point_x_offset]; 85 | 86 | for_each_pixel_near_point( 87 | point_y, point_x, out_h, out_w, kernel_radius, 88 | [=] XDEVICE(int y, int x, scalar_t dy, scalar_t dx, scalar_t r) { 89 | scalar_t weight = 0; 90 | const scalar_t r_X_PI_DIV_kernel_radius = r * M_PI / kernel_radius; 91 | switch (kernel_kind) { 92 | case 0: // cos 93 | weight = cos(r_X_PI_DIV_kernel_radius) * 0.5 + 0.5; 94 | break; 95 | } 96 | 97 | scalar_t point_feature_value = point_features[point_feature_offset]; 98 | 99 | // forward: out_value = point_feature_value * weight 100 | const int out_offset = 101 | ((batch_id * channels + channel_id) * out_h + y) * out_w + x; 102 | const scalar_t out_grad_value = out_grad[out_offset]; 103 | 104 | // grad of point feature 105 | atomic_add(&(point_features_grad[point_feature_offset]), 106 | out_grad_value * weight); 107 | 108 | // grad of weight 109 | const scalar_t weight_grad_value = 110 | out_grad_value * point_feature_value; 111 | 112 | // grad of point_y, point_x 113 | scalar_t point_y_grad = 0, point_x_grad = 0; 114 | switch (kernel_kind) { 115 | case 0: // cos 116 | // weight = cos(r * M_PI / kernel_radius) * 0.5 + 0.5; 117 | const scalar_t f = 0.5 * M_PI / kernel_radius; 118 | point_y_grad = weight_grad_value * 
sin(r_X_PI_DIV_kernel_radius) * 119 | f * dy / max(r, static_cast(1e-10)); 120 | point_x_grad = weight_grad_value * sin(r_X_PI_DIV_kernel_radius) * 121 | f * dx / max(r, static_cast(1e-10)); 122 | break; 123 | } 124 | atomic_add(&(points_grad[point_y_offset]), point_y_grad); 125 | atomic_add(&(points_grad[point_x_offset]), point_x_grad); 126 | }); 127 | } 128 | }; 129 | 130 | struct p2i_sum_op { 131 | template 132 | static at::Tensor 133 | forward(const at::Tensor &points, const at::Tensor &point_features, 134 | const at::Tensor &batch_inds, const at::Tensor &background, 135 | int kernel_kind, double kernel_radius) { 136 | // inputs: 137 | // - points: float, [npoints x 2] 138 | // - point_features: float, [npoints x channels] 139 | // - batch_inds: int32, [npoints] 140 | // - background: float, [batch x channels x out_h x out_w] 141 | // returns: 142 | // - output: float, [batch x channels x out_h x out_w] 143 | 144 | auto npoints = points.size(0); 145 | auto channels = point_features.size(1); 146 | 147 | auto batch = background.size(0); 148 | auto out_h = background.size(2); 149 | auto out_w = background.size(3); 150 | 151 | at::Tensor out = background.clone(); 152 | 153 | auto N = npoints * channels; 154 | 155 | AT_DISPATCH_FLOATING_TYPES( 156 | points.type(), "p2i_sum_op::forward", ([&] { 157 | kernel::launch( 158 | p2i_sum_forward_kernel(), N, points.data(), 159 | point_features.data(), batch_inds.data(), 160 | out.data(), batch, npoints, channels, kernel_kind, 161 | static_cast(kernel_radius), out_h, out_w); 162 | })); 163 | 164 | cudaCheckError(); 165 | return out; 166 | } 167 | 168 | template 169 | static std::vector 170 | backward(const at::Tensor &out_grad, const at::Tensor &points, 171 | const at::Tensor &point_features, const at::Tensor &batch_inds, 172 | int kernel_kind, double kernel_radius) { 173 | // inputs: 174 | // - out_grad: float, [batch x channels x out_h x out_w] 175 | // - points: float, [npoints x 2] 176 | // - point_features: float, 
[npoints x channels] 177 | // - batch: int32, [npoints] 178 | // returns: 179 | // - points_grad: float, [npoints x 2] 180 | // - point_features_grad: float, [npoints x channels] 181 | 182 | auto npoints = points.size(0); 183 | auto channels = point_features.size(1); 184 | 185 | auto batch = out_grad.size(0); 186 | auto out_h = out_grad.size(2); 187 | auto out_w = out_grad.size(3); 188 | 189 | at::Tensor points_grad = at::zeros_like(points); 190 | at::Tensor point_features_grad = at::zeros_like(point_features); 191 | 192 | auto N = npoints * channels; 193 | AT_DISPATCH_FLOATING_TYPES( 194 | points.type(), "p2i_sum_op::backward", ([&] { 195 | kernel::launch( 196 | p2i_sum_backward_kernel(), N, out_grad.data(), 197 | points.data(), point_features.data(), 198 | batch_inds.data(), points_grad.data(), 199 | point_features_grad.data(), batch, npoints, channels, 200 | kernel_kind, static_cast(kernel_radius), out_h, out_w); 201 | })); 202 | 203 | cudaCheckError(); 204 | return {points_grad, point_features_grad}; 205 | } 206 | }; 207 | 208 | at::Tensor p2i_sum_forward_gpu(const at::Tensor &points, 209 | const at::Tensor &point_features, 210 | const at::Tensor &batch_inds, 211 | const at::Tensor &background, int kernel_kind, 212 | double kernel_radius); 213 | 214 | std::vector p2i_sum_backward_gpu(const at::Tensor &out_grad, 215 | const at::Tensor &points, 216 | const at::Tensor &point_features, 217 | const at::Tensor &batch_inds, 218 | int kernel_kind, 219 | double kernel_radius); 220 | } // namespace haya_ext -------------------------------------------------------------------------------- /farl/network/ext/p2i_ops/sample.ipynb: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:f7d4c2c0c21613b6c6d6bad83a10723b21a3606c4156d39551d4adba13ef47e1 3 | size 2330 4 | -------------------------------------------------------------------------------- /farl/network/farl/__init__.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from .model import load_farl -------------------------------------------------------------------------------- /farl/network/geometry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | from typing import Tuple, Union 5 | 6 | import torch 7 | 8 | from .ext.p2i_ops import p2i 9 | 10 | 11 | def normalize_points(points: torch.Tensor, h: int, w: int) -> torch.Tensor: 12 | """ Normalize coordinates to [0, 1]. 13 | """ 14 | return (points + 0.5) / torch.tensor([[[w, h]]]).to(points) 15 | 16 | 17 | def denormalize_points(normalized_points: torch.Tensor, h: int, w: int) -> torch.Tensor: 18 | """ Reverse normalize_points. 19 | """ 20 | return normalized_points * torch.tensor([[[w, h]]]).to(normalized_points) - 0.5 21 | 22 | 23 | def points2heatmap(normalized_points, heatmap_size: Tuple[int, int], kernel_radius: float): 24 | """ Normalized points [b x npoints x 2(XY)] -> heatmaps. 
25 | """ 26 | batch, npoints, _ = normalized_points.shape 27 | out_h, out_w = heatmap_size 28 | 29 | points = denormalize_points(normalized_points, out_h, out_w) 30 | 31 | # (batch x npoints) x 1 x h x w 32 | heatmap = torch.zeros( 33 | batch * npoints, 1, out_h, out_w).to(points) 34 | # (batch x npoints) x 2 35 | points_flatten = points.view(-1, 2) 36 | # (batch x npoints) 37 | batch_inds = torch.arange( 38 | batch * npoints, dtype=torch.int32).cuda() 39 | # (batch x npoints) x 1 40 | points_color = torch.ones( 41 | points_flatten.size(0), 1).to(points_flatten) 42 | # (batch x npoints) x 1 x h x w 43 | heatmap = p2i(points_flatten, points_color, batch_inds=batch_inds, background=heatmap, 44 | kernel_radius=kernel_radius, 45 | kernel_kind_str='gaussian_awing', reduce='max') 46 | # batch x npoints x h x w 47 | heatmap = heatmap.reshape(batch, npoints, out_h, out_w) 48 | return heatmap 49 | 50 | 51 | def heatmap2points(heatmap, t_scale: Union[None, float, torch.Tensor] = None): 52 | """ Heatmaps -> normalized points [b x npoints x 2(XY)]. 
53 | """ 54 | dtype = heatmap.dtype 55 | _, _, h, w = heatmap.shape 56 | 57 | # 0 ~ h-1, 0 ~ w-1 58 | yy, xx = torch.meshgrid( 59 | torch.arange(h).float(), 60 | torch.arange(w).float()) 61 | 62 | yy = yy.view(1, 1, h, w).to(heatmap) 63 | xx = xx.view(1, 1, h, w).to(heatmap) 64 | 65 | if t_scale is not None: 66 | heatmap = (heatmap * t_scale).exp() 67 | heatmap_sum = torch.clamp(heatmap.sum([2, 3]), min=1e-6) 68 | 69 | yy_coord = (yy * heatmap).sum([2, 3]) / heatmap_sum # b x npoints 70 | xx_coord = (xx * heatmap).sum([2, 3]) / heatmap_sum # b x npoints 71 | 72 | points = torch.stack([xx_coord, yy_coord], dim=-1) # b x npoints x 2 73 | 74 | normalized_points = normalize_points(points, h, w) 75 | return normalized_points 76 | -------------------------------------------------------------------------------- /farl/network/mmseg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import torch.nn as nn 5 | 6 | 7 | class MMSEG_UPerHead(nn.Module): 8 | """Wraps the UPerHead from mmseg for segmentation. 
9 | """ 10 | 11 | def __init__(self, num_classes: int, 12 | in_channels: list = [384, 384, 384, 384], channels: int = 512): 13 | super().__init__() 14 | 15 | from mmseg.models.decode_heads import UPerHead 16 | self.head = UPerHead( 17 | in_channels=in_channels, 18 | in_index=[0, 1, 2, 3], 19 | pool_scales=(1, 2, 3, 6), 20 | channels=channels, 21 | dropout_ratio=0.1, 22 | num_classes=num_classes, 23 | norm_cfg=dict(type='SyncBN', requires_grad=True), 24 | align_corners=False, 25 | loss_decode=dict( 26 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) 27 | 28 | def forward(self, inputs): 29 | return self.head(inputs) 30 | -------------------------------------------------------------------------------- /farl/network/transformers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import math 5 | 6 | from typing import Optional, List, Tuple 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | 12 | 13 | from blueprint.ml.util import deal_with_remote_file 14 | from blueprint import Context 15 | 16 | from . 
import farl 17 | 18 | 19 | def _make_fpns(vision_patch_size: int, output_channels: int): 20 | if vision_patch_size in {16, 14}: 21 | fpn1 = nn.Sequential( 22 | nn.ConvTranspose2d(output_channels, output_channels, 23 | kernel_size=2, stride=2), 24 | nn.SyncBatchNorm(output_channels), 25 | nn.GELU(), 26 | nn.ConvTranspose2d(output_channels, output_channels, kernel_size=2, stride=2)) 27 | 28 | fpn2 = nn.ConvTranspose2d( 29 | output_channels, output_channels, kernel_size=2, stride=2) 30 | fpn3 = nn.Identity() 31 | fpn4 = nn.MaxPool2d(kernel_size=2, stride=2) 32 | return nn.ModuleList([fpn1, fpn2, fpn3, fpn4]) 33 | elif vision_patch_size == 8: 34 | fpn1 = nn.Sequential(nn.ConvTranspose2d( 35 | output_channels, output_channels, kernel_size=2, stride=2)) 36 | fpn2 = nn.Identity() 37 | fpn3 = nn.MaxPool2d(kernel_size=2, stride=2) 38 | fpn4 = nn.MaxPool2d(kernel_size=4, stride=4) 39 | return nn.ModuleList([fpn1, fpn2, fpn3, fpn4]) 40 | else: 41 | raise NotImplementedError() 42 | 43 | 44 | def _resize_pe(pe: torch.Tensor, new_size: int, mode: str = 'bicubic', num_tokens: int = 1) -> torch.Tensor: 45 | """Resize positional embeddings. 46 | 47 | Args: 48 | pe (torch.Tensor): A tensor with shape (num_tokens + old_size ** 2, width). pe[0, :] is the CLS token. 49 | 50 | Returns: 51 | torch.Tensor: A tensor with shape (num_tokens + new_size **2, width). 52 | """ 53 | l, w = pe.shape 54 | old_size = int(math.sqrt(l-num_tokens)) 55 | assert old_size ** 2 + num_tokens == l 56 | return torch.cat([ 57 | pe[:num_tokens, :], 58 | F.interpolate(pe[num_tokens:, :].reshape(1, old_size, old_size, w).permute(0, 3, 1, 2), 59 | (new_size, new_size), mode=mode, align_corners=False).view(w, -1).t()], dim=0) 60 | 61 | 62 | class FaRLVisualFeatures(nn.Module): 63 | """Extract features from FaRL visual encoder. 64 | 65 | Args: 66 | image (torch.Tensor): Float32 tensor with shape [b, 3, h, w], 67 | normalized to [0, 1]. 68 | 69 | Returns: 70 | List[torch.Tensor]: A list of features. 
71 | """ 72 | image_mean: torch.Tensor 73 | image_std: torch.Tensor 74 | output_channels: int 75 | num_outputs: int 76 | 77 | def __init__(self, model_type: str, 78 | model_path: str, output_indices: Optional[List[int]] = None, 79 | forced_input_resolution: Optional[int] = None, 80 | apply_fpn: bool = True, _ctx: Optional[Context] = None): 81 | super().__init__() 82 | 83 | model_path = deal_with_remote_file( 84 | model_path, _ctx.copy2local, _ctx.blob_root) 85 | self.visual = farl.load_farl(model_type, model_path) 86 | 87 | vision_patch_size = self.visual.conv1.weight.shape[-1] 88 | 89 | self.input_resolution = self.visual.input_resolution 90 | if forced_input_resolution is not None and \ 91 | self.input_resolution != forced_input_resolution: 92 | # resizing the positonal embeddings 93 | self.visual.positional_embedding = nn.Parameter( 94 | _resize_pe(self.visual.positional_embedding, 95 | forced_input_resolution//vision_patch_size)) 96 | self.input_resolution = forced_input_resolution 97 | 98 | self.output_channels = self.visual.transformer.width 99 | 100 | if output_indices is None: 101 | output_indices = self.__class__.get_default_output_indices( 102 | model_type) 103 | self.output_indices = output_indices 104 | self.num_outputs = len(output_indices) 105 | 106 | self.register_buffer('image_mean', torch.tensor( 107 | [0.48145466, 0.4578275, 0.40821073]).view(1, 3, 1, 1)) 108 | self.register_buffer('image_std', torch.tensor( 109 | [0.26862954, 0.26130258, 0.27577711]).view(1, 3, 1, 1)) 110 | 111 | if apply_fpn: 112 | self.fpns = _make_fpns(vision_patch_size, self.output_channels) 113 | else: 114 | self.fpns = None 115 | 116 | @staticmethod 117 | def get_output_channel(model_type): 118 | if model_type == 'base': 119 | return 768 120 | if model_type == 'large': 121 | return 1024 122 | if model_type == 'huge': 123 | return 1280 124 | 125 | @staticmethod 126 | def get_default_output_indices(model_type): 127 | if model_type == 'base': 128 | return [3, 5, 7, 11] 129 | 
if model_type == 'large': 130 | return [7, 11, 15, 23] 131 | if model_type == 'huge': 132 | return [8, 14, 20, 31] 133 | 134 | def forward(self, image: torch.Tensor) -> Tuple[List[torch.Tensor], List[torch.Tensor]]: 135 | # b x 3 x res x res 136 | _, _, input_h, input_w = image.shape 137 | if input_h != self.input_resolution or input_w != self.input_resolution: 138 | image = F.interpolate(image, self.input_resolution, 139 | mode='bilinear', align_corners=False) 140 | image = (image - self.image_mean) / self.image_std 141 | 142 | x = image.to(self.visual.conv1.weight.data) 143 | 144 | x = self.visual.conv1(x) # shape = [*, width, grid, grid] 145 | N, _, S, S = x.shape 146 | 147 | # shape = [*, width, grid ** 2] 148 | x = x.reshape(x.shape[0], x.shape[1], -1) 149 | x = x.permute(0, 2, 1) # shape = [*, grid ** 2, width] 150 | x = torch.cat([self.visual.class_embedding.to(x.dtype) + 151 | torch.zeros(x.shape[0], 1, x.shape[-1], 152 | dtype=x.dtype, device=x.device), 153 | x], dim=1) # shape = [*, grid ** 2 + 1, width] 154 | 155 | x = x + self.visual.positional_embedding.to(x.dtype) 156 | 157 | x = self.visual.ln_pre(x) 158 | 159 | x = x.permute(1, 0, 2).contiguous() # NLD -> LND 160 | 161 | features = [] 162 | cls_tokens = [] 163 | for blk in self.visual.transformer.resblocks: 164 | x = blk(x) # [S ** 2 + 1, N, D] 165 | # if idx in self.output_indices: 166 | feature = x[1:, :, :].permute( 167 | 1, 2, 0).view(N, -1, S, S).contiguous().float() 168 | features.append(feature) 169 | cls_tokens.append(x[0, :, :]) 170 | 171 | features = [features[ind] for ind in self.output_indices] 172 | cls_tokens = [cls_tokens[ind] for ind in self.output_indices] 173 | 174 | if self.fpns is not None: 175 | for i, fpn in enumerate(self.fpns): 176 | features[i] = fpn(features[i]) 177 | 178 | return features, cls_tokens 179 | -------------------------------------------------------------------------------- /farl/network/viz.py: 
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import torch
import torch.nn.functional as F

from .geometry import denormalize_points
from .ext.p2i_ops import p2i


def _expand_as_rgbs(x):
    # Split a [b, c, h, w] tensor into a list of 3-channel [b, 3, h, w]
    # tensors. When c is not a multiple of 3, the last channel is repeated
    # to pad up to the next multiple of 3.
    _, c, _, _ = x.shape
    if c == 3:
        return [x]

    if c % 3 > 0:
        x = torch.cat([
            x, x[:, [-1], :, :].expand(
                -1, 3 - c % 3, -1, -1)], dim=1)
        c = x.size(1)
    assert c % 3 == 0
    return list(x.split([3] * (c // 3), dim=1))


def _visualize_flags(flags, size, num_flags):
    # Render per-sample bit flags as a strip image: one cell per bit,
    # nearest-upsampled to `size` and broadcast to 3 channels.
    # NOTE(review): each cell holds the raw masked value (0 or 1 << i),
    # not 0/1, so bits above the first exceed the usual [0, 1] display
    # range -- confirm this is intended.
    batch_size = flags.size(0)
    flags = flags.to(dtype=torch.uint8)
    has_what = [flags & torch.full_like(flags, 1 << i)
                for i in range(num_flags)]
    # batch x 1 x 1 x 4
    vis_im = torch.stack(has_what, dim=1).float().view(
        batch_size, 1, 1, num_flags)
    vis_im = F.interpolate(vis_im.expand(-1, 3, -1, -1),
                           size=size, mode='nearest')
    return vis_im


def visualize_in_row(*data) -> torch.Tensor:
    """Visualize data in one row.

    Args:
        *data (list): A list of (value, modal, [v_min, v_max]) tuples.

        Each tuple defines the following inputs:

        value (torch.Tensor): The data value to visualize.
        modal (str): The modal type string of the data.
            Supported data modal types are:

            * "BHW", "BNHW", "BHWN" for tensors;
            * "flags_{K}" for binary flags, with K being the number of bits;
            * "points" for points, where `value` is a tensor with shape [B, N, 2].

        v_min (float): Optional, to normalize value.
        v_max (float): Optional, to normalize value.

    Returns:
        torch.Tensor: A tensor with shape b x 3 x h x w.
    """
    # batch/size/device carry over from the most recent tensor entry, so
    # that later entries (e.g. a None value or a "flags_K"/"points" entry)
    # can borrow the geometry of an earlier one. This means the first entry
    # must be a concrete tensor modal ("BHW"/"BNHW"/"BHWN").
    batch = None
    size = None
    device = None

    row = []
    for v in data:
        assert isinstance(v, (tuple, list))
        if len(v) == 2:
            value, modal = v
            v_min, v_max = 0.0, 1.0
        elif len(v) == 4:
            value, modal, v_min, v_max = v
        else:
            raise RuntimeError(
                'Input either (value, modal) or (value, modal, v_min, v_max)')

        if value is None:
            # Placeholder entry: fill the slot with random noise of the
            # last-seen geometry.
            assert batch is not None
            assert size is not None
            assert device is not None
            value = torch.rand(batch, 1, size[0], size[1], device=device)
            modal = 'BNHW'
            v_min, v_max = 0.0, 1.0

        if modal == 'BHW':
            # Single-channel map: normalize and broadcast to 3 channels.
            assert isinstance(value, torch.Tensor)
            value = value.detach().float()

            batch = value.size(0)
            size = value.shape[1:]
            device = value.device

            value = (value - v_min) / (v_max - v_min)
            row.append(value.unsqueeze(
                1).expand(-1, 3, -1, -1))

        elif modal == 'BNHW':
            # Channel-first multi-channel map: split into RGB triplets.
            assert isinstance(value, torch.Tensor)
            value = value.detach().float()

            batch = value.size(0)
            size = value.shape[2:]
            device = value.device

            value = (value - v_min) / (v_max - v_min)
            row += _expand_as_rgbs(value)

        elif modal == 'BHWN':
            # Channel-last map: permute to channel-first, then as BNHW.
            assert isinstance(value, torch.Tensor)
            value = value.detach().float().permute(0, 3, 1, 2)

            batch = value.size(0)
            size = value.shape[2:]
            device = value.device

            value = (value - v_min) / (v_max - v_min)
            row += _expand_as_rgbs(value)

        elif modal.startswith('flags_'):
            # Bit-flag strip; K parsed from the modal string "flags_{K}".
            assert isinstance(value, torch.Tensor)
            value = value.detach().float()

            batch = value.size(0)
            device = value.device

            num_flags = int(modal.split('_')[1])
            # Requires a previous tensor entry to define the spatial size.
            assert size is not None
            row.append(_visualize_flags(value, size, num_flags))

        elif modal == 'points':
            # value is a (points, background) pair; points are drawn onto
            # the background via the p2i extension op.
            points, background = value

            if background is None:
                background = torch.rand(
                    batch, 1, size[0], size[1], device=device)
            else:
                assert isinstance(background, torch.Tensor)
                background = background.detach().float()
                background = (background - v_min) / (v_max - v_min)

            if points is None:
                canvas = background
            else:
                assert isinstance(points, torch.Tensor)
                points = points.detach().float()
                # Points arrive normalized; map them to pixel coordinates.
                points = denormalize_points(
                    points, background.size(2), background.size(3))

                npoints = points.size(1)
                batch = background.size(0)
                assert points.size(0) == batch
                channels = background.size(1)

                # Flatten batch-major: index = b * npoints + n, matching
                # batch_inds built below.
                points = points.reshape(npoints * batch, 2)

                point_colors = torch.ones(
                    npoints * batch, channels, dtype=background.dtype, device=background.device)
                batch_inds = torch.arange(batch).unsqueeze(1).expand(-1, npoints).reshape(
                    npoints * batch).to(dtype=torch.int32, device=background.device)
                canvas = p2i(points, point_colors, batch_inds, background, 5)

            row.append(canvas)

    # Concatenate all rendered panels horizontally (along width).
    return torch.cat(row, dim=-1)
global_step epoch background face_lr_rr lb rb le re nose ul im ll hair fg_mean 2 | 2275 5 0.989987978 0.973036685 0.885155426 0.878772797 0.893121881 0.894956997 0.968309779 0.86420439 0.884666865 0.876497938 0.955160356 0.907388312 3 | 4550 10 0.991600939 0.977248634 0.911420533 0.908769639 0.925159195 0.92351955 0.97488682 0.886737405 0.904512726 0.899673778 0.961769199 0.927369748 4 | 6825 15 0.992040328 0.978406951 0.918396098 0.914176252 0.931163388 0.929629952 0.97650742 0.893738837 0.909817983 0.905734716 0.963399186 0.932097078 5 | 9100 20 0.992173578 0.978952037 0.922733175 0.917986777 0.934555037 0.933059587 0.977402932 0.897548557 0.913729383 0.908735632 0.963840523 0.934854364 6 | 11375 25 0.992188869 0.979214153 0.923466464 0.91945533 0.936598294 0.934614926 0.977908135 0.898486496 0.914441287 0.910001488 0.963862141 0.935804871 7 | 13650 30 0.992242502 0.979532614 0.924731395 0.9212786 0.937183664 0.935740283 0.978222938 0.900068734 0.915150742 0.910409115 0.964110218 0.93664283 8 | 15925 35 0.992454587 0.979641072 0.925832682 0.922761524 0.937805367 0.936875149 0.978418528 0.90088008 0.915119706 0.909800811 0.965067752 0.937220267 9 | 18200 40 0.99245317 0.97978871 0.925342757 0.923843838 0.937419003 0.936207824 0.978461409 0.900326254 0.914326636 0.909569328 0.965056432 0.937034219 10 | 20475 45 0.992410527 0.979925107 0.925970524 0.925035386 0.938201355 0.93666443 0.978786043 0.899642801 0.914902456 0.910194583 0.96494044 0.937426312 11 | 22750 50 0.992409369 0.979945767 0.926439199 0.925741588 0.938117206 0.93679926 0.978807999 0.899829182 0.915219436 0.910166062 0.964831133 0.937589683 12 | 25025 55 0.992435013 0.979967523 0.927226961 0.92688224 0.93794021 0.93688949 0.978864147 0.899510191 0.915400411 0.910628007 0.964963061 0.937827224 13 | 27300 60 0.992530421 0.980071141 0.927290316 0.926819505 0.938259953 0.937844215 0.978958923 0.900390327 0.91624264 0.910686991 0.96541926 0.938198327 14 | 29575 65 0.992421144 0.98008158 0.927834959 
0.926819453 0.939033941 0.93770338 0.978921672 0.900047426 0.916500137 0.910962654 0.964945208 0.938285041 15 | 31850 70 0.992496092 0.980118091 0.927369746 0.926852234 0.938859462 0.938221184 0.978958961 0.900475297 0.916480611 0.911139395 0.965256199 0.938373118 16 | 34125 75 0.992551791 0.980071668 0.926587503 0.926432556 0.938943973 0.937979487 0.978989836 0.900115381 0.916567231 0.91093381 0.96553049 0.938215194 17 | 36400 80 0.992631902 0.980072918 0.9271774 0.926594203 0.938869365 0.938217122 0.978946002 0.900219974 0.916653786 0.911165921 0.965948969 0.938386566 18 | 38675 85 0.992539526 0.980042817 0.927654894 0.926456416 0.939299927 0.939234286 0.978935736 0.900584047 0.916725428 0.911500926 0.965548187 0.938598266 19 | 40950 90 0.992515838 0.980086124 0.928012475 0.925957855 0.938867444 0.939310852 0.979046027 0.900384467 0.917023881 0.911817623 0.9654825 0.938598925 20 | 43225 95 0.992452377 0.98009219 0.927631249 0.926035899 0.938550986 0.939466762 0.979024762 0.900896966 0.916961345 0.911876405 0.965219751 0.938575632 21 | 45500 100 0.992409271 0.980029991 0.926627806 0.92602373 0.939092003 0.939167922 0.979087222 0.899690894 0.916451242 0.911538876 0.964985579 0.938269526 22 | 47775 105 0.992515206 0.980028941 0.92701855 0.926004682 0.938854499 0.939322423 0.979227064 0.900288225 0.916785203 0.911437858 0.965475573 0.938444302 23 | 50050 110 0.992553727 0.980041087 0.927548805 0.926367531 0.938911467 0.939198535 0.979132171 0.900290093 0.916948971 0.911300766 0.965620847 0.938536027 24 | 52325 115 0.992463911 0.979995746 0.927100674 0.92630671 0.939577051 0.93886891 0.979079953 0.900001107 0.917024427 0.911496331 0.965267594 0.93847185 25 | 54600 120 0.992403899 0.979979585 0.926703186 0.926500199 0.939371349 0.939418305 0.979113261 0.900515149 0.917334655 0.911873867 0.965051305 0.938586086 26 | 56875 125 0.992443247 0.97994715 0.926472904 0.926498937 0.939581821 0.939326139 0.979090982 0.900339327 0.917321422 0.912077839 0.965195655 0.938585218 27 
| 59150 130 0.992436878 0.979968723 0.92714583 0.926772666 0.939391447 0.93887556 0.979074069 0.901449143 0.917437466 0.911831813 0.96516563 0.938711235 28 | 61425 135 0.99240879 0.979988757 0.926981485 0.926811414 0.939403915 0.938778969 0.979080987 0.900912378 0.917144842 0.911541573 0.965031545 0.938567587 29 | 63700 140 0.992376123 0.980014308 0.927017047 0.926596553 0.939167513 0.938984111 0.979134731 0.901031223 0.917001784 0.911729969 0.96496459 0.938564183 30 | 65975 145 0.992438849 0.98002805 0.926944114 0.926130623 0.939629258 0.938852761 0.979079882 0.901113338 0.91681146 0.911548604 0.965208885 0.938534697 31 | 68250 150 0.99245925 0.980002013 0.926970678 0.926510028 0.93973116 0.93912486 0.979178354 0.901491884 0.917424673 0.912140722 0.965240662 0.938781504 32 | 70525 155 0.992339929 0.979864995 0.926025315 0.926114011 0.939331306 0.938654862 0.978943129 0.900957367 0.917029777 0.911165489 0.964743597 0.938282985 33 | 72800 160 0.992203923 0.979900367 0.926727523 0.92647391 0.940395767 0.938723895 0.979026394 0.900681711 0.916931613 0.911898963 0.964201983 0.938496212 34 | 75075 165 0.992099216 0.979910904 0.927012403 0.926461464 0.94020402 0.938883692 0.979026131 0.900677053 0.917148455 0.911910724 0.96375364 0.938498848 35 | 77350 170 0.99227981 0.979919475 0.926861751 0.926645602 0.939963086 0.938864983 0.979087964 0.900318445 0.916988875 0.911618725 0.964526231 0.938479514 36 | 79625 175 0.992281594 0.979911201 0.927342439 0.927102102 0.940246937 0.938872301 0.979055245 0.900187462 0.917199209 0.912234196 0.964500405 0.93866515 37 | 81900 180 0.992221928 0.979940818 0.927025285 0.9270031 0.940082772 0.938656943 0.978984149 0.900785194 0.917031122 0.912342646 0.964293604 0.938614563 38 | 84175 185 0.992325698 0.979886464 0.927047941 0.926639573 0.940042049 0.938632734 0.978974008 0.900693916 0.9173745 0.912503413 0.96473467 0.938652927 39 | 86450 190 0.992274606 0.979866099 0.926841822 0.926451279 0.940156548 0.938498048 0.979042539 0.900703915 
0.917285387 0.912291289 0.964501264 0.938563819 40 | 88725 195 0.992303494 0.979836321 0.926651248 0.926435613 0.939686958 0.938572085 0.979123213 0.900531263 0.917631349 0.911926251 0.964635147 0.938502945 41 | 91000 200 0.992271196 0.979886934 0.926768892 0.926732517 0.939510606 0.938588125 0.97913274 0.900762246 0.918015928 0.912511568 0.964491357 0.938640091 42 | 93275 205 0.992195981 0.979828054 0.926688291 0.926807257 0.93962859 0.938729841 0.979138837 0.900722478 0.917566934 0.912373441 0.964163751 0.938564748 43 | 95550 210 0.992161355 0.979816649 0.92687609 0.926928335 0.939674495 0.938638312 0.9791825 0.900798301 0.91747354 0.91242363 0.964024594 0.938583645 44 | 97825 215 0.992124255 0.979797973 0.926845719 0.926657918 0.939534379 0.938653112 0.979141899 0.90075719 0.917250596 0.912376875 0.963872429 0.938488809 45 | 100100 220 0.992118588 0.979780456 0.926903579 0.92665234 0.939434856 0.938434274 0.979112889 0.900717321 0.917147925 0.912220964 0.963821266 0.938422587 46 | 102375 225 0.992114551 0.979774353 0.926995207 0.9268085 0.939458688 0.938413828 0.979115664 0.900536428 0.917185025 0.91225336 0.963801743 0.93843428 47 | 104650 230 0.992119675 0.979768675 0.927204191 0.926843172 0.939497907 0.938428531 0.979124918 0.900550283 0.917132671 0.912134996 0.963848239 0.938453358 48 | 106925 235 0.99209793 0.979752746 0.927023647 0.926674373 0.939474798 0.938343819 0.979073759 0.900493726 0.916799326 0.91198711 0.963762083 0.938338539 49 | 109200 240 0.992099494 0.979731915 0.926974493 0.926619269 0.939606125 0.938393057 0.979064783 0.9004502 0.916694153 0.911845866 0.963762806 0.938314267 50 | 111475 245 0.992104045 0.979709665 0.927005535 0.926646124 0.939715889 0.938457473 0.979038579 0.900448642 0.91679015 0.912027246 0.963793935 0.938363324 51 | 113750 250 0.992130284 0.979703398 0.926940489 0.926492288 0.939587469 0.938332106 0.979055329 0.900370495 0.916680302 0.91187978 0.96389212 0.938293378 52 | 116025 255 0.992104 0.979691064 0.926945291 
0.926677752 0.939491368 0.938316321 0.979020491 0.900507061 0.916888542 0.911821112 0.963767116 0.938312612 53 | 118300 260 0.992105329 0.979676503 0.927015168 0.926673863 0.939405117 0.938299468 0.979021256 0.900307616 0.916897456 0.911808121 0.963765612 0.938287018 54 | 120575 265 0.992092964 0.979663958 0.92700983 0.926661514 0.939423657 0.938249087 0.979015187 0.900269029 0.916923846 0.911800293 0.963723502 0.93827399 55 | 122850 270 0.992097427 0.979655787 0.927028609 0.926749016 0.939313003 0.938213875 0.978967406 0.900337721 0.917013765 0.911812321 0.963738708 0.938283021 56 | 125125 275 0.991994175 0.979653698 0.927049423 0.926707493 0.939358089 0.938232012 0.978976329 0.900369739 0.917090677 0.911910554 0.963296147 0.938264416 57 | 127400 280 0.991990309 0.979636583 0.926980785 0.92660933 0.939314191 0.938237876 0.978956019 0.900216986 0.91695283 0.911902857 0.963280545 0.9382088 58 | 129675 285 0.991991324 0.979647466 0.926947613 0.926636243 0.93940415 0.938170548 0.978960032 0.900134673 0.916876797 0.91182223 0.963285389 0.938188514 59 | 131950 290 0.991990298 0.979631356 0.927058664 0.926667647 0.939345338 0.938092034 0.978959659 0.900034725 0.916746506 0.911695571 0.96327757 0.938150907 60 | 134225 295 0.99199019 0.979618777 0.927066458 0.92653226 0.939231263 0.938170006 0.978979534 0.900130175 0.9167733 0.911685939 0.963267183 0.938145489 61 | 136500 300 0.991977596 0.979621484 0.927063881 0.926587561 0.939305526 0.938126372 0.978982225 0.90020567 0.916712107 0.911643823 0.963212715 0.938146136 62 | -------------------------------------------------------------------------------- /logs/reproduce/face_alignment.train_aflw19_farl-b-50m-ep16_448_refinebb/eval.aflw19_test_0.tsv: -------------------------------------------------------------------------------- 1 | global_step epoch inter_ocular inter_pupil box diag auc_box_7 fr_box_7 2 | 500 1 0.30372195889620385 0.45103474767904983 0.1187083552647985 0.08393919005874516 0.0027595379236966134 
0.9705882352941176 3 | 1000 2 0.1658182000657276 0.24668459146467453 0.06263745434470118 0.044291206217225265 0.13112071960567176 0.16279069767441856 4 | 1500 3 0.06678431033000955 0.10004851223397244 0.02348611173268817 0.016607128861719105 0.6714398084815322 0.004787961696306464 5 | 2000 4 0.04648940420781321 0.06993992949423593 0.015772063276618786 0.01115249098026747 0.780357957136343 0.0031919744642042724 6 | 2500 5 0.04288645482095909 0.06456253879754119 0.014476722198917722 0.010236551377852648 0.7979165852387466 0.002507979936160476 7 | 3000 6 0.04158687635039935 0.06259661116558723 0.01402912931359635 0.009920055008456417 0.8035851084619894 0.002279981760145877 8 | 3500 7 0.040948115469275466 0.0616293236158017 0.013807401122211922 0.009763269733197603 0.8065536447136995 0.0018239854081167906 9 | 4000 8 0.040489377684099624 0.06093511481400336 0.013660904048949249 0.009659681481093073 0.8084989577226241 0.0015959872321021917 10 | 4500 9 0.040151082050620936 0.06042256422690806 0.013551480684223863 0.00958230731370946 0.8099573317699175 0.0018239854081167906 11 | 5000 10 0.03991289047470823 0.06005910168050687 0.013476588766269362 0.00952935120964833 0.8110336460165463 0.0015959872321021917 12 | 5500 11 0.039796918807266726 0.0599013370734831 0.013435829082581207 0.00950052963235962 0.8116003843397824 0.0020519835841312783 13 | 6000 12 0.03968867742466264 0.05972091041631042 0.013390515419211607 0.009468488262714016 0.812264184743665 0.0020519835841312783 14 | 6500 13 0.03955435067945238 0.05948890445985807 0.013349329560954333 0.009439365753994866 0.8128203374373005 0.0018239854081167906 15 | 7000 14 0.03942742719532853 0.059303106263626455 0.013314985978891466 0.009415081631251246 0.8132846394371704 0.0015959872321021917 16 | 7500 15 0.039385543802368334 0.059247414814626795 0.013292247391268917 0.009399002673579198 0.813558562960068 0.0015959872321021917 17 | 8000 16 0.039335523084370015 0.05917320129700681 0.013273403517361705 0.009385679064958106 
0.8137820011725623 0.0015959872321021917 18 | 8500 17 0.03927436616395741 0.059068595444400504 0.013254084763220332 0.009372017995657792 0.8140318220311383 0.0015959872321021917 19 | 9000 18 0.03929805799102435 0.05909740136629688 0.013255426778241047 0.009372966886382096 0.814103478600743 0.0015959872321021917 20 | 9500 19 0.03935805989138024 0.05918298417772814 0.013265741942301765 0.0093802622992102 0.8140569018304998 0.0015959872321021917 21 | 10000 20 0.03936757489094384 0.05919232870746409 0.01326288570294465 0.009378242753598987 0.813970425379454 0.001367989056087593 22 | 10500 21 0.039430641769221314 0.05927439079510802 0.013272531164111038 0.009385061546704892 0.8139018630708099 0.0015959872321021917 23 | 11000 22 0.039444909050102596 0.05931382846788788 0.013302863486760075 0.009406509434203823 0.813544883069507 0.0015959872321021917 24 | 11500 23 0.039412679815748974 0.05926432959630073 0.013311742703446068 0.009412789855762209 0.813404175623738 0.0015959872321021917 25 | 12000 24 0.039466492617668436 0.05933157820381968 0.013341354008303675 0.009433728073266234 0.8130849781773176 0.0015959872321021917 26 | 12500 25 0.039538702245069536 0.05942412592535197 0.013376617779538233 0.00945866450402381 0.812669044361931 0.0015959872321021917 27 | 13000 26 0.039630362827702846 0.05957018725686132 0.013408145130468839 0.009480956912964814 0.812261904761905 0.0015959872321021917 28 | 13500 27 0.03971912890224222 0.05970432961633963 0.013429252948056494 0.009495881720324168 0.811934564523484 0.0015959872321021917 29 | 14000 28 0.03980086048715907 0.05982123712834516 0.013451053951626981 0.009511297933054035 0.8115922415477821 0.0015959872321021917 30 | 14500 29 0.039847847538020474 0.05988519462449769 0.01346649451718937 0.009522214090231615 0.8112665298677614 0.0015959872321021917 31 | 15000 30 0.03991881169198693 0.05999473366517755 0.013495051692296423 0.00954240693711169 0.810856458862615 0.0015959872321021917 32 | 15500 31 0.04000304466070131 
0.0601186756668056 0.01352447600219003 0.009563212953524887 0.8104118624193866 0.0015959872321021917 33 | 16000 32 0.040094131212282336 0.06024769697350234 0.013554524788288998 0.00958445993015552 0.8099773630382388 0.0015959872321021917 34 | 16500 33 0.04021973964345004 0.060440647944543006 0.013583430730747513 0.009604899784336882 0.8095510064490915 0.0015959872321021917 35 | 17000 34 0.040313223209546355 0.06058380781000627 0.013613261211097811 0.00962599281627622 0.8091464725425055 0.0015959872321021917 36 | 17500 35 0.0404351647999316 0.060753449641348184 0.013651483851483681 0.009653020981398554 0.8088064295485639 0.0015959872321021917 37 | 18000 36 0.040460422166232045 0.06079574703246124 0.013672026335459239 0.009667545706509347 0.8084377239267803 0.0015959872321021917 38 | 18500 37 0.04057415685205768 0.060965444527420344 0.013708403379037616 0.009693269255603768 0.8079976874470721 0.0015959872321021917 39 | 19000 38 0.04067686666270342 0.06112478163162976 0.013735772483815247 0.009712621060017848 0.8076263761318483 0.0015959872321021917 40 | 19500 39 0.04075819120741956 0.061253566367953434 0.013759914838283833 0.009729694135103169 0.8072478991596641 0.0015959872321021917 41 | 20000 40 0.04082914492445779 0.0613736397481867 0.013783583182262277 0.009746430619253288 0.8068905934466812 0.0015959872321021917 42 | 20500 41 0.04084762524536523 0.06139253058827092 0.013797169453576988 0.009756037811682858 0.8066769265845875 0.0015959872321021917 43 | 21000 42 0.04090703919876453 0.06147263053340886 0.013816741303227777 0.009769876308762967 0.8063955116930495 0.0015959872321021917 44 | 21500 43 0.040988015845873235 0.06159026167243787 0.013836375774447907 0.009783760032532044 0.8061538336264741 0.0015959872321021917 45 | 22000 44 0.04103712159316397 0.06165703018125855 0.01385425510223848 0.009796401761842545 0.8059694808155823 0.0015959872321021917 46 | 22500 45 0.041111136809147714 0.06176116289311873 0.01386990179713328 0.009807466645247306 0.8058030421470915 
0.0015959872321021917 47 | 23000 46 0.04119927666799369 0.061882961845484925 0.01388635474473333 0.009819101210984258 0.805544101361475 0.0015959872321021917 48 | 23500 47 0.04121569743506505 0.06191892054219035 0.013896448124069304 0.009826238330400973 0.805325874535861 0.0015959872321021917 49 | 24000 48 0.04125702419185334 0.06198152123767385 0.013910142243642324 0.0098359229300482 0.8051102534036872 0.0015959872321021917 50 | 24500 49 0.04131555796430582 0.06206365986279141 0.01393229462379633 0.009851584410591316 0.8048356784574298 0.0015959872321021917 51 | 25000 50 0.04137726804191426 0.06215004979667194 0.013948076998758033 0.009862746705326162 0.8046402514494172 0.0015959872321021917 52 | -------------------------------------------------------------------------------- /logs/reproduce/face_alignment.train_aflw19_farl-b-50m-ep16_448_refinebb/eval.aflw19_test_frontal_0.tsv: -------------------------------------------------------------------------------- 1 | global_step epoch inter_ocular inter_pupil box diag auc_box_7 fr_box_7 2 | 500 1 0.252838831513984 0.3657075600355546 0.13389165869586545 0.09467569986979167 0.0 1.0 3 | 1000 2 0.12655792933076485 0.182967559024805 0.06691924259179985 0.047319046438556826 0.081416431108212 0.20928462709284623 4 | 1500 3 0.041595084482132026 0.06009611945536946 0.021971634352406774 0.015536292321424325 0.7015046749293327 0.007610350076103556 5 | 2000 4 0.0262346369309331 0.0378981928484262 0.0138492555197334 0.009792901972476025 0.8141818873668191 0.0053272450532724225 6 | 2500 5 0.02398985887408438 0.034653845080139065 0.012659482215637485 0.008951606634363556 0.8287122200478366 0.003805175038051778 7 | 3000 6 0.023223252782720045 0.03354627646993466 0.012252245863823041 0.00866364489225855 0.8330811045879539 0.0030441400304414 8 | 3500 7 0.022847994277466376 0.033001070879002864 0.012052558509907947 0.008522445934185335 0.835578930202218 0.0022831050228310223 9 | 4000 8 0.0225906255945586 0.032627915682857986 
0.011916698386135712 0.00842637812528799 0.8373206131767776 0.0022831050228310223 10 | 4500 9 0.022409351993369186 0.0323660486183573 0.011820878793841265 0.008358623702050707 0.8385404435746903 0.0022831050228310223 11 | 5000 10 0.022306452421655756 0.03221569903183564 0.01176647726259275 0.008320155209057952 0.8393819308545337 0.0022831050228310223 12 | 5500 11 0.02219026818123038 0.032048435878898815 0.01170523816229183 0.008276853750104047 0.8400070667536421 0.0022831050228310223 13 | 6000 12 0.022133294487289824 0.031965386377622 0.011675389027123763 0.008255745903723497 0.8404571646010003 0.0022831050228310223 14 | 6500 13 0.022061485859538505 0.03185977674510381 0.01163697859649426 0.008228587233312597 0.8408659491193738 0.0022831050228310223 15 | 7000 14 0.022009501174160336 0.03178516080208928 0.011609930970353078 0.008209461490857547 0.8412301587301587 0.0022831050228310223 16 | 7500 15 0.02195848758180755 0.03171093097379037 0.011582945761252393 0.008190379295174934 0.8415432702761471 0.0022831050228310223 17 | 8000 16 0.021910465652721294 0.03164179321474862 0.011557764114310209 0.008172572294145234 0.8418183300717548 0.0022831050228310223 18 | 8500 17 0.021886282678427037 0.0316065841976729 0.011545448361285019 0.008163865116996126 0.8420151119808655 0.0022831050228310223 19 | 9000 18 0.021869136862558862 0.0315817218937286 0.01153710988014256 0.008157968884007934 0.8422368993259405 0.0022831050228310223 20 | 9500 19 0.021873410252494174 0.03158750490510845 0.01153865361322551 0.008159060456437063 0.8422640791476409 0.0022831050228310223 21 | 10000 20 0.0218516644458974 0.0315551061064141 0.011524468977883163 0.008149030183185363 0.8422379865188085 0.0022831050228310223 22 | 10500 21 0.021842136412087824 0.03154038149109351 0.011517395530480047 0.008144028110591244 0.8422847358121331 0.0022831050228310223 23 | 11000 22 0.021936461624308082 0.03167650579861854 0.011568088328275869 0.008179874362103653 0.8419520547945206 0.0022831050228310223 24 | 11500 
23 0.02197063567975885 0.03172580364813725 0.011586158061499284 0.00819265098513715 0.8418020221787347 0.0022831050228310223 25 | 12000 24 0.022039623928215225 0.03182740697759109 0.011625296449008053 0.008220325684801447 0.8415671885192434 0.0022831050228310223 26 | 12500 25 0.022108731204516268 0.03192859225802951 0.011663678574235472 0.008247466210723831 0.8411942813655144 0.0022831050228310223 27 | 13000 26 0.022144236339644574 0.0319811648248356 0.011683592513271663 0.008261547785371406 0.8408898673624702 0.0022831050228310223 28 | 13500 27 0.02213397817161711 0.03196837658932946 0.011677889337641282 0.008257513902684506 0.8408072407045011 0.0022831050228310223 29 | 14000 28 0.022178509101112864 0.032033072577582464 0.011701498220318891 0.0082742082836784 0.840427810393564 0.0022831050228310223 30 | 14500 29 0.02220068311763862 0.03206650198322453 0.01171353309666185 0.008282718774571991 0.8400831702544033 0.0022831050228310223 31 | 15000 30 0.0222390681459842 0.03212206185861991 0.01173431216491049 0.008297412181372331 0.8397744074798871 0.0022831050228310223 32 | 15500 31 0.022268428831521418 0.032164668020773876 0.011750523176730315 0.008308874417657722 0.8394699934768429 0.0022831050228310223 33 | 16000 32 0.022307573025266512 0.032221072703554565 0.011770339861308059 0.008322887043248935 0.8391721026310068 0.0022831050228310223 34 | 16500 33 0.022338327207521762 0.032266375862481565 0.01178603992433127 0.008333988567102628 0.8388720373994348 0.0022831050228310223 35 | 17000 34 0.02238409196223661 0.03233257566594279 0.01181118165339872 0.008351767262730243 0.8385654490106546 0.0022831050228310223 36 | 17500 35 0.02248753151392828 0.032480739204488025 0.011867549321422838 0.00839162489776379 0.8383110458795391 0.0022831050228310223 37 | 18000 36 0.022516207063579124 0.03252248314054771 0.01188220135878936 0.008401984674926944 0.8379772776690585 0.0022831050228310223 38 | 18500 37 0.02256747613033022 0.03259867837984268 0.01191167091125767 
0.008422823257097915 0.8376772124374865 0.0022831050228310223 39 | 19000 38 0.022606057119151774 0.032654755010271 0.011931838873133087 0.008437084099292029 0.837344531419874 0.0022831050228310223 40 | 19500 39 0.02263223453562372 0.03269127623675621 0.011943855604987529 0.00844558080037435 0.8370259839095456 0.0022831050228310223 41 | 20000 40 0.022688928078661588 0.03277337641839386 0.011973746109589413 0.008466716952156984 0.8366878669275931 0.0022831050228310223 42 | 20500 41 0.02272086673312717 0.032819698571795924 0.011991213445794093 0.008479068268379665 0.836540008697543 0.0022831050228310223 43 | 21000 42 0.02277056230983415 0.03289167848351884 0.01201753746973325 0.008497682336258562 0.836172537508154 0.0022831050228310223 44 | 21500 43 0.02281246736895185 0.032953346519528276 0.012040032280815972 0.008513588520671316 0.8359550989345511 0.0022831050228310223 45 | 22000 44 0.022863846754193125 0.03302784053157998 0.012068842825461378 0.008533959700818113 0.8356974342248316 0.0022831050228310223 46 | 22500 45 0.0228834979610356 0.03305677881342454 0.012080175147208994 0.008541973758505903 0.8355995868667103 0.0022831050228310223 47 | 23000 46 0.02287975293860588 0.033052218014791135 0.012076721162375069 0.008539532236131178 0.8354930419656447 0.0022831050228310223 48 | 23500 47 0.022871911616448762 0.0330409568194385 0.012071490469225647 0.008535832212032975 0.8354299847793 0.0022831050228310223 49 | 24000 48 0.022890895106113845 0.033068295483175476 0.012081437277830172 0.008542865741561356 0.8352332028701892 0.0022831050228310223 50 | 24500 49 0.02293633906627173 0.0331333862954986 0.012106187811725216 0.008560367189404446 0.834994020439226 0.0022831050228310223 51 | 25000 50 0.02299767759836972 0.033221376903888114 0.012139147638004484 0.008583673422013606 0.8347461404653187 0.0022831050228310223 52 | -------------------------------------------------------------------------------- 
/logs/reproduce/face_alignment.train_aflw19_farl-b-ep16_448_refinebb/eval.aflw19_test_0.tsv: -------------------------------------------------------------------------------- 1 | global_step epoch inter_ocular inter_pupil box diag auc_box_7 fr_box_7 2 | 500 1 0.2852458684236776 0.4252502190919973 0.10744825955454856 0.07597712887730491 0.0015061993789763988 0.9783401732786138 3 | 1000 2 0.12893658667136912 0.19227527039158687 0.04775637423562721 0.03376873693018268 0.33106847002366846 0.02165982672138622 4 | 1500 3 0.0615663819806629 0.09224255966694454 0.021506739597694547 0.015207501997210585 0.6997334049899031 0.0043319653442772665 5 | 2000 4 0.04624004859767762 0.06952701678191262 0.01571074284433068 0.011109133371195725 0.7809839749853429 0.00364797081623347 6 | 2500 5 0.042761517832173745 0.06432145271353454 0.014453172140125809 0.010219899258872336 0.7982085857598854 0.002735978112175075 7 | 3000 6 0.04166805477812037 0.06260951120887126 0.014027887319137775 0.009919180045973695 0.8039228714741711 0.002279981760145877 8 | 3500 7 0.04112436039720667 0.061763953031930434 0.013818106801336995 0.009770843464111662 0.8069327731092438 0.0018239854081167906 9 | 4000 8 0.04082053793764962 0.06128146775607045 0.013690777146756459 0.0096808066935611 0.8088038238551236 0.0015959872321021917 10 | 4500 9 0.04053833731439523 0.06086774792129788 0.013596723028583935 0.00961430084743404 0.8100995049182467 0.0015959872321021917 11 | 5000 10 0.040275904233195825 0.06046106257614783 0.013515649839889171 0.009556974279679395 0.8111886847762363 0.0015959872321021917 12 | 5500 11 0.04017879905300601 0.06030735445522686 0.013464980292852969 0.009521145174832742 0.8118914077258811 0.001367989056087593 13 | 6000 12 0.040024534295913135 0.06006328340974835 0.01341547272382563 0.009486138847827693 0.8125043971076802 0.001367989056087593 14 | 6500 13 0.039970745846826194 0.05996802904483014 0.013387701797311403 0.009466502637119527 0.8128027490065792 0.001367989056087593 15 | 7000 14 
0.03996636581594847 0.0599635968293113 0.013374410369218999 0.009457103313510407 0.8130571298286756 0.001367989056087593 16 | 7500 15 0.039919959754091495 0.05989100451454201 0.01336473359726794 0.009450260167571979 0.8132242199205263 0.001367989056087593 17 | 8000 16 0.03983455784941611 0.05978494444993659 0.013356542348100669 0.009444468542152053 0.8133505960523744 0.001367989056087593 18 | 8500 17 0.039776333705834256 0.05971485699794089 0.013337754137810647 0.009431182332523762 0.813607745423751 0.0015959872321021917 19 | 9000 18 0.0398115826914205 0.059784283444482444 0.013337379278138627 0.009430918800086122 0.813623053872712 0.0015959872321021917 20 | 9500 19 0.03985061332399966 0.05985780116688754 0.013332266922797235 0.009427303274200759 0.8136456908344732 0.001367989056087593 21 | 10000 20 0.03982898452974486 0.05983797100326323 0.013334276031480225 0.009428726175415207 0.8135494430330271 0.001139990880072994 22 | 10500 21 0.03980426640473595 0.059821710269091284 0.01334195239218848 0.009434153378091335 0.8134103641456581 0.001139990880072994 23 | 11000 22 0.03980939789444575 0.05983623847317815 0.013355937876007733 0.009444043237326952 0.8131651032506024 0.001139990880072994 24 | 11500 23 0.03980205029697653 0.05981928890174348 0.013370793973589618 0.00945454713583971 0.8129348250928279 0.001139990880072994 25 | 12000 24 0.039868794452964686 0.05988301678547509 0.013401925590991756 0.009476561226694756 0.8126255618526479 0.001367989056087593 26 | 12500 25 0.04005689921033366 0.060132723772194197 0.013423093420916694 0.009491528651511023 0.8122633704644647 0.001367989056087593 27 | 13000 26 0.04025276556332471 0.060405767730420654 0.01346124996075715 0.009518509850456353 0.8117813171780339 0.001367989056087593 28 | 13500 27 0.04043362013455455 0.0606543127391309 0.013496445891958255 0.009543398445292907 0.8113562634356067 0.001367989056087593 29 | 14000 28 0.04054687472168191 0.06082582625960086 0.01353047810566246 0.00956746078331178 0.8109406553319002 
0.001367989056087593 30 | 14500 29 0.04062225717526291 0.060931608006120325 0.013558663900073565 0.009587391837242255 0.8106090808416389 0.001367989056087593 31 | 15000 30 0.040676873620655564 0.06101133917979437 0.013588429149622466 0.00960843703326057 0.8102120383036935 0.001367989056087593 32 | 15500 31 0.040764665581894094 0.061152843052646205 0.01362643609349172 0.009635312993179648 0.809705882352941 0.001367989056087593 33 | 16000 32 0.04078659008911523 0.061202272344711865 0.013645407819769669 0.009648727055178675 0.8093567194319587 0.001367989056087593 34 | 16500 33 0.040794612607942884 0.06123223328666496 0.013668328183892107 0.009664935604709499 0.8089459970034525 0.001367989056087593 35 | 17000 34 0.0408746882002483 0.061367287137874774 0.01369345508858976 0.009682701865777651 0.8085815256335093 0.001367989056087593 36 | 17500 35 0.040897875575784455 0.061428871972344534 0.013716369364504068 0.009698905196844363 0.8082072829131652 0.001367989056087593 37 | 18000 36 0.04081920201083704 0.06134923821000057 0.01372630097145258 0.009705927510050623 0.8079115367077062 0.001367989056087593 38 | 18500 37 0.04090731751685049 0.06150492238933182 0.013755469576604715 0.009726554359195986 0.8074040779102338 0.001367989056087593 39 | 19000 38 0.040940211235634336 0.061559841505642955 0.013785308754395199 0.009747652609599436 0.8070096410657286 0.001367989056087593 40 | 19500 39 0.04095451330627636 0.061584020389360325 0.013803549025953496 0.009760550913395427 0.806733763272751 0.001367989056087593 41 | 20000 40 0.04096167303903756 0.0616047411708597 0.013807508970470228 0.009763349749647316 0.8065308448960979 0.001367989056087593 42 | 20500 41 0.04108954628362973 0.0617924736777887 0.013828477629014905 0.009778177145676584 0.8062899811087226 0.0015959872321021917 43 | 21000 42 0.041224012187882964 0.061976184488375656 0.013852291220244457 0.009795015389876881 0.8059893492280634 0.0015959872321021917 44 | 21500 43 0.04115500669744642 0.06189812322321734 
0.0138589108419701 0.00979969722192906 0.8058424532603741 0.0015959872321021917 45 | 22000 44 0.04116363803708711 0.06190808005274239 0.01387251102918911 0.009809313981542835 0.8056594032962024 0.0015959872321021917 46 | 22500 45 0.04117128482649873 0.061900015786201835 0.013883124515448212 0.009816819872423442 0.8054936160510717 0.0015959872321021917 47 | 23000 46 0.04134599900495718 0.06213831173139677 0.013912696681824982 0.009837730258118873 0.8053913425835453 0.0015959872321021917 48 | 23500 47 0.04138621248940165 0.062218105526640165 0.01392775629950722 0.009848379403882737 0.8050916878379261 0.0015959872321021917 49 | 24000 48 0.0414114420238935 0.062270575443794884 0.013942744598179933 0.009858977234749504 0.8048392612859099 0.0015959872321021917 50 | 24500 49 0.04132299949412905 0.06214682826482701 0.013944860685377211 0.009860473194461514 0.8046874796430201 0.0015959872321021917 51 | 25000 50 0.04127405030076166 0.062060041727673994 0.013945335565611373 0.009860810655140736 0.8046027946062146 0.0015959872321021917 52 | -------------------------------------------------------------------------------- /logs/reproduce/face_alignment.train_aflw19_farl-b-ep16_448_refinebb/eval.aflw19_test_frontal_0.tsv: -------------------------------------------------------------------------------- 1 | global_step epoch inter_ocular inter_pupil box diag auc_box_7 fr_box_7 2 | 500 1 0.21787771750439974 0.3151402350067185 0.11532913277682648 0.0815500117146624 0.0 1.0 3 | 1000 2 0.0941658194206621 0.13612189213071965 0.04981141692790085 0.035221988207673374 0.31131822135246806 0.011415525114155223 4 | 1500 3 0.03816025732496195 0.05514637846924943 0.0201708548326652 0.014262948406341413 0.7260366383996522 0.006849315068493178 5 | 2000 4 0.026361848665699024 0.038087743239497124 0.013927506166687476 0.009848233953095644 0.812208632311372 0.0053272450532724225 6 | 2500 5 0.024045164726640537 0.03472943937397439 0.01269656374392807 0.00897782614604886 0.8282295064144379 
0.003805175038051778 7 | 3000 6 0.023233291765326233 0.03355573773202649 0.012265386828185943 0.008672937773496956 0.8330262013481191 0.003805175038051778 8 | 3500 7 0.02285225750648812 0.03300366916975837 0.012064149934951572 0.008530641436758288 0.8359572733202871 0.0022831050228310223 9 | 4000 8 0.022628101946919294 0.03267960279862449 0.011945789807463346 0.008446948894808463 0.8377022178734509 0.0022831050228310223 10 | 4500 9 0.02246703259658233 0.03244624725759846 0.011858871901234897 0.008385488431747646 0.8389024787997391 0.0022831050228310223 11 | 5000 10 0.022335392155059395 0.03225418276619875 0.011787363746213406 0.008334924822710057 0.8398070232659275 0.0022831050228310223 12 | 5500 11 0.022257338920140375 0.03214044440282534 0.011744510819015619 0.008304623526888169 0.8404473798651881 0.0022831050228310223 13 | 6000 12 0.022171836284016182 0.03201721284306031 0.011698433253318753 0.008272041831749578 0.8408126766688411 0.0022831050228310223 14 | 6500 13 0.022062546949226925 0.03186036317497265 0.011640293231656381 0.008230930775449338 0.8411986301369864 0.0022831050228310223 15 | 7000 14 0.022048436343397725 0.03184014586008847 0.011632784316528878 0.008225620972329987 0.8412964774951077 0.0022831050228310223 16 | 7500 15 0.022057745191786025 0.03185429856113103 0.011638835140559228 0.008229899442722082 0.8414834746684062 0.0022831050228310223 17 | 8000 16 0.02207360928098542 0.03187602840057791 0.011647479900667838 0.008236011958013387 0.8416378560556643 0.0022831050228310223 18 | 8500 17 0.0220206665666136 0.03180142407003603 0.011620341552084076 0.0082168230727383 0.8419292237442922 0.0022831050228310223 19 | 9000 18 0.022009424241529026 0.03178475146242835 0.01161371953955524 0.008212138891582982 0.8420205479452055 0.0022831050228310223 20 | 9500 19 0.021970394720951355 0.03173043412160656 0.01159351456292325 0.008197854098664028 0.8421401391606871 0.0022831050228310223 21 | 10000 20 0.021968201415179526 0.03172762970946151 0.011591758902214434 
0.008196610838310903 0.8420596868884541 0.0022831050228310223 22 | 10500 21 0.021998604320137106 0.03177188774584998 0.011607874111134894 0.008208005577099015 0.8419553163731247 0.0022831050228310223 23 | 11000 22 0.022036471141890668 0.0318243732191112 0.011626575272558668 0.00822123000610909 0.8416682974559688 0.0022831050228310223 24 | 11500 23 0.0220760624158328 0.03187977051988947 0.011647400064918366 0.008235956072988757 0.8414312894107415 0.0022831050228310223 25 | 12000 24 0.022117825222160537 0.031940886963448024 0.011671988024196306 0.008253342121884884 0.8412692976734073 0.0022831050228310223 26 | 12500 25 0.02209459156750544 0.031907451751569636 0.011658260629964565 0.008243635546308312 0.8408855185909981 0.0022831050228310223 27 | 13000 26 0.022127705984826863 0.03195571028478614 0.011675966747638116 0.008256155243384421 0.8406104587953903 0.0022831050228310223 28 | 13500 27 0.02215991027460432 0.03200469169442512 0.011694210667951285 0.008269055974719369 0.8403973689932595 0.0022831050228310223 29 | 14000 28 0.0222358181052012 0.032113233476287514 0.011734029836669178 0.008297212591998653 0.8400788214829311 0.0022831050228310223 30 | 14500 29 0.022294434964021774 0.03219801904222555 0.0117654263338179 0.008319413462367413 0.8397624483583388 0.0022831050228310223 31 | 15000 30 0.022360993302576074 0.032292329738855 0.011801079527972496 0.008344623414711684 0.8394993476842793 0.0022831050228310223 32 | 15500 31 0.022418893091210493 0.0323754343993769 0.011832621181029344 0.008366926619995675 0.8392058056099153 0.0022831050228310223 33 | 16000 32 0.022438323661072613 0.032402842737949786 0.01184225227553369 0.008373736609425537 0.8390264187866929 0.0022831050228310223 34 | 16500 33 0.022484756132964856 0.032471399873359015 0.011867170464502622 0.008391356359333753 0.8386502500543599 0.0022831050228310223 35 | 17000 34 0.022511146928621755 0.03251149483830236 0.01188182903388501 0.008401721942733231 0.8383784518373559 0.0022831050228310223 36 | 17500 35 
0.022569638953361337 0.03259549075610017 0.011912194198306475 0.008423194130443184 0.8380979560774083 0.0022831050228310223 37 | 18000 36 0.022604212187559456 0.03264505278937167 0.01193040255542215 0.008436068733714669 0.8378055011959122 0.0022831050228310223 38 | 18500 37 0.02261675946425811 0.03266252085135589 0.011936257418976528 0.008440209305994043 0.8375793650793651 0.0022831050228310223 39 | 19000 38 0.022662686793590063 0.032729057416523974 0.011960894731263228 0.008457630192308122 0.8373162644053057 0.0022831050228310223 40 | 19500 39 0.022706197277051674 0.032791985405815974 0.011983568084112948 0.008473663388140488 0.837052076538378 0.0022831050228310223 41 | 20000 40 0.022685454497780068 0.03276233295690342 0.011970885812419734 0.00846469420457721 0.836836812350511 0.0022831050228310223 42 | 20500 41 0.02268860002630922 0.032768037584092885 0.011972275680240068 0.008465676910075241 0.8367096107849532 0.0022831050228310223 43 | 21000 42 0.022702562573112126 0.032789114221953186 0.011979436946967603 0.008470742125489397 0.8365215264187867 0.0022831050228310223 44 | 21500 43 0.0227135334566485 0.03280420463016225 0.011983495506158884 0.008473611132013562 0.8363791041530768 0.0022831050228310223 45 | 22000 44 0.02273193196800383 0.032831218144665025 0.011993829155258574 0.008480918280428766 0.8362323331158948 0.0022831050228310223 46 | 22500 45 0.022753200211662862 0.03286270246113816 0.012005868386278777 0.00848943094866098 0.8360551206784084 0.0022831050228310223 47 | 23000 46 0.02280930877639097 0.032942648529099186 0.012035038917576341 0.008510058328985624 0.8361018699717332 0.0022831050228310223 48 | 23500 47 0.02284169741417175 0.0329900216112761 0.012051950306652888 0.008522016998476815 0.8358692106979779 0.0022831050228310223 49 | 24000 48 0.022868725444265332 0.0330298175550487 0.012067182967651925 0.008532787566859972 0.8356517721243749 0.0022831050228310223 50 | 24500 49 0.02290444686169675 0.03308065986342989 0.012086310887445597 
0.008546312468599875 0.8353451837355947 0.0022831050228310223 51 | 25000 50 0.02292864144846366 0.03311633047629346 0.012098573658564319 0.008554982630992407 0.8350462056968907 0.0022831050228310223 52 | -------------------------------------------------------------------------------- /logs/reproduce/face_alignment.train_aflw19_farl-b-ep64_448_refinebb/eval.aflw19_test_0.tsv: -------------------------------------------------------------------------------- 1 | global_step epoch inter_ocular inter_pupil box diag auc_box_7 fr_box_7 2 | 500 1 0.2814381430261058 0.41885215943663073 0.10713332872438583 0.07575439787106703 0.0019018304996417192 0.9758321933424533 3 | 1000 2 0.17002066400460983 0.2540649748044203 0.061316688909848094 0.043357271611065396 0.14344540529390057 0.1094391244870041 4 | 1500 3 0.07078309413563753 0.10633050628954344 0.024175536942384147 0.017094629081590394 0.6611914533255163 0.004103967168262668 5 | 2000 4 0.04625239200478974 0.06961887566618218 0.015649217152454188 0.01106563225436091 0.7817782229170739 0.0029639762881896736 6 | 2500 5 0.04223303701363358 0.06360247360208618 0.014264682955900611 0.010086619685460485 0.800483518988991 0.002507979936160476 7 | 3000 6 0.041085674183258876 0.06185928393432227 0.013851238829981937 0.009794269149508913 0.8061440622760733 0.0020519835841312783 8 | 3500 7 0.040619098264987316 0.06113140560207549 0.01367669425160712 0.009670848124548012 0.8086129568106313 0.0020519835841312783 9 | 4000 8 0.04047411193708497 0.0609115203982665 0.013616412293677237 0.009628222839939938 0.8093884763207609 0.0020519835841312783 10 | 4500 9 0.04013620246995055 0.06040206609987745 0.013516743977864584 0.009557745742623903 0.8106336720734807 0.0015959872321021917 11 | 5000 10 0.039965249064628575 0.060082104670311075 0.013439680309095613 0.00950325280088189 0.8118835906455607 0.001367989056087593 12 | 5500 11 0.040075445631786506 0.060206053629891274 0.013456335037307983 0.009515030874381911 0.8117806657546739 
0.001367989056087593 13 | 6000 12 0.03995748399003042 0.06002157048661579 0.01340347547483292 0.009477652755438113 0.8125644909126443 0.0015959872321021917 14 | 6500 13 0.03976160720011115 0.059695291236498235 0.013342023711198006 0.00943420034426834 0.8134608494560618 0.0015959872321021917 15 | 7000 14 0.0398093770205893 0.05979287651538631 0.013339652789003275 0.009432523477800459 0.8138977916748096 0.0018239854081167906 16 | 7500 15 0.03962548878222256 0.05947365958669987 0.013290326996475825 0.009397645003166146 0.8143570451436389 0.0018239854081167906 17 | 8000 16 0.03943022777106846 0.059187075453591684 0.013265424485734973 0.009380037905253399 0.8145327665950103 0.0015959872321021917 18 | 8500 17 0.039339854409582695 0.05909946092013295 0.013268546866761785 0.009382244445828613 0.8142730115301936 0.0018239854081167906 19 | 9000 18 0.0396895123789205 0.059661684327619126 0.01332956636761945 0.009425392446591872 0.8141977721321088 0.0018239854081167906 20 | 9500 19 0.03979430261725875 0.05978074184683866 0.013332530455234875 0.00942749026916476 0.8141378411829849 0.0018239854081167906 21 | 10000 20 0.03963351478002629 0.05950143573167821 0.013289879078306241 0.009397331025575429 0.814310956940916 0.0015959872321021917 22 | 10500 21 0.039648157790323615 0.05950849805310932 0.013291422873939274 0.009398422554318784 0.8143164940394765 0.0015959872321021917 23 | 11000 22 0.03967515464466128 0.05954689203306686 0.013296124710103881 0.009401746715957914 0.8141725294769073 0.0015959872321021917 24 | 11500 23 0.039708559772928066 0.059597504176997836 0.013305010014998005 0.009408031486236392 0.8140298677610581 0.0015959872321021917 25 | 12000 24 0.039667114730953246 0.05953555057106418 0.01331041982279376 0.009411854880942202 0.8138487720669665 0.0015959872321021917 26 | 12500 25 0.03967443449661387 0.05953680300245098 0.013326420503504136 0.009423168511136285 0.8136080711354311 0.0015959872321021917 27 | 13000 26 0.03973255079193742 0.05964009380210115 
0.013359941307725397 0.009446872514619493 0.8130911666992381 0.0015959872321021917 28 | 13500 27 0.039732943916233825 0.0596550046935562 0.013384079313473891 0.00946394124098472 0.8126283304019284 0.0015959872321021917 29 | 14000 28 0.03985245370234304 0.05984229884949983 0.013416291152947143 0.00948671722760007 0.8121495342322979 0.0015959872321021917 30 | 14500 29 0.03988163535365545 0.059904976082456964 0.013447578454528614 0.00950884177594548 0.8116870236466682 0.0015959872321021917 31 | 15000 30 0.04000825616686082 0.06008586196447147 0.013489506204433763 0.009538488740307851 0.8111127939547914 0.0015959872321021917 32 | 15500 31 0.04010855156811089 0.06024135827909471 0.013525384014945458 0.009563858303586695 0.8105844896097976 0.0015959872321021917 33 | 16000 32 0.04018360699794089 0.06036461840141216 0.013547252858549397 0.009579322352237588 0.8102525894078564 0.0015959872321021917 34 | 16500 33 0.040370160131980665 0.06061522296395848 0.013583014993106617 0.009604610159578685 0.8098206957201488 0.0015959872321021917 35 | 17000 34 0.04045603865637825 0.060752329411052214 0.013616260958217998 0.009628117600913685 0.8094070418865223 0.0015959872321021917 36 | 17500 35 0.04057392028168462 0.06091937592624259 0.013644100594509698 0.00964780425677493 0.809091752980262 0.0015959872321021917 37 | 18000 36 0.04066703855529312 0.061058416684033716 0.013666668712304599 0.009663762320028394 0.8087605042016809 0.0015959872321021917 38 | 18500 37 0.04075045048565393 0.06120921638095645 0.013691585138949748 0.009681378985125345 0.8083904957331772 0.0015959872321021917 39 | 19000 38 0.04083007033209359 0.0613661808234831 0.01371606147512148 0.00969868776083971 0.8080146244544331 0.0015959872321021917 40 | 19500 39 0.04091616803198387 0.061525712750352686 0.013736956205424575 0.009713462102483843 0.8077224610774544 0.0015959872321021917 41 | 20000 40 0.04096929895459274 0.06161789170042108 0.013757116002031944 0.009727718076692886 0.8074250863135953 0.0015959872321021917 
42 | 20500 41 0.04102421111295172 0.06168608658943228 0.013780491242276116 0.009744244952534521 0.8071664712396589 0.0015959872321021917 43 | 21000 42 0.04103054980735935 0.0616953406657903 0.013792915535581965 0.009753030236866503 0.8069518272425251 0.0015959872321021917 44 | 21500 43 0.04113477297259747 0.06183125730306657 0.013818980894075699 0.009771460982364876 0.8066736694677872 0.0015959872321021917 45 | 22000 44 0.04120781407528037 0.061927360538146944 0.013836534937603312 0.009783873099254464 0.8064834538466551 0.0015959872321021917 46 | 22500 45 0.04127905654733278 0.06199781676160653 0.013850349951594919 0.009793641194327476 0.806355449156407 0.0015959872321021917 47 | 23000 46 0.04136341128020976 0.06212810441559436 0.013865397392860896 0.009804281642651156 0.8061795648491956 0.0015959872321021917 48 | 23500 47 0.0413935009442776 0.062190183264666694 0.013880151730392603 0.009814715091898405 0.8059027099211781 0.0015959872321021917 49 | 24000 48 0.04138021473464931 0.062125961366332506 0.013887421050900147 0.009819855279048393 0.8057958764901311 0.0015959872321021917 50 | 24500 49 0.04144283282501318 0.06218319748070922 0.013904946392875145 0.009832248261569572 0.8056330206501207 0.0015959872321021917 51 | 25000 50 0.04150123439637483 0.06225391810635046 0.013912215713382692 0.009837388448719559 0.8054792847371509 0.0015959872321021917 52 | -------------------------------------------------------------------------------- /logs/reproduce/face_alignment.train_aflw19_farl-b-ep64_448_refinebb/eval.aflw19_test_frontal_0.tsv: -------------------------------------------------------------------------------- 1 | global_step epoch inter_ocular inter_pupil box diag auc_box_7 fr_box_7 2 | 500 1 0.2193900098176489 0.3172616479603667 0.1161577756002069 0.08213595385965147 0.0 1.0 3 | 1000 2 0.1151964718892694 0.16652847824212805 0.06086161945871385 0.043035663970529214 0.1559879865188085 0.0730593607305936 4 | 1500 3 0.041189420168802615 0.05951496745534865 
0.02173389074827801 0.01536818000642495 0.7027081974342249 0.0060882800608828 5 | 2000 4 0.0258627242693618 0.03737465903457804 0.013653498262030894 0.009654482024263937 0.8150092411393782 0.003805175038051778 6 | 2500 5 0.023514772296133288 0.0339818429003386 0.012413286182978382 0.008777518250626516 0.8302745161991738 0.003805175038051778 7 | 3000 6 0.0228670677637945 0.03303848045964582 0.01206907144601668 0.008534123001214757 0.8351125244618396 0.0022831050228310223 8 | 3500 7 0.022621678797984594 0.03267881895672059 0.011940147597314378 0.008442959284673543 0.8373032180908895 0.0022831050228310223 9 | 4000 8 0.022508656053238263 0.03251119581713161 0.011877308153126338 0.008398524883856694 0.8379261796042619 0.0022831050228310223 10 | 4500 9 0.022364577201947775 0.03230150359224874 0.011800588901003021 0.008344276492091256 0.8387263535551208 0.0022831050228310223 11 | 5000 10 0.022267562250749887 0.03215836825436109 0.01174949329556214 0.008308147186557997 0.8396656881930855 0.0022831050228310223 12 | 5500 11 0.022276820294570343 0.0321662328014635 0.011751828128344391 0.008309797609233421 0.8398255055446837 0.0022831050228310223 13 | 6000 12 0.022143310244950712 0.03197736464316079 0.011683316717046218 0.008261353276454513 0.8407191780821919 0.0022831050228310223 14 | 6500 13 0.022051841701002425 0.031845490500625774 0.0116370257721644 0.008228619893391928 0.8414051967819092 0.0022831050228310223 15 | 7000 14 0.022001369540186957 0.031774157984253114 0.011610735859863653 0.008210029776237871 0.8417291802565777 0.0022831050228310223 16 | 7500 15 0.02195044304137905 0.031701765829751115 0.01158479141862425 0.008191684972568553 0.8421357903892153 0.0022831050228310223 17 | 8000 16 0.021923965152177274 0.031662817596482 0.011571398608760747 0.008182214275342688 0.8424217221135031 0.0022831050228310223 18 | 8500 17 0.02190620242369955 0.031636413736793365 0.01156153163590569 0.008175237356618478 0.8424630354424876 0.0022831050228310223 19 | 9000 18 
0.02190212063956297 0.0316303375104791 0.01155878093144665 0.008173292267449552 0.8424619482496196 0.0022831050228310223 20 | 9500 19 0.021901926130646076 0.03163238130566555 0.011560710053465682 0.008174657458765503 0.842424983692107 0.0022831050228310223 21 | 10000 20 0.0218969908297697 0.031625402209602714 0.011558935522488807 0.008173401134380648 0.8425304414003045 0.0022831050228310223 22 | 10500 21 0.021934564436588838 0.03167744350578505 0.011577805790545553 0.008186744591235388 0.8424793433355078 0.0022831050228310223 23 | 11000 22 0.0219369798309001 0.0316803408177113 0.011577582250447034 0.008186587097075069 0.8423597521200262 0.0022831050228310223 24 | 11500 23 0.021963396754620463 0.031717166871603586 0.011590929336199478 0.008196024408442063 0.842128180039139 0.0022831050228310223 25 | 12000 24 0.02196571199135512 0.03171985806214029 0.01159085312934771 0.008195970700756056 0.8419846705805611 0.0022831050228310223 26 | 12500 25 0.02201701444396508 0.031793942734531074 0.01161885878448254 0.008215773595522528 0.8416943900848012 0.0022831050228310223 27 | 13000 26 0.022075047776034976 0.03187806058329171 0.011648977183860187 0.008237070870363187 0.8412051532941944 0.0022831050228310223 28 | 13500 27 0.022087351190807975 0.03189675085802238 0.011654448110037562 0.008240938549535278 0.8408539899978258 0.0022831050228310223 29 | 14000 28 0.02212861611237083 0.03195781214233584 0.011675979811669847 0.00825616467851845 0.840505001087193 0.0022831050228310223 30 | 14500 29 0.02219505832619863 0.03205385600050835 0.011713254397318244 0.008282522088316478 0.840286475320722 0.0022831050228310223 31 | 15000 30 0.022256771360539593 0.03214292076261802 0.0117459318953562 0.008305628731551962 0.8398602957164601 0.0022831050228310223 32 | 15500 31 0.022307986719604676 0.03221652932363014 0.011773646512895234 0.00832522550492889 0.8394439008480106 0.0022831050228310223 33 | 16000 32 0.0223527528016716 0.03228068242878674 0.0117964171202034 0.008341326198258538 
0.8391264405305502 0.0022831050228310223 34 | 16500 33 0.022419983212080538 0.03237747238832703 0.011830633270867521 0.008365520785025447 0.8386252446183954 0.0022831050228310223 35 | 17000 34 0.022478068800277362 0.03246246117253645 0.011862945701796534 0.008388369776524003 0.8383338769297675 0.0022831050228310223 36 | 17500 35 0.022536134066647045 0.0325470899700937 0.011894118658846735 0.008410410975393821 0.8380229397695151 0.0022831050228310223 37 | 18000 36 0.022580334040672268 0.03261235787262474 0.011918305262038697 0.008427513970269097 0.8376869971732986 0.0022831050228310223 38 | 18500 37 0.022607051437122456 0.032652418000150125 0.01193220394024203 0.0084373424768085 0.8374304196564472 0.0022831050228310223 39 | 19000 38 0.02262017498277638 0.03267184857001225 0.01193805227178054 0.008441477968631088 0.8372455968688847 0.0022831050228310223 40 | 19500 39 0.022652453302066982 0.03271858586931156 0.011954255300025418 0.008452935124459694 0.836978147423353 0.0022831050228310223 41 | 20000 40 0.022685888513945374 0.03276576734568975 0.01197051929375171 0.00846443582706074 0.8367139595564254 0.0022831050228310223 42 | 20500 41 0.022750215806191732 0.03285907066031678 0.012004872616749013 0.008488727668286096 0.8364171559034574 0.0022831050228310223 43 | 21000 42 0.022759601587211343 0.03287425396830705 0.012010630951624482 0.008492799291509109 0.8363736681887369 0.0022831050228310223 44 | 21500 43 0.02277868958913028 0.032903218378215075 0.012021556836829338 0.008500525214719265 0.8362160252228746 0.0022831050228310223 45 | 22000 44 0.022806350498983306 0.03294353107702061 0.012037071100290144 0.008511493920917018 0.8361083931289413 0.0022831050228310223 46 | 22500 45 0.022815043886321138 0.032956795423905415 0.012042132686806597 0.008515073465611474 0.8360790389215048 0.0022831050228310223 47 | 23000 46 0.022853154570000356 0.03301177757878645 0.012061751959349285 0.008528946741530884 0.8357854968471408 0.0022831050228310223 48 | 23500 47 
0.022882936207671144 0.03305407020417885 0.01207685760892871 0.008539628039030543 0.8355734942378779 0.0022831050228310223 49 | 24000 48 0.02291734977037032 0.033102055844288075 0.012094089792562221 0.008551812425958875 0.8353832354859753 0.0022831050228310223 50 | 24500 49 0.022927892443977718 0.033116379829302225 0.012099713858222672 0.008555789697841603 0.8353288758425745 0.0022831050228310223 51 | 25000 50 0.0229278779283869 0.033117651395057436 0.012100026669204688 0.0085560110606015 0.8352723418134379 0.0022831050228310223 52 | -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- 1 | timm==0.4.12 2 | py-blueprint==0.0.1.post3 3 | mmsegmentation==0.18.0 4 | ftfy==6.0.1 5 | einops==0.3.2 6 | --------------------------------------------------------------------------------