├── .gitignore ├── LICENSE ├── README.md ├── WORKSPACE ├── background ├── 1.jpg ├── 10.jpg ├── 100.jpg ├── 11.jpg ├── 12.jpg ├── 13.jpg ├── 14.jpg ├── 15.jpg ├── 16.jpg ├── 17.jpg ├── 18.jpg ├── 19.jpg ├── 2.jpg ├── 20.jpg ├── 21.jpg ├── 22.jpg ├── 23.jpg ├── 24.jpg ├── 25.jpg ├── 26.jpg ├── 27.jpg ├── 28.jpg ├── 29.jpg ├── 3.jpg ├── 30.jpg ├── 31.jpg ├── 32.jpg ├── 33.jpg ├── 34.jpg ├── 35.jpg ├── 36.jpg ├── 37.jpg ├── 38.jpg ├── 39.jpg ├── 4.jpg ├── 40.jpg ├── 41.jpg ├── 42.jpg ├── 43.jpg ├── 44.jpg ├── 45.jpg ├── 46.jpg ├── 47.jpg ├── 48.jpg ├── 49.jpg ├── 5.jpg ├── 50.jpg ├── 51.jpg ├── 52.jpg ├── 53.jpg ├── 54.jpg ├── 55.jpg ├── 56.jpg ├── 57.jpg ├── 58.jpg ├── 59.jpg ├── 6.jpg ├── 60.jpg ├── 61.jpg ├── 62.jpg ├── 63.jpg ├── 64.jpg ├── 65.jpg ├── 66.jpg ├── 67.jpg ├── 68.jpg ├── 69.jpg ├── 7.jpg ├── 70.jpg ├── 71.jpg ├── 72.jpg ├── 73.jpg ├── 74.jpg ├── 75.jpg ├── 76.jpg ├── 77.jpg ├── 78.jpg ├── 79.jpg ├── 8.jpg ├── 80.jpg ├── 81.jpg ├── 82.jpg ├── 83.jpg ├── 84.jpg ├── 85.jpg ├── 86.jpg ├── 87.jpg ├── 88.jpg ├── 89.jpg ├── 9.jpg ├── 90.jpg ├── 91.jpg ├── 92.jpg ├── 93.jpg ├── 94.jpg ├── 95.jpg ├── 96.jpg ├── 97.jpg ├── 98.jpg └── 99.jpg ├── config ├── BUILD ├── configure.py └── params.yml ├── datasets ├── BUILD ├── make_data_from_GRID.py ├── makelist_bfm.py ├── makelist_pixrefer.py └── models.py ├── generator ├── BUILD ├── generator.py ├── loader.py └── test_generator.py ├── res ├── 1.png ├── 2.png └── 3.jpg ├── sample ├── 22.jpg └── test.aac ├── utils ├── BUILD ├── bfm_load_data.py ├── bfm_visual.py ├── cython │ ├── mesh_core.cpp │ ├── mesh_core.h │ ├── mesh_core_cython.pyx │ └── setup.py ├── reconstruct_mesh.py └── utils.py └── voicepuppet ├── BUILD ├── atvgnet ├── BUILD ├── __init__.py ├── atnet.py ├── backbone.py ├── infer.py ├── plot.py ├── test_atnet.py ├── test_vgnet.py ├── tinynet.py ├── train_atnet.py ├── train_vgnet.py └── vgnet.py ├── bfmnet ├── BUILD ├── bfmnet.py ├── infer_bfmnet.py ├── tinynet.py └── train_bfmnet.py ├── builder.py ├── pixflow ├── BUILD ├── infer_bfm_pixflow.py ├── infer_pixflow.py ├── pixflow.py └── train_pixflow.py └── pixrefer ├── BUILD ├── infer_bfmvid.py ├── infer_pixrefer.py ├── pixrefer.py ├── train_pixrefer.py └── vgg_simple.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 DongLu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # VoicePuppet # 4 | - This repository provided a common pipeline to generate speaking actor by voice input automatically. 
5 | - For a quick impression, there is a [short video](https://youtu.be/h24MGPTTw5M) that demonstrates it. 6 | 7 | ## The architecture of the network ## 8 | - The pipeline is composed of two parts. The first, BFMNet (Basel Face Model network), predicts the 3D face coefficients of each video frame from the stride window of waveform aligned to that frame. The second, PixReferNet, redraws the real face foreground from the rasterized face rendered with the 3D face coefficients produced in the previous step. The audio-to-frame alignment is illustrated in the sketch below. 9 |
17 | (figure) BFMNet component 24 | (figure) PixReferNet component
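To make the audio-to-frame alignment concrete, here is a minimal, self-contained sketch that only restates the default values from `config/params.yml` and the consistency check from `generator/test_generator.py`. It is an illustration of the numbers involved, not the repository's API.

```python
# Alignment between mel-spectrogram frames and video frames (illustrative only).
sample_rate = 16000    # config/params.yml -> mel.sample_rate
hop_step = 128         # config/params.yml -> mel.hop_step
frame_rate = 25        # config/params.yml -> frame_rate (video fps)

samples_per_frame = sample_rate // frame_rate            # 640 waveform samples per video frame
frame_mfcc_scale = sample_rate / frame_rate / hop_step   # 5.0 mel frames per video frame

# The same constraint is asserted in generator/test_generator.py:
assert frame_mfcc_scale == int(frame_mfcc_scale), \
    "sample_rate / hop_step must be divisible by frame_rate"

print(samples_per_frame, int(frame_mfcc_scale))  # 640 5
```

So with the default settings, the data generator pairs each video frame with exactly 5 mel frames (640 audio samples).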
29 | ## Run the prediction pipeline ## 30 | ------------------------ 31 | 32 | 1. Download the pretrained checkpoints and the required models.<br>
33 | Baidu Disk: [[ckpt.zip](https://pan.baidu.com/s/1cVIVFhSsEA1MbgqL7H7mMw), code: a6pn], [[allmodels.zip](https://pan.baidu.com/s/11FKHjGjnPtD2c7Ttg-mXng), code: brfh]
34 | or Google Drive: [[ckpt.zip](https://drive.google.com/file/d/1RgMSQUL2pzvwCWGgnkvwxHxHeEnZ7FlN/view?usp=sharing)], [[allmodels.zip](https://drive.google.com/file/d/1Z1Pm39sp977nED_HHZtvn5glRrmiThwB/view?usp=sharing)]
35 | Extract `ckpt.zip` into `ckpt_bfmnet` and `ckpt_pixrefer`, and extract `allmodels.zip` into the repository root dir 36 | 2. `cd utils/cython` && `python3 setup.py install` 37 | 3. Install the ffmpeg tool if you want to merge the png sequence and the audio file into a video container such as mp4. 38 | 4. `python3 voicepuppet/pixrefer/infer_bfmvid.py --config_path config/params.yml sample/22.jpg sample/test.aac` 39 | 40 | ## Run the training pipeline ## 41 | ------------------------ 42 | 43 | #### Requirements #### 44 | 45 | - tensorflow>=1.14.0 46 | - pytorch>=1.4.0, only for data preparation (face foreground segmentation and matting) 47 | - mxnet>=1.5.1, only for data preparation (face alignment) 48 | Tip: you can use other models, such as dlib, to produce the same labels instead 49 | 50 | #### Data preparation #### 51 | 52 | 1. Check your `config/params.yml` to make sure the dataset folder follows the structure below (the same as the [GRID dataset](http://spandh.dcs.shef.ac.uk/gridcorpus/)); you can extend the dataset with any common video files arranged in the same folder structure 53 | ``` 54 | |- srcdir/ 55 | | |- s10/ 56 | | |- video/ 57 | | |- mpg_6000/ 58 | | |- bbab8n.mpg 59 | | |- bbab9s.mpg 60 | | |- bbac1a.mpg 61 | | |- ... 62 | | |- s8/ 63 | | |- video/ 64 | | |- mpg_6000/ 65 | | |- bbae5n.mpg 66 | | |- bbae6s.mpg 67 | | |- bbae7p.mpg 68 | | |- ... 69 | ``` 70 | 2. Extract the audio stream from each mpg video file; `todir` is the output folder where the labels will be stored.<br>
71 | `python3 datasets/make_data_from_GRID.py --gpu 0 --step 2 srcdir todir` 72 | 73 | 3. Face detection and alignment
74 | `python3 datasets/make_data_from_GRID.py --gpu 0 --step 3 srcdir todir ./allmodels` 75 | 76 | 4. 3D face reconstruction
77 | `python3 datasets/make_data_from_GRID.py --gpu 0 --step 4 todir ./allmodels` 78 | 79 | 5. The above steps take several hours to finish; afterwards you'll find `*.jpg`, `landmark.txt`, `audio.wav` and `bfmcoeff.txt` in each output subfolder. The labels (`audio.wav`, `bfmcoeff.txt`) are used for BFMNet training; the other files are only temporary (the text label format is sketched after this section). 80 | ``` 81 | |- todir/ 82 | | |- s10/ 83 | | |- bbab8n/ 84 | | |- landmark.txt 85 | | |- audio.wav 86 | | |- bfmcoeff.txt 87 | | |- 0.jpg 88 | | |- 1.jpg 89 | | |- ... 90 | | |- bbab9s/ 91 | | |- ... 92 | | |- s8/ 93 | | |- bbae5n/ 94 | | |- landmark.txt 95 | | |- audio.wav 96 | | |- bfmcoeff.txt 97 | | |- 0.jpg 98 | | |- 1.jpg 99 | | |- ... 100 | | |- bbae6s/ 101 | | |- ... 102 | ``` 103 | 6. Face (human foreground) segmentation and matting for PixReferNet training. Before invoking the Python script, make sure the width and height of the video are equal (1:1 aspect ratio). In general, 3-5 minutes of video is enough to train the PixReferNet network, and the trained model will only work for this specific person.<br>
104 | `python3 datasets/make_data_from_GRID.py --gpu 0 --step 6 src_dir to_dvp_dir ./allmodels`
105 | `src_dir` has the same folder structure as [tip1 in Data preparation]; when the above step finishes, you will find `*.jpg` files in the subfolders, like this 106 |
107 | (figure: example of the generated `*.jpg` frames) 108 |
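For reference, the text labels mentioned in step 5 (`landmark.txt`, `bfmcoeff.txt`) are plain text files with one comma-separated row of float values per video frame, the format read by `generator/loader.py`. Below is a minimal parsing sketch under that assumption; the sample paths are hypothetical.

```python
import numpy as np

def load_text_label(path):
    """One row per video frame; each row is a list of comma-separated floats
    (e.g. 68 x/y landmark pairs -> 136 values per row in landmark.txt)."""
    rows = []
    with open(path) as f:
        for line in f:
            parts = line.strip().split(',')
            if parts and parts[0]:
                rows.append(np.array(parts, dtype=np.float32))
    return np.array(rows)

# Hypothetical sample folder produced by the steps above:
# landmarks = load_text_label('todir/s10/bbab8n/landmark.txt')   # [n_frames, 136]
# bfmcoeffs = load_text_label('todir/s10/bbab8n/bfmcoeff.txt')   # one coefficient row per frame
```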
109 | 110 | #### Train BFMNet #### 111 | 112 | 1. Prepare the train and eval list files; check that the `root_path` parameter in `config/params.yml` points to the output folder of [tip1 in Data preparation]<br>
113 | `python3 datasets/makelist_bfm.py --config_path config/params.yml` 114 | 2. Train the model<br> 115 | `python3 voicepuppet/bfmnet/train_bfmnet.py --config_path config/params.yml` 116 | 3. Watch the evaluation images written every 1000 steps to `log/eval_bfmnet`; the upper row is the target sequence and the lower row is the predicted sequence. 117 |
118 | (figure: example evaluation image from `log/eval_bfmnet`) 119 |
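The optimization hyperparameters come from the `training` block of `config/params.yml` (`learning_rate: 0.001`, `decay_steps: 1000`, `decay_rate: 0.95`, `max_grad_norm: 50`). The sketch below shows the learning-rate curve those numbers imply under a standard exponential-decay schedule; whether `train_bfmnet.py` applies exactly this schedule is an assumption, since the trainer itself is not reproduced in this excerpt.

```python
# Exponential decay implied by config/params.yml (assumed schedule, illustrative only).
learning_rate = 0.001
decay_steps = 1000
decay_rate = 0.95

def decayed_lr(global_step):
    return learning_rate * decay_rate ** (global_step / decay_steps)

for step in (0, 10000, 50000, 100000):
    print(step, "%.2e" % decayed_lr(step))
# 0 1.00e-03, 10000 5.99e-04, 50000 7.69e-05, 100000 5.92e-06
```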
120 | 121 | #### Train PixReferNet #### 122 | 123 | 1. Prepare the train and eval list files; check that the `root_path` parameter in `config/params.yml` points to the output folder of [tip6 in Data preparation]<br> 124 | `python3 datasets/makelist_pixrefer.py --config_path config/params.yml` 125 | 2. Train the model<br> 126 | `python3 voicepuppet/pixrefer/train_pixrefer.py --config_path config/params.yml` 127 | 3. Use TensorBoard to monitor the training process<br>
128 | `tensorboard --logdir=log/summary_pixrefer` 129 | 130 | ## Acknowledgement ## 131 | 1. The face alignment model is based on [Deepinx's work](https://github.com/deepinx/deep-face-alignment); it is more stable than Dlib. 132 | 2. The 3D face reconstruction model is based on [microsoft's work](https://github.com/microsoft/Deep3DFaceReconstruction) 133 | 3. The image segmentation model is based on [gasparian's work](https://github.com/gasparian/PicsArtHack-binary-segmentation) 134 | 4. The image matting model is based on [foamliu's work](https://github.com/foamliu/Deep-Image-Matting) 135 | -------------------------------------------------------------------------------- /WORKSPACE: -------------------------------------------------------------------------------- 1 | workspace(name = "voicepuppet") 2 | -------------------------------------------------------------------------------- /background/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/1.jpg -------------------------------------------------------------------------------- /background/10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/10.jpg -------------------------------------------------------------------------------- /background/100.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/100.jpg -------------------------------------------------------------------------------- /background/11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/11.jpg -------------------------------------------------------------------------------- /background/12.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/12.jpg -------------------------------------------------------------------------------- /background/13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/13.jpg -------------------------------------------------------------------------------- /background/14.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/14.jpg -------------------------------------------------------------------------------- /background/15.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/15.jpg -------------------------------------------------------------------------------- /background/16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/16.jpg --------------------------------------------------------------------------------
/background/17.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/17.jpg -------------------------------------------------------------------------------- /background/18.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/18.jpg -------------------------------------------------------------------------------- /background/19.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/19.jpg -------------------------------------------------------------------------------- /background/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/2.jpg -------------------------------------------------------------------------------- /background/20.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/20.jpg -------------------------------------------------------------------------------- /background/21.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/21.jpg -------------------------------------------------------------------------------- /background/22.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/22.jpg -------------------------------------------------------------------------------- /background/23.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/23.jpg -------------------------------------------------------------------------------- /background/24.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/24.jpg -------------------------------------------------------------------------------- /background/25.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/25.jpg -------------------------------------------------------------------------------- /background/26.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/26.jpg -------------------------------------------------------------------------------- /background/27.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/27.jpg -------------------------------------------------------------------------------- 
/background/28.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/28.jpg -------------------------------------------------------------------------------- /background/29.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/29.jpg -------------------------------------------------------------------------------- /background/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/3.jpg -------------------------------------------------------------------------------- /background/30.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/30.jpg -------------------------------------------------------------------------------- /background/31.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/31.jpg -------------------------------------------------------------------------------- /background/32.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/32.jpg -------------------------------------------------------------------------------- /background/33.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/33.jpg -------------------------------------------------------------------------------- /background/34.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/34.jpg -------------------------------------------------------------------------------- /background/35.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/35.jpg -------------------------------------------------------------------------------- /background/36.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/36.jpg -------------------------------------------------------------------------------- /background/37.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/37.jpg -------------------------------------------------------------------------------- /background/38.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/38.jpg -------------------------------------------------------------------------------- 
/background/39.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/39.jpg -------------------------------------------------------------------------------- /background/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/4.jpg -------------------------------------------------------------------------------- /background/40.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/40.jpg -------------------------------------------------------------------------------- /background/41.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/41.jpg -------------------------------------------------------------------------------- /background/42.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/42.jpg -------------------------------------------------------------------------------- /background/43.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/43.jpg -------------------------------------------------------------------------------- /background/44.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/44.jpg -------------------------------------------------------------------------------- /background/45.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/45.jpg -------------------------------------------------------------------------------- /background/46.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/46.jpg -------------------------------------------------------------------------------- /background/47.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/47.jpg -------------------------------------------------------------------------------- /background/48.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/48.jpg -------------------------------------------------------------------------------- /background/49.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/49.jpg -------------------------------------------------------------------------------- 
/background/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/5.jpg -------------------------------------------------------------------------------- /background/50.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/50.jpg -------------------------------------------------------------------------------- /background/51.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/51.jpg -------------------------------------------------------------------------------- /background/52.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/52.jpg -------------------------------------------------------------------------------- /background/53.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/53.jpg -------------------------------------------------------------------------------- /background/54.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/54.jpg -------------------------------------------------------------------------------- /background/55.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/55.jpg -------------------------------------------------------------------------------- /background/56.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/56.jpg -------------------------------------------------------------------------------- /background/57.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/57.jpg -------------------------------------------------------------------------------- /background/58.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/58.jpg -------------------------------------------------------------------------------- /background/59.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/59.jpg -------------------------------------------------------------------------------- /background/6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/6.jpg -------------------------------------------------------------------------------- 
/background/60.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/60.jpg -------------------------------------------------------------------------------- /background/61.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/61.jpg -------------------------------------------------------------------------------- /background/62.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/62.jpg -------------------------------------------------------------------------------- /background/63.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/63.jpg -------------------------------------------------------------------------------- /background/64.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/64.jpg -------------------------------------------------------------------------------- /background/65.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/65.jpg -------------------------------------------------------------------------------- /background/66.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/66.jpg -------------------------------------------------------------------------------- /background/67.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/67.jpg -------------------------------------------------------------------------------- /background/68.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/68.jpg -------------------------------------------------------------------------------- /background/69.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/69.jpg -------------------------------------------------------------------------------- /background/7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/7.jpg -------------------------------------------------------------------------------- /background/70.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/70.jpg -------------------------------------------------------------------------------- 
/background/71.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/71.jpg -------------------------------------------------------------------------------- /background/72.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/72.jpg -------------------------------------------------------------------------------- /background/73.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/73.jpg -------------------------------------------------------------------------------- /background/74.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/74.jpg -------------------------------------------------------------------------------- /background/75.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/75.jpg -------------------------------------------------------------------------------- /background/76.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/76.jpg -------------------------------------------------------------------------------- /background/77.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/77.jpg -------------------------------------------------------------------------------- /background/78.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/78.jpg -------------------------------------------------------------------------------- /background/79.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/79.jpg -------------------------------------------------------------------------------- /background/8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/8.jpg -------------------------------------------------------------------------------- /background/80.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/80.jpg -------------------------------------------------------------------------------- /background/81.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/81.jpg -------------------------------------------------------------------------------- 
/background/82.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/82.jpg -------------------------------------------------------------------------------- /background/83.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/83.jpg -------------------------------------------------------------------------------- /background/84.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/84.jpg -------------------------------------------------------------------------------- /background/85.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/85.jpg -------------------------------------------------------------------------------- /background/86.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/86.jpg -------------------------------------------------------------------------------- /background/87.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/87.jpg -------------------------------------------------------------------------------- /background/88.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/88.jpg -------------------------------------------------------------------------------- /background/89.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/89.jpg -------------------------------------------------------------------------------- /background/9.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/9.jpg -------------------------------------------------------------------------------- /background/90.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/90.jpg -------------------------------------------------------------------------------- /background/91.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/91.jpg -------------------------------------------------------------------------------- /background/92.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/92.jpg -------------------------------------------------------------------------------- 
/background/93.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/93.jpg -------------------------------------------------------------------------------- /background/94.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/94.jpg -------------------------------------------------------------------------------- /background/95.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/95.jpg -------------------------------------------------------------------------------- /background/96.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/96.jpg -------------------------------------------------------------------------------- /background/97.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/97.jpg -------------------------------------------------------------------------------- /background/98.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/98.jpg -------------------------------------------------------------------------------- /background/99.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/99.jpg -------------------------------------------------------------------------------- /config/BUILD: -------------------------------------------------------------------------------- 1 | package( 2 | default_visibility = [ 3 | "//visibility:public", 4 | ], 5 | features = ["-layering_check"], 6 | ) 7 | 8 | py_library( 9 | name = "configure", 10 | srcs = ["configure.py"], 11 | deps = [ 12 | ], 13 | ) 14 | -------------------------------------------------------------------------------- /config/configure.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | import yaml 4 | from tensorflow.contrib.training import HParams 5 | 6 | 7 | class YParams(HParams): 8 | def __init__(self, yaml_fn, config_name): 9 | HParams.__init__(self) 10 | with open(yaml_fn) as fp: 11 | for k, v in yaml.load(fp, Loader=yaml.FullLoader)[config_name].items(): 12 | self.add_hparam(k, v) 13 | -------------------------------------------------------------------------------- /config/params.yml: -------------------------------------------------------------------------------- 1 | default: &DEFAULT 2 | train_dataset_path: config/train.txt 3 | eval_dataset_path: config/eval.txt 4 | 5 | root_path: /media/dong/DiskData/gridcorpus/todir # used by makelist_* 6 | # root_path: /media/dong/DiskData/gridcorpus/todir_vid2vid 7 | train_by_eval: 9 # train/eval 8 | 9 | sample_file: # used by generator 10 | landmark_name: landmark.txt 11 | wav_name: audio.wav 12 | bfmcoeff_name: bfmcoeff.txt 13 | 14 | 
model_dir: ./allmodels 15 | 16 | mel: 17 | sample_rate: 16000 18 | num_mel_bins: 80 19 | win_length: 512 20 | fft_length: 512 21 | hop_step: 128 22 | 23 | frame_rate: 25 24 | 25 | training: 26 | epochs: 100000 27 | drop_rate: 0.25 28 | learning_rate: 0.001 29 | max_grad_norm: 50 30 | decay_steps: 1000 31 | decay_rate: 0.95 32 | -------------------------------------------------------------------------------- /datasets/BUILD: -------------------------------------------------------------------------------- 1 | package( 2 | default_visibility = [ 3 | "//visibility:public", 4 | ], 5 | features = ["-layering_check"], 6 | ) 7 | 8 | py_library( 9 | name = "models", 10 | srcs = ["models.py"], 11 | deps = [ 12 | ], 13 | ) 14 | 15 | py_binary( 16 | name = "makelist_bfm", 17 | srcs = ["makelist_bfm.py"], 18 | deps = [ 19 | "//config:configure" 20 | ], 21 | ) 22 | 23 | py_binary( 24 | name = "makelist_pixrefer", 25 | srcs = ["makelist_pixrefer.py"], 26 | deps = [ 27 | "//config:configure" 28 | ], 29 | ) 30 | 31 | py_binary( 32 | name = "make_data_from_GRID", 33 | srcs = ["make_data_from_GRID.py"], 34 | deps = [ 35 | "//utils:bfm_load_data", 36 | "//utils:reconstruct_mesh", 37 | "//utils:utils", 38 | ":models" 39 | ], 40 | ) -------------------------------------------------------------------------------- /datasets/makelist_bfm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | from optparse import OptionParser 4 | import json 5 | import logging 6 | import sys 7 | 8 | sys.path.append(os.getcwd()) 9 | from config.configure import YParams 10 | 11 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | def write_dataset(params): 16 | train_dataset_path = params.train_dataset_path 17 | eval_dataset_path = params.eval_dataset_path 18 | root_path = params.root_path 19 | train_by_eval = params.train_by_eval 20 | 21 | landmark_name = params.sample_file['landmark_name'] 22 | wav_name = params.sample_file['wav_name'] 23 | bfmcoeff_name = params.sample_file['bfmcoeff_name'] 24 | 25 | sample_index = 0 26 | 27 | with open(train_dataset_path, "w") as train_file: 28 | with open(eval_dataset_path, "w") as eval_file: 29 | for root, subdirs, files in os.walk(root_path): 30 | if not subdirs: 31 | if (os.path.exists(os.path.join(root, landmark_name)) and 32 | os.path.exists(os.path.join(root, wav_name)) and 33 | os.path.exists(os.path.join(root, bfmcoeff_name))): 34 | 35 | logger.info('Processing {}'.format(root)) 36 | count = 0 37 | for file in files: 38 | if (file.endswith('.jpg')): 39 | count += 1 40 | 41 | sample_index += 1 42 | if (sample_index % (train_by_eval + 1) == 0): 43 | eval_file.write("{}|{}\n".format(root, count)) 44 | else: 45 | train_file.write("{}|{}\n".format(root, count)) 46 | 47 | 48 | if (__name__ == '__main__'): 49 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>") 50 | cmd_parser.add_option('--config_path', type="string", dest="config_path", 51 | help='the config json file') 52 | 53 | opts, argv = cmd_parser.parse_args() 54 | 55 | if (not opts.config_path is None): 56 | config_path = opts.config_path 57 | 58 | if (not os.path.exists(config_path)): 59 | logger.error('config_path not exists') 60 | exit(0) 61 | 62 | params = YParams(config_path, 'default') 63 | write_dataset(params) 64 | else: 65 | print('Please check your parameters.') 66 | 
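# Illustrative sketch (not part of makelist_bfm.py above): write_dataset() emits one
# "<sample_dir>|<frame_count>" record per sample folder into config/train.txt and
# config/eval.txt, keeping about train_by_eval (9) training records per eval record.
# A record can be parsed back as shown here; the concrete path and count are hypothetical.
line = "/media/dong/DiskData/gridcorpus/todir/s10/bbab8n|75"
sample_dir, frame_count = line.split('|')
frame_count = int(frame_count)  # number of *.jpg frames counted in that folder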
-------------------------------------------------------------------------------- /datasets/makelist_pixrefer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | from optparse import OptionParser 4 | import json 5 | import logging 6 | import sys 7 | 8 | sys.path.append(os.getcwd()) 9 | from config.configure import YParams 10 | 11 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | def write_dataset(params): 16 | train_dataset_path = params.train_dataset_path 17 | eval_dataset_path = params.eval_dataset_path 18 | root_path = params.root_path 19 | train_by_eval = params.train_by_eval 20 | 21 | sample_index = 0 22 | 23 | with open(train_dataset_path, "w") as train_file: 24 | with open(eval_dataset_path, "w") as eval_file: 25 | for root, subdirs, files in os.walk(root_path): 26 | if not subdirs: 27 | logger.info('Processing {}'.format(root)) 28 | count = 0 29 | for file in files: 30 | if (file.endswith('.jpg')): 31 | count += 1 32 | 33 | sample_index += 1 34 | if (sample_index % (train_by_eval + 1) == 0): 35 | eval_file.write("{}|{}\n".format(root, count)) 36 | else: 37 | train_file.write("{}|{}\n".format(root, count)) 38 | 39 | 40 | if (__name__ == '__main__'): 41 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>") 42 | cmd_parser.add_option('--config_path', type="string", dest="config_path", 43 | help='the config json file') 44 | 45 | opts, argv = cmd_parser.parse_args() 46 | 47 | if (not opts.config_path is None): 48 | config_path = opts.config_path 49 | 50 | if (not os.path.exists(config_path)): 51 | logger.error('config_path not exists') 52 | exit(0) 53 | 54 | params = YParams(config_path, 'default') 55 | write_dataset(params) 56 | else: 57 | print('Please check your parameters.') 58 | -------------------------------------------------------------------------------- /generator/BUILD: -------------------------------------------------------------------------------- 1 | package( 2 | default_visibility = [ 3 | "//visibility:public", 4 | ], 5 | features = ["-layering_check"], 6 | ) 7 | 8 | py_library( 9 | name = "loader", 10 | srcs = ["loader.py"], 11 | deps = [ 12 | ], 13 | ) 14 | 15 | py_library( 16 | name = "generator", 17 | srcs = ["generator.py"], 18 | deps = [ 19 | ":loader", 20 | "//config:configure" 21 | ], 22 | ) 23 | 24 | py_library( 25 | name = "test_generator", 26 | srcs = ["test_generator.py"], 27 | deps = [ 28 | ":generator" 29 | ], 30 | ) 31 | -------------------------------------------------------------------------------- /generator/loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import librosa 4 | import cv2 5 | from scipy.io import wavfile 6 | import resampy 7 | 8 | 9 | class Loader: 10 | ### root_path: None if the file_path is full path 11 | def __init__(self, root_path=None): 12 | self.root_path = root_path 13 | 14 | ### load txt data, each line split by comma, default float format 15 | ### file_path: file name in root_path, or full path. 
16 | ### return: numpy array(float32) 17 | def get_text_data(self, file_path): 18 | if (self.root_path): 19 | file_path = os.path.join(self.root_path, file_path) 20 | 21 | with open(file_path) as f: 22 | lines = f.readlines() 23 | data_list = [] 24 | for line in lines: 25 | pts = line.strip().split(',') 26 | if (len(pts) != 0): 27 | pts = list(map(lambda x: np.float32(x), pts)) 28 | data_list.append(np.array(pts)) 29 | 30 | return np.array(data_list) 31 | 32 | ### load binary data of pickle format. 33 | ### file_path: file name in root_path, or full path. 34 | ### return: numpy array(float32) 35 | def get_bin_data(self, file_path): 36 | if (self.root_path): 37 | file_path = os.path.join(self.root_path, file_path) 38 | 39 | if (file_path.endswith('.npy') or file_path.endswith('.npz')): 40 | data = np.load(file_path) 41 | return data 42 | 43 | 44 | class EarLoader(Loader): 45 | 46 | def get_data(self, file_path): 47 | data = self.get_text_data(file_path) 48 | return data 49 | 50 | 51 | class PoseLoader(Loader): 52 | 53 | def get_data(self, file_path): 54 | data = self.get_text_data(file_path) 55 | return data 56 | 57 | 58 | class LandmarkLoader(Loader): 59 | def __init__(self, root_path=None, norm_size=128): 60 | Loader.__init__(self, root_path) 61 | self.norm_size = norm_size 62 | 63 | def get_data(self, file_path): 64 | data = self.get_text_data(file_path).astype(np.float32) 65 | data /= self.norm_size 66 | return data 67 | 68 | 69 | class BFMCoeffLoader(Loader): 70 | 71 | def get_data(self, file_path): 72 | data = self.get_text_data(file_path) 73 | return data 74 | 75 | 76 | class ImageLoader(Loader): 77 | def __init__(self, root_path=None, resize=None): 78 | Loader.__init__(self, root_path) 79 | self.resize = resize 80 | 81 | def get_data(self, file_path): 82 | if (self.root_path): 83 | file_path = os.path.join(self.root_path, file_path) 84 | 85 | data = cv2.imread(file_path).astype(np.float32) 86 | if (self.resize is not None): 87 | data = cv2.resize(data, (self.resize[0], self.resize[1])) 88 | data /= 255.0 89 | return data 90 | 91 | 92 | class WavLoader(Loader): 93 | def __init__(self, root_path=None, sr=16000): 94 | self.sr = sr 95 | Loader.__init__(self, root_path) 96 | 97 | def get_data(self, file_path): 98 | if (self.root_path): 99 | file_path = os.path.join(self.root_path, file_path) 100 | 101 | data, _ = librosa.load(file_path, sr=self.sr) 102 | return data 103 | 104 | 105 | class AudioLoader(Loader): 106 | def __init__(self, root_path=None, sr=16000): 107 | self.sr = sr 108 | Loader.__init__(self, root_path) 109 | 110 | def get_data(self, file_path): 111 | if (self.root_path): 112 | file_path = os.path.join(self.root_path, file_path) 113 | 114 | rate, data = wavfile.read(file_path) 115 | if data.ndim != 1: 116 | data = data[:,0] 117 | 118 | data = resampy.resample(data.astype(np.float32), rate, self.sr) 119 | return data 120 | -------------------------------------------------------------------------------- /generator/test_generator.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os 3 | import numpy as np 4 | from generator import ATNetDataGenerator 5 | from generator import VGNetDataGenerator 6 | 7 | 8 | class GeneratorTest(tf.test.TestCase): 9 | 10 | def testATNetGenerator(self): 11 | config_path = 'config/params.yml' 12 | batch_size = 2 13 | landmark_size = 136 14 | ### Generator for training setting 15 | generator = ATNetDataGenerator(config_path) 16 | params = generator.params 17 | 
params.dataset_path = params.train_dataset_path 18 | params.batch_size = batch_size 19 | generator.set_params(params) 20 | dataset = generator.get_dataset() 21 | 22 | sess = tf.Session() 23 | tf.train.start_queue_runners(sess=sess) 24 | 25 | iterator = dataset.make_one_shot_iterator() 26 | landmark, ears, poses, mfccs, example_landmark, seq_len = sess.run(iterator.get_next()) 27 | 28 | frame_mfcc_scale = params.mel['sample_rate'] / params.frame_rate / params.mel['hop_step'] 29 | 30 | assert (frame_mfcc_scale - int(frame_mfcc_scale) == 0), "sample_rate/hop_step must divided by frame_rate." 31 | 32 | ## Test seq_len value range 33 | self.assertAllGreaterEqual(seq_len, params.min_squence_len) 34 | self.assertAllLessEqual(seq_len, params.max_squence_len) 35 | 36 | max_seq_len = np.max(seq_len) 37 | 38 | ## Test seq_len shape, [batch_size] 39 | self.assertAllEqual(seq_len.shape, [params.batch_size]) 40 | ## Test landmark shape, [batch_size, padding_time, landmark_size] 41 | self.assertAllEqual(landmark.shape, [params.batch_size, max_seq_len, landmark_size]) 42 | ## Test ears shape, [batch_size, padding_time, 1] 43 | self.assertAllEqual(ears.shape, [params.batch_size, max_seq_len, 1]) 44 | ## Test poses shape, [batch_size, padding_time, 3] 45 | self.assertAllEqual(poses.shape, [params.batch_size, max_seq_len, 3]) 46 | ## Test mfccs shape, [batch_size, padding_time, num_mel_bins] 47 | self.assertAllEqual(mfccs.shape, [params.batch_size, max_seq_len * frame_mfcc_scale, params.mel['num_mel_bins']]) 48 | ## Test example_landmark shape, [batch_size, landmark_size] 49 | self.assertAllEqual(example_landmark.shape, [params.batch_size, landmark_size]) 50 | 51 | ## Test the range of value, landmark [-1, 1] 52 | self.assertAllGreaterEqual(landmark, -1) 53 | self.assertAllLessEqual(landmark, 1) 54 | self.assertAllGreaterEqual(example_landmark, -1) 55 | self.assertAllLessEqual(example_landmark, 1) 56 | 57 | ## Test the range of value, ears [0, 1] 58 | self.assertAllGreaterEqual(ears, 0) 59 | self.assertAllLessEqual(ears, 1) 60 | 61 | ## Test the range of value, poses [-1, 1] 62 | self.assertAllGreaterEqual(poses, -1) 63 | self.assertAllLessEqual(poses, 1) 64 | 65 | def testVGNetGenerator(self): 66 | config_path = 'config/params.yml' 67 | batch_size = 2 68 | landmark_size = 136 69 | ### Generator for training setting 70 | generator = VGNetDataGenerator(config_path) 71 | params = generator.params 72 | params.dataset_path = params.train_dataset_path 73 | params.batch_size = batch_size 74 | generator.set_params(params) 75 | dataset = generator.get_dataset() 76 | 77 | sess = tf.Session() 78 | tf.train.start_queue_runners(sess=sess) 79 | 80 | iterator = dataset.make_one_shot_iterator() 81 | real_landmark_seq, real_mask_seq, real_img_seq, example_landmark, example_img, seq_len = sess.run( 82 | iterator.get_next()) 83 | 84 | ## Test seq_len value range 85 | self.assertAllGreaterEqual(seq_len, params.min_squence_len) 86 | self.assertAllLessEqual(seq_len, params.max_squence_len) 87 | 88 | max_seq_len = np.max(seq_len) 89 | 90 | ## Test seq_len shape, [batch_size] 91 | self.assertAllEqual(seq_len.shape, [params.batch_size]) 92 | ## Test real_landmark_seq shape, [batch_size, padding_time, landmark_size] 93 | self.assertAllEqual(real_landmark_seq.shape, [params.batch_size, max_seq_len, landmark_size]) 94 | ## Test real_mask_seq shape, [batch_size, padding_time, img_height, img_width, 1] 95 | self.assertAllEqual(real_mask_seq.shape, [params.batch_size, max_seq_len, params.img_size, params.img_size, 1]) 96 | ## Test 
real_img_seq shape, [batch_size, padding_time, img_height, img_width, 3] 97 | self.assertAllEqual(real_img_seq.shape, [params.batch_size, max_seq_len, params.img_size, params.img_size, 3]) 98 | ## Test example_landmark shape, [batch_size, 136] 99 | self.assertAllEqual(example_landmark.shape, [params.batch_size, landmark_size]) 100 | ## Test example_img shape, [batch_size, img_height, img_width, 3] 101 | self.assertAllEqual(example_img.shape, [params.batch_size, params.img_size, params.img_size, 3]) 102 | 103 | ## Test the range of value, real_landmark_seq [-1, 1] 104 | self.assertAllGreaterEqual(real_landmark_seq, -1) 105 | self.assertAllLessEqual(real_landmark_seq, 1) 106 | self.assertAllGreaterEqual(example_landmark, -1) 107 | self.assertAllLessEqual(example_landmark, 1) 108 | 109 | ## Test the range of value, real_mask_seq [0, 1] 110 | self.assertAllGreaterEqual(real_mask_seq, 0) 111 | self.assertAllLessEqual(real_mask_seq, 1) 112 | 113 | ## Test the range of value, real_img_seq [-1, 1] 114 | self.assertAllGreaterEqual(real_img_seq, -1) 115 | self.assertAllLessEqual(real_img_seq, 1) 116 | self.assertAllGreaterEqual(example_img, -1) 117 | self.assertAllLessEqual(example_img, 1) 118 | 119 | 120 | if (__name__ == '__main__'): 121 | tf.test.main() 122 | -------------------------------------------------------------------------------- /res/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/res/1.png -------------------------------------------------------------------------------- /res/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/res/2.png -------------------------------------------------------------------------------- /res/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/res/3.jpg -------------------------------------------------------------------------------- /sample/22.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/sample/22.jpg -------------------------------------------------------------------------------- /sample/test.aac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/sample/test.aac -------------------------------------------------------------------------------- /utils/BUILD: -------------------------------------------------------------------------------- 1 | package( 2 | default_visibility = [ 3 | "//visibility:public", 4 | ], 5 | features = ["-layering_check"], 6 | ) 7 | 8 | py_library( 9 | name = "bfm_load_data", 10 | srcs = ["bfm_load_data.py"], 11 | deps = [ 12 | ], 13 | ) 14 | 15 | py_library( 16 | name = "reconstruct_mesh", 17 | srcs = ["reconstruct_mesh.py"], 18 | deps = [ 19 | ], 20 | ) 21 | 22 | py_library( 23 | name = "bfm_visual", 24 | srcs = ["bfm_visual.py"], 25 | deps = [ 26 | ], 27 | ) 28 | 29 | py_library( 30 | name = "utils", 31 | srcs = ["utils.py"], 32 | deps = [ 33 | ], 34 | ) -------------------------------------------------------------------------------- /utils/bfm_load_data.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | from scipy.io import loadmat, savemat 4 | from array import array 5 | import os 6 | 7 | 8 | # define facemodel for reconstruction 9 | class BFM(): 10 | def __init__(self, model_dir='BFM'): 11 | model_path = os.path.join(model_dir, 'BFM_model_front.mat') 12 | model = loadmat(model_path) 13 | self.meanshape = model['meanshape'] # mean face shape 14 | self.idBase = model['idBase'] # identity basis 15 | self.exBase = model['exBase'] # expression basis 16 | self.meantex = model['meantex'] # mean face texture 17 | self.texBase = model['texBase'] # texture basis 18 | self.point_buf = model[ 19 | 'point_buf'] # adjacent face index for each vertex, starts from 1 (only used for calculating face normal) 20 | self.tri = model['tri'] # vertex index for each triangle face, starts from 1 21 | self.keypoints = np.squeeze(model['keypoints']).astype(np.int32) - 1 # 68 face landmark index, starts from 0 22 | 23 | 24 | # load expression basis 25 | def LoadExpBasis(model_dir='BFM'): 26 | n_vertex = 53215 27 | Expbin = open(os.path.join(model_dir, 'Exp_Pca.bin'), 'rb') 28 | exp_dim = array('i') 29 | exp_dim.fromfile(Expbin, 1) 30 | expMU = array('f') 31 | expPC = array('f') 32 | expMU.fromfile(Expbin, 3 * n_vertex) 33 | expPC.fromfile(Expbin, 3 * exp_dim[0] * n_vertex) 34 | 35 | expPC = np.array(expPC) 36 | expPC = np.reshape(expPC, [exp_dim[0], -1]) 37 | expPC = np.transpose(expPC) 38 | 39 | expEV = np.loadtxt(os.path.join(model_dir, 'std_exp.txt')) 40 | 41 | return expPC, expEV 42 | 43 | 44 | # transfer original BFM09 to our face model 45 | def transferBFM09(model_dir='BFM'): 46 | original_BFM = loadmat(os.path.join(model_dir, '01_MorphableModel.mat')) 47 | shapePC = original_BFM['shapePC'] # shape basis 48 | shapeEV = original_BFM['shapeEV'] # corresponding eigen value 49 | shapeMU = original_BFM['shapeMU'] # mean face 50 | texPC = original_BFM['texPC'] # texture basis 51 | texEV = original_BFM['texEV'] # eigen value 52 | texMU = original_BFM['texMU'] # mean texture 53 | 54 | expPC, expEV = LoadExpBasis() 55 | 56 | # transfer BFM09 to our face model 57 | 58 | idBase = shapePC * np.reshape(shapeEV, [-1, 199]) 59 | idBase = idBase / 1e5 # unify the scale to decimeter 60 | idBase = idBase[:, :80] # use only first 80 basis 61 | 62 | exBase = expPC * np.reshape(expEV, [-1, 79]) 63 | exBase = exBase / 1e5 # unify the scale to decimeter 64 | exBase = exBase[:, :64] # use only first 64 basis 65 | 66 | texBase = texPC * np.reshape(texEV, [-1, 199]) 67 | texBase = texBase[:, :80] # use only first 80 basis 68 | 69 | # our face model is cropped align face landmarks which contains only 35709 vertex. 70 | # original BFM09 contains 53490 vertex, and expression basis provided by JuYong contains 53215 vertex. 71 | # thus we select corresponding vertex to get our face model. 
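# A toy illustration of the reshape -> index -> reshape pattern used below: a
# per-vertex basis stored as [3*N, K] is viewed as [N, 3, K], the rows of the
# selected vertices are gathered with an index array, and the result is
# flattened back to [3*M, K]. (Shapes and values here are made up for
# illustration only.)
#   basis = np.arange(12 * 2, dtype=np.float32).reshape([-1, 2])  # [3*4, 2]
#   keep = np.array([0, 2])                                       # keep vertices 0 and 2
#   basis = basis.reshape([-1, 3, 2])[keep].reshape([-1, 2])      # [3*2, 2]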
72 | 73 | index_exp = loadmat(os.path.join(model_dir, 'BFM_front_idx.mat')) 74 | index_exp = index_exp['idx'].astype(np.int32) - 1 # starts from 0 (to 53215) 75 | 76 | index_shape = loadmat(os.path.join(model_dir, 'BFM_exp_idx.mat')) 77 | index_shape = index_shape['trimIndex'].astype(np.int32) - 1 # starts from 0 (to 53490) 78 | index_shape = index_shape[index_exp] 79 | 80 | idBase = np.reshape(idBase, [-1, 3, 80]) 81 | idBase = idBase[index_shape, :, :] 82 | idBase = np.reshape(idBase, [-1, 80]) 83 | 84 | texBase = np.reshape(texBase, [-1, 3, 80]) 85 | texBase = texBase[index_shape, :, :] 86 | texBase = np.reshape(texBase, [-1, 80]) 87 | 88 | exBase = np.reshape(exBase, [-1, 3, 64]) 89 | exBase = exBase[index_exp, :, :] 90 | exBase = np.reshape(exBase, [-1, 64]) 91 | 92 | meanshape = np.reshape(shapeMU, [-1, 3]) / 1e5 93 | meanshape = meanshape[index_shape, :] 94 | meanshape = np.reshape(meanshape, [1, -1]) 95 | 96 | meantex = np.reshape(texMU, [-1, 3]) 97 | meantex = meantex[index_shape, :] 98 | meantex = np.reshape(meantex, [1, -1]) 99 | 100 | # other info contains triangles, region used for computing photometric loss, 101 | # region used for skin texture regularization, and 68 landmarks index etc. 102 | other_info = loadmat(os.path.join(model_dir, 'facemodel_info.mat')) 103 | frontmask2_idx = other_info['frontmask2_idx'] 104 | skinmask = other_info['skinmask'] 105 | keypoints = other_info['keypoints'] 106 | point_buf = other_info['point_buf'] 107 | tri = other_info['tri'] 108 | tri_mask2 = other_info['tri_mask2'] 109 | 110 | # save our face model 111 | savemat(os.path.join(model_dir, 'BFM_model_front.mat'), 112 | {'meanshape': meanshape, 'meantex': meantex, 'idBase': idBase, 'exBase': exBase, 'texBase': texBase, 113 | 'tri': tri, 'point_buf': point_buf, 'tri_mask2': tri_mask2 \ 114 | , 'keypoints': keypoints, 'frontmask2_idx': frontmask2_idx, 'skinmask': skinmask}) 115 | 116 | 117 | # load landmarks for standard face, which is used for image preprocessing 118 | def load_lm3d(model_dir='BFM'): 119 | Lm3D = loadmat(os.path.join(model_dir, 'similarity_Lm3D_all.mat')) 120 | Lm3D = Lm3D['lm'] 121 | 122 | # calculate 5 facial landmarks using 68 landmarks 123 | lm_idx = np.array([31, 37, 40, 43, 46, 49, 55]) - 1 124 | Lm3D = np.stack( 125 | [Lm3D[lm_idx[0], :], np.mean(Lm3D[lm_idx[[1, 2]], :], 0), np.mean(Lm3D[lm_idx[[3, 4]], :], 0), Lm3D[lm_idx[5], :], 126 | Lm3D[lm_idx[6], :]], axis=0) 127 | Lm3D = Lm3D[[1, 2, 0, 3, 4], :] 128 | 129 | return Lm3D 130 | 131 | 132 | # save 3D face to obj file 133 | def save_obj(path, v, f, c): 134 | with open(path, 'w') as file: 135 | for i in range(len(v)): 136 | file.write('v %f %f %f %f %f %f\n' % (v[i, 0], v[i, 1], v[i, 2], c[i, 0], c[i, 1], c[i, 2])) 137 | # file.write('v %f %f %f\n'%(v[i,0],v[i,1],v[i,2])) 138 | 139 | file.write('\n') 140 | 141 | for i in range(len(f)): 142 | file.write('f %d %d %d\n' % (f[i, 0], f[i, 1], f[i, 2])) 143 | 144 | file.close() 145 | 146 | 147 | # calculating least sqaures problem 148 | def POS(xp, x): 149 | npts = xp.shape[1] 150 | 151 | A = np.zeros([2 * npts, 8]) 152 | 153 | A[0:2 * npts - 1:2, 0:3] = x.transpose() 154 | A[0:2 * npts - 1:2, 3] = 1 155 | 156 | A[1:2 * npts:2, 4:7] = x.transpose() 157 | A[1:2 * npts:2, 7] = 1; 158 | 159 | b = np.reshape(xp.transpose(), [2 * npts, 1]) 160 | 161 | k, _, _, _ = np.linalg.lstsq(A, b) 162 | 163 | R1 = k[0:3] 164 | R2 = k[4:7] 165 | sTx = k[3] 166 | sTy = k[7] 167 | s = (np.linalg.norm(R1) + np.linalg.norm(R2)) / 2 168 | t = np.stack([sTx, sTy], axis=0) 169 | 170 | return t, s 
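# A minimal usage sketch of POS(): given the 5 detected 2D landmarks as a 2x5
# array of image coordinates and the 5 standard 3D landmarks from load_lm3d()
# (defined below) as a 3x5 array, it solves a linear least-squares system for
# the translation t and scale s that Preprocess() uses to align the face. The
# landmark coordinates below are made-up placeholders, not real detections.
#   lm2d = np.array([[70., 110., 90., 75., 105.],
#                    [120., 120., 140., 160., 160.]])  # 2 x 5, (x, y) rows
#   lm3d = load_lm3d('BFM')                            # [5, 3] standard landmarks
#   t, s = POS(lm2d, lm3d.transpose())                 # t: [2, 1], s: scalar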
171 | 172 | 173 | def process_img(img, lm, t, s): 174 | w0, h0 = img.size 175 | img = img.transform(img.size, Image.AFFINE, (1, 0, t[0] - w0 / 2, 0, 1, h0 / 2 - t[1])) 176 | w = (w0 / s * 102).astype(np.int32) 177 | h = (h0 / s * 102).astype(np.int32) 178 | img = img.resize((w, h), resample=Image.BILINEAR) 179 | lm = np.stack([lm[:, 0] - t[0] + w0 / 2, lm[:, 1] - t[1] + h0 / 2], axis=1) / s * 102 180 | 181 | # crop the image to 224*224 from image center 182 | left = (w / 2 - 112).astype(np.int32) 183 | right = left + 224 184 | up = (h / 2 - 112).astype(np.int32) 185 | below = up + 224 186 | 187 | img = img.crop((left, up, right, below)) 188 | img = np.array(img) 189 | img = img[:, :, ::-1] 190 | img = np.expand_dims(img, 0) 191 | lm = lm - np.reshape(np.array([(w / 2 - 112), (h / 2 - 112)]), [1, 2]) 192 | 193 | return img, lm, t[0] - w0 / 2, h0 / 2 - t[1] 194 | 195 | 196 | # resize and crop input images before sending to the R-Net 197 | def Preprocess(img, lm, lm3D): 198 | w0, h0 = img.size 199 | 200 | # change from image plane coordinates to 3D sapce coordinates(X-Y plane) 201 | lm = np.stack([lm[:, 0], h0 - 1 - lm[:, 1]], axis=1) 202 | 203 | # calculate translation and scale factors using 5 facial landmarks and standard landmarks 204 | t, s = POS(lm.transpose(), lm3D.transpose()) 205 | # print('t = {}, s = {}'.format(t,s)) 206 | 207 | # processing the image 208 | img_new, lm_new, t0, t1 = process_img(img, lm, t, s) 209 | lm_new = np.stack([lm_new[:, 0], 223 - lm_new[:, 1]], axis=1) 210 | trans_params = np.array([w0, h0, 102.0 / s, t0, t1]) 211 | 212 | return img_new, lm_new, trans_params 213 | -------------------------------------------------------------------------------- /utils/bfm_visual.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | from PIL import Image 4 | import os 5 | import sys 6 | 7 | sys.path.append(os.path.join(os.getcwd(), 'utils')) 8 | from bfm_load_data import * 9 | from reconstruct_mesh import * 10 | import mesh_core_cython 11 | 12 | 13 | def isPointInTri(point, tri_points): 14 | ''' Judge whether the point is in the triangle 15 | Method: 16 | http://blackpawn.com/texts/pointinpoly/ 17 | Args: 18 | point: [u, v] or [x, y] 19 | tri_points: three vertices(2d points) of a triangle. 2 coords x 3 vertices 20 | Returns: 21 | bool: true for in triangle 22 | ''' 23 | tp = tri_points 24 | 25 | # vectors 26 | v0 = tp[:, 2] - tp[:, 0] 27 | v1 = tp[:, 1] - tp[:, 0] 28 | v2 = point - tp[:, 0] 29 | 30 | # dot products 31 | dot00 = np.dot(v0.T, v0) 32 | dot01 = np.dot(v0.T, v1) 33 | dot02 = np.dot(v0.T, v2) 34 | dot11 = np.dot(v1.T, v1) 35 | dot12 = np.dot(v1.T, v2) 36 | 37 | # barycentric coordinates 38 | if dot00 * dot11 - dot01 * dot01 == 0: 39 | inverDeno = 0 40 | else: 41 | inverDeno = 1 / (dot00 * dot11 - dot01 * dot01) 42 | 43 | u = (dot11 * dot02 - dot01 * dot12) * inverDeno 44 | v = (dot00 * dot12 - dot01 * dot02) * inverDeno 45 | 46 | # check if point in triangle 47 | return (u >= 0) & (v >= 0) & (u + v < 1) 48 | 49 | 50 | def render_texture(vertices, colors, triangles, h, w, c=3): 51 | ''' render mesh by z buffer 52 | Args: 53 | vertices: 3 x nver 54 | colors: 3 x nver 55 | triangles: 3 x ntri 56 | h: height 57 | w: width 58 | ''' 59 | # initial 60 | image = np.zeros((h, w, c), dtype=np.uint8) 61 | 62 | depth_buffer = np.zeros([h, w]) - 999999. 
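# depth_buffer starts far behind every face so the first triangle covering a
# pixel always wins; a later triangle overwrites that pixel only when its
# average depth value is larger, i.e. when it passes the z-buffer test below.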
63 | # triangle depth: approximate the depth to the average value of z in each vertex(v0, v1, v2), since the vertices are closed to each other 64 | tri_depth = (vertices[2, triangles[0, :]] + vertices[2, triangles[1, :]] + vertices[2, triangles[2, :]]) / 3. 65 | tri_tex = (colors[:, triangles[0, :]] + colors[:, triangles[1, :]] + colors[:, triangles[2, :]]) / 3. 66 | 67 | for i in range(triangles.shape[1]): 68 | tri = triangles[:, i] # 3 vertex indices 69 | 70 | # the inner bounding box 71 | umin = max(int(np.ceil(np.min(vertices[0, tri]))), 0) 72 | umax = min(int(np.floor(np.max(vertices[0, tri]))), w - 1) 73 | 74 | vmin = max(int(np.ceil(np.min(vertices[1, tri]))), 0) 75 | vmax = min(int(np.floor(np.max(vertices[1, tri]))), h - 1) 76 | 77 | if umax < umin or vmax < vmin: 78 | continue 79 | 80 | for u in range(umin, umax + 1): 81 | for v in range(vmin, vmax + 1): 82 | if tri_depth[i] > depth_buffer[v, u] and isPointInTri([u, v], vertices[:2, tri]): 83 | depth_buffer[v, u] = tri_depth[i] 84 | image[v, u, :] = tri_tex[:, i] 85 | return image 86 | 87 | 88 | def plot_bfm_coeff_seq(save_dir, facemodel, step, seq_len, real_bfm_coeff_seq, bfm_coeff_seq, id_coeff=None, texture_coeff=None): 89 | ## 9*10 block 90 | block_x = 10 91 | block_y = 9 92 | img_size = 224 93 | 94 | def merge_seq(bfm_coeff_seq, big_img, time, h_index): 95 | 96 | for i in range(time): 97 | face_shape, face_texture, face_color, face_projection, z_buffer, landmarks_2d, translation = Reconstruction( 98 | bfm_coeff_seq[0, i:i + 1, ...], facemodel) 99 | 100 | face_projection2 = np.concatenate([face_projection, z_buffer], axis=2) 101 | face_projection = np.squeeze(face_projection2, (0)) 102 | 103 | shape = np.squeeze(face_projection2, (0)) 104 | color = np.squeeze(face_color, (0)) 105 | color = np.clip(color, 0, 255).astype(np.int32) 106 | 107 | new_image = np.zeros((224 * 224 * 3), dtype=np.uint8) 108 | face_mask = np.zeros((224 * 224), dtype=np.uint8) 109 | 110 | vertices = shape.reshape(-1).astype(np.float32).copy() 111 | triangles = (facemodel.tri - 1).reshape(-1).astype(np.int32).copy() 112 | colors = color.reshape(-1).astype(np.float32).copy() 113 | depth_buffer = (np.zeros((224 * 224)) - 99999.0).astype(np.float32) 114 | mesh_core_cython.render_colors_core(new_image, face_mask, vertices, triangles, colors, depth_buffer, 115 | facemodel.tri.shape[0], 224, 224, 3) 116 | new_image = new_image.reshape([224, 224, 3]) 117 | 118 | 119 | # shape = np.squeeze(face_shape, (0)) 120 | # color = np.squeeze(face_color, (0)) 121 | # color = np.clip(color, 0, 255).astype(np.int32) 122 | # shape[:, :2] = 112 - shape[:, :2] * 112 123 | 124 | # new_image = render_texture(shape.T, color.T, (facemodel.tri - 1).astype(int).T, 224, 224, c=3) 125 | new_image = cv2.cvtColor(new_image, cv2.COLOR_BGR2RGB) 126 | 127 | big_img[(i // block_x + h_index) * img_size: (i // block_x + h_index + 1) * img_size, 128 | (i % block_x) * img_size: (i % block_x + 1) * img_size] = new_image 129 | 130 | return big_img 131 | 132 | ### We only pick the first sequence of the batch, trim length of 30. 133 | if (seq_len[0] > 30): 134 | time = 30 135 | else: 136 | time = seq_len[0] 137 | 138 | ### We only pick the first sequence of the batch, trim length of 30. 
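### Layout of the 9x10 grid image written below (10 columns, 9 rows):
### merge_seq() renders at most 30 frames per sequence, 10 per row, so the
### ground-truth coefficients fill rows 0-2 (h_index 0) and the predicted
### coefficients fill rows 3-5 (h_index 3).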
139 | if (seq_len[0] > 30): 140 | time = 30 141 | else: 142 | time = seq_len[0] 143 | 144 | big_img = np.zeros((img_size * block_y, img_size * block_x, 3), dtype=np.uint8) 145 | big_img = merge_seq(real_bfm_coeff_seq, big_img, time, 0) 146 | 147 | if(id_coeff is None or texture_coeff is None): 148 | bfm_coeff_seq = np.concatenate([real_bfm_coeff_seq[:, :, :80], bfm_coeff_seq[:, :, :], real_bfm_coeff_seq[:, :, 144:]], axis=2) 149 | else: 150 | bfm_coeff_seq = np.concatenate([np.tile(id_coeff, (1, real_bfm_coeff_seq.shape[1], 1)), bfm_coeff_seq[:, :, :], np.tile(texture_coeff, (1, real_bfm_coeff_seq.shape[1], 1)), real_bfm_coeff_seq[:, :, 224:]], axis=2) 151 | 152 | big_img = merge_seq(bfm_coeff_seq, big_img, time, 3) 153 | 154 | cv2.imwrite('{}/bfmnet_{}.jpg'.format(save_dir, step), big_img) 155 | 156 | -------------------------------------------------------------------------------- /utils/cython/mesh_core.h: -------------------------------------------------------------------------------- 1 | #ifndef MESH_CORE_HPP_ 2 | #define MESH_CORE_HPP_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace std; 12 | 13 | class point 14 | { 15 | public: 16 | float x; 17 | float y; 18 | 19 | float dot(point p) 20 | { 21 | return this->x * p.x + this->y * p.y; 22 | } 23 | 24 | point operator-(const point& p) 25 | { 26 | point np; 27 | np.x = this->x - p.x; 28 | np.y = this->y - p.y; 29 | return np; 30 | } 31 | 32 | point operator+(const point& p) 33 | { 34 | point np; 35 | np.x = this->x + p.x; 36 | np.y = this->y + p.y; 37 | return np; 38 | } 39 | 40 | point operator*(float s) 41 | { 42 | point np; 43 | np.x = s * this->x; 44 | np.y = s * this->y; 45 | return np; 46 | } 47 | }; 48 | 49 | 50 | bool isPointInTri(point p, point p0, point p1, point p2, int h, int w); 51 | void get_point_weight(float* weight, point p, point p0, point p1, point p2); 52 | 53 | void _get_normal_core( 54 | float* normal, float* tri_normal, int* triangles, 55 | int ntri); 56 | 57 | void _rasterize_triangles_core( 58 | float* vertices, int* triangles, 59 | float* depth_buffer, int* triangle_buffer, float* barycentric_weight, 60 | int nver, int ntri, 61 | int h, int w); 62 | 63 | void _render_colors_core( 64 | unsigned char* image, unsigned char *face_mask, float* vertices, int* triangles, 65 | float* colors, 66 | float* depth_buffer, 67 | int ntri, 68 | int h, int w, int c); 69 | 70 | void _render_texture_core( 71 | float* image, float* vertices, int* triangles, 72 | float* texture, float* tex_coords, int* tex_triangles, 73 | float* depth_buffer, 74 | int nver, int tex_nver, int ntri, 75 | int h, int w, int c, 76 | int tex_h, int tex_w, int tex_c, 77 | int mapping_type); 78 | 79 | #endif -------------------------------------------------------------------------------- /utils/cython/mesh_core_cython.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | from libcpp.string cimport string 4 | 5 | # use the Numpy-C-API from Cython 6 | np.import_array() 7 | 8 | # cdefine the signature of our c function 9 | cdef extern from "mesh_core.h": 10 | void _rasterize_triangles_core( 11 | float* vertices, int* triangles, 12 | float* depth_buffer, int* triangle_buffer, float* barycentric_weight, 13 | int nver, int ntri, 14 | int h, int w) 15 | 16 | void _render_colors_core( 17 | unsigned char* image, unsigned char *face_mask, float* vertices, int* triangles, 18 | float* colors, 19 | float* depth_buffer, 20 | int 
ntri, 21 | int h, int w, int c) 22 | 23 | void _render_texture_core( 24 | float* image, float* vertices, int* triangles, 25 | float* texture, float* tex_coords, int* tex_triangles, 26 | float* depth_buffer, 27 | int nver, int tex_nver, int ntri, 28 | int h, int w, int c, 29 | int tex_h, int tex_w, int tex_c, 30 | int mapping_type) 31 | 32 | void _get_normal_core( 33 | float* normal, float* tri_normal, int* triangles, 34 | int ntri) 35 | 36 | void _write_obj_with_colors_texture(string filename, string mtl_name, 37 | float* vertices, int* triangles, float* colors, float* uv_coords, 38 | int nver, int ntri, int ntexver) 39 | 40 | def get_normal_core(np.ndarray[float, ndim=2, mode = "c"] normal not None, 41 | np.ndarray[float, ndim=2, mode = "c"] tri_normal not None, 42 | np.ndarray[int, ndim=2, mode="c"] triangles not None, 43 | int ntri 44 | ): 45 | _get_normal_core( 46 | np.PyArray_DATA(normal), np.PyArray_DATA(tri_normal), np.PyArray_DATA(triangles), 47 | ntri) 48 | 49 | def rasterize_triangles_core( 50 | np.ndarray[float, ndim=2, mode = "c"] vertices not None, 51 | np.ndarray[int, ndim=2, mode="c"] triangles not None, 52 | np.ndarray[float, ndim=2, mode = "c"] depth_buffer not None, 53 | np.ndarray[int, ndim=2, mode = "c"] triangle_buffer not None, 54 | np.ndarray[float, ndim=2, mode = "c"] barycentric_weight not None, 55 | int nver, int ntri, 56 | int h, int w 57 | ): 58 | _rasterize_triangles_core( 59 | np.PyArray_DATA(vertices), np.PyArray_DATA(triangles), 60 | np.PyArray_DATA(depth_buffer), np.PyArray_DATA(triangle_buffer), np.PyArray_DATA(barycentric_weight), 61 | nver, ntri, 62 | h, w) 63 | 64 | def render_colors_core(np.ndarray[unsigned char, ndim=1, mode = "c"] image not None, 65 | np.ndarray[unsigned char, ndim=1, mode = "c"] face_mask not None, 66 | np.ndarray[float, ndim=1, mode = "c"] vertices not None, 67 | np.ndarray[int, ndim=1, mode="c"] triangles not None, 68 | np.ndarray[float, ndim=1, mode = "c"] colors not None, 69 | np.ndarray[float, ndim=1, mode = "c"] depth_buffer not None, 70 | int ntri, 71 | int h, int w, int c 72 | ): 73 | _render_colors_core( 74 | np.PyArray_DATA(image), np.PyArray_DATA(face_mask), np.PyArray_DATA(vertices), np.PyArray_DATA(triangles), 75 | np.PyArray_DATA(colors), 76 | np.PyArray_DATA(depth_buffer), 77 | ntri, 78 | h, w, c) 79 | 80 | def render_texture_core(np.ndarray[float, ndim=3, mode = "c"] image not None, 81 | np.ndarray[float, ndim=2, mode = "c"] vertices not None, 82 | np.ndarray[int, ndim=2, mode="c"] triangles not None, 83 | np.ndarray[float, ndim=3, mode = "c"] texture not None, 84 | np.ndarray[float, ndim=2, mode = "c"] tex_coords not None, 85 | np.ndarray[int, ndim=2, mode="c"] tex_triangles not None, 86 | np.ndarray[float, ndim=2, mode = "c"] depth_buffer not None, 87 | int nver, int tex_nver, int ntri, 88 | int h, int w, int c, 89 | int tex_h, int tex_w, int tex_c, 90 | int mapping_type 91 | ): 92 | _render_texture_core( 93 | np.PyArray_DATA(image), np.PyArray_DATA(vertices), np.PyArray_DATA(triangles), 94 | np.PyArray_DATA(texture), np.PyArray_DATA(tex_coords), np.PyArray_DATA(tex_triangles), 95 | np.PyArray_DATA(depth_buffer), 96 | nver, tex_nver, ntri, 97 | h, w, c, 98 | tex_h, tex_w, tex_c, 99 | mapping_type) 100 | -------------------------------------------------------------------------------- /utils/cython/setup.py: -------------------------------------------------------------------------------- 1 | ''' 2 | python setup.py build_ext -i 3 | to compile 4 | ''' 5 | 6 | # setup.py 7 | from distutils.core import setup, Extension 
8 | from Cython.Build import cythonize 9 | from Cython.Distutils import build_ext 10 | from distutils.sysconfig import get_python_lib 11 | import numpy 12 | 13 | setup( 14 | name='mesh_core_cython', 15 | cmdclass={'build_ext': build_ext}, 16 | ext_modules=[Extension("mesh_core_cython", 17 | sources=["mesh_core_cython.pyx", "mesh_core.cpp"], 18 | language='c++', 19 | include_dirs=[get_python_lib(), numpy.get_include()])], 20 | ) 21 | -------------------------------------------------------------------------------- /utils/reconstruct_mesh.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | # input: coeff with shape [1,257] 5 | def Split_coeff(coeff): 6 | id_coeff = coeff[:, :80] # identity(shape) coeff of dim 80 7 | ex_coeff = coeff[:, 80:144] # expression coeff of dim 64 8 | tex_coeff = coeff[:, 144:224] # texture(albedo) coeff of dim 80 9 | angles = coeff[:, 224:227] # ruler angles(x,y,z) for rotation of dim 3 10 | gamma = coeff[:, 227:254] # lighting coeff for 3 channel SH function of dim 27 11 | translation = coeff[:, 254:] # translation coeff of dim 3 12 | 13 | return id_coeff, ex_coeff, tex_coeff, angles, gamma, translation 14 | 15 | 16 | # compute face shape with identity and expression coeff, based on BFM model 17 | # input: id_coeff with shape [1,80] 18 | # ex_coeff with shape [1,64] 19 | # output: face_shape with shape [1,N,3], N is number of vertices 20 | def Shape_formation(id_coeff, ex_coeff, facemodel): 21 | face_shape = np.einsum('ij,aj->ai', facemodel.idBase, id_coeff) + \ 22 | np.einsum('ij,aj->ai', facemodel.exBase, ex_coeff) + \ 23 | facemodel.meanshape 24 | 25 | face_shape = np.reshape(face_shape, [1, -1, 3]) 26 | # re-center face shape 27 | face_shape = face_shape - np.mean(np.reshape(facemodel.meanshape, [1, -1, 3]), axis=1, keepdims=True) 28 | 29 | return face_shape 30 | 31 | 32 | # compute vertex normal using one-ring neighborhood 33 | # input: face_shape with shape [1,N,3] 34 | # output: v_norm with shape [1,N,3] 35 | def Compute_norm(face_shape, facemodel): 36 | face_id = facemodel.tri # vertex index for each triangle face, with shape [F,3], F is number of faces 37 | point_id = facemodel.point_buf # adjacent face index for each vertex, with shape [N,8], N is number of vertex 38 | shape = face_shape 39 | face_id = (face_id - 1).astype(np.int32) 40 | point_id = (point_id - 1).astype(np.int32) 41 | v1 = shape[:, face_id[:, 0], :] 42 | v2 = shape[:, face_id[:, 1], :] 43 | v3 = shape[:, face_id[:, 2], :] 44 | e1 = v1 - v2 45 | e2 = v2 - v3 46 | face_norm = np.cross(e1, e2) # compute normal for each face 47 | face_norm = np.concatenate([face_norm, np.zeros([1, 1, 3])], 48 | axis=1) # concat face_normal with a zero vector at the end 49 | v_norm = np.sum(face_norm[:, point_id, :], axis=2) # compute vertex normal using one-ring neighborhood 50 | v_norm = v_norm / np.expand_dims(np.linalg.norm(v_norm, axis=2), 2) # normalize normal vectors 51 | 52 | return v_norm 53 | 54 | 55 | # compute vertex texture(albedo) with tex_coeff 56 | # input: tex_coeff with shape [1,N,3] 57 | # output: face_texture with shape [1,N,3], RGB order, range from 0-255 58 | def Texture_formation(tex_coeff, facemodel): 59 | face_texture = np.einsum('ij,aj->ai', facemodel.texBase, tex_coeff) + facemodel.meantex 60 | face_texture = np.reshape(face_texture, [1, -1, 3]) 61 | 62 | return face_texture 63 | 64 | 65 | # compute rotation matrix based on 3 ruler angles 66 | # input: angles with shape [1,3] 67 | # output: rotation matrix with shape 
[1,3,3] 68 | def Compute_rotation_matrix(angles): 69 | angle_x = angles[:, 0][0] 70 | angle_y = angles[:, 1][0] 71 | angle_z = angles[:, 2][0] 72 | 73 | # compute rotation matrix for X,Y,Z axis respectively 74 | rotation_X = np.array([1.0, 0, 0, \ 75 | 0, np.cos(angle_x), -np.sin(angle_x), \ 76 | 0, np.sin(angle_x), np.cos(angle_x)]) 77 | rotation_Y = np.array([np.cos(angle_y), 0, np.sin(angle_y), \ 78 | 0, 1, 0, \ 79 | -np.sin(angle_y), 0, np.cos(angle_y)]) 80 | rotation_Z = np.array([np.cos(angle_z), -np.sin(angle_z), 0, \ 81 | np.sin(angle_z), np.cos(angle_z), 0, \ 82 | 0, 0, 1]) 83 | 84 | rotation_X = np.reshape(rotation_X, [1, 3, 3]) 85 | rotation_Y = np.reshape(rotation_Y, [1, 3, 3]) 86 | rotation_Z = np.reshape(rotation_Z, [1, 3, 3]) 87 | 88 | rotation = np.matmul(np.matmul(rotation_Z, rotation_Y), rotation_X) 89 | rotation = np.transpose(rotation, axes=[0, 2, 1]) # transpose row and column (dimension 1 and 2) 90 | 91 | return rotation 92 | 93 | 94 | # project 3D face onto image plane 95 | # input: face_shape with shape [1,N,3] 96 | # rotation with shape [1,3,3] 97 | # translation with shape [1,3] 98 | # output: face_projection with shape [1,N,2] 99 | # z_buffer with shape [1,N,1] 100 | def Projection_layer(face_shape, rotation, translation, focal=1015.0, 101 | center=112.0): # we choose the focal length and camera position empirically 102 | 103 | camera_pos = np.reshape(np.array([0.0, 0.0, 10.0]), [1, 1, 3]) # camera position 104 | reverse_z = np.reshape(np.array([1.0, 0, 0, 0, 1, 0, 0, 0, -1.0]), [1, 3, 3]) 105 | 106 | p_matrix = np.concatenate([[focal], [0.0], [center], [0.0], [focal], [center], [0.0], [0.0], [1.0]], 107 | axis=0) # projection matrix 108 | p_matrix = np.reshape(p_matrix, [1, 3, 3]) 109 | 110 | # calculate face position in camera space 111 | face_shape_r = np.matmul(face_shape, rotation) 112 | face_shape_t = face_shape_r + np.reshape(translation, [1, 1, 3]) 113 | face_shape_t = np.matmul(face_shape_t, reverse_z) + camera_pos 114 | 115 | # calculate projection of face vertex using perspective projection 116 | aug_projection = np.matmul(face_shape_t, np.transpose(p_matrix, [0, 2, 1])) 117 | face_projection = aug_projection[:, :, 0:2] / np.reshape(aug_projection[:, :, 2], [1, np.shape(aug_projection)[1], 1]) 118 | z_buffer = -np.reshape(aug_projection[:, :, 2], [1, -1, 1]) 119 | 120 | return face_projection, z_buffer 121 | 122 | 123 | # compute vertex color using face_texture and SH function lighting approximation 124 | # input: face_texture with shape [1,N,3] 125 | # norm with shape [1,N,3] 126 | # gamma with shape [1,27] 127 | # output: face_color with shape [1,N,3], RGB order, range from 0-255 128 | # lighting with shape [1,N,3], color under uniform texture 129 | def Illumination_layer(face_texture, norm, gamma): 130 | # gamma = np.zeros(gamma.shape, dtype=gamma.dtype) 131 | num_vertex = np.shape(face_texture)[1] 132 | 133 | init_lit = np.array([0.8, 0, 0, 0, 0, 0, 0, 0, 0]) 134 | gamma = np.reshape(gamma, [-1, 3, 9]) 135 | gamma = gamma + np.reshape(init_lit, [1, 1, 9]) 136 | 137 | # parameter of 9 SH function 138 | a0 = np.pi 139 | a1 = 2 * np.pi / np.sqrt(3.0) 140 | a2 = 2 * np.pi / np.sqrt(8.0) 141 | c0 = 1 / np.sqrt(4 * np.pi) 142 | c1 = np.sqrt(3.0) / np.sqrt(4 * np.pi) 143 | c2 = 3 * np.sqrt(5.0) / np.sqrt(12 * np.pi) 144 | 145 | Y0 = np.tile(np.reshape(a0 * c0, [1, 1, 1]), [1, num_vertex, 1]) 146 | Y1 = np.reshape(-a1 * c1 * norm[:, :, 1], [1, num_vertex, 1]) 147 | Y2 = np.reshape(a1 * c1 * norm[:, :, 2], [1, num_vertex, 1]) 148 | Y3 = np.reshape(-a1 * c1 
* norm[:, :, 0], [1, num_vertex, 1]) 149 | Y4 = np.reshape(a2 * c2 * norm[:, :, 0] * norm[:, :, 1], [1, num_vertex, 1]) 150 | Y5 = np.reshape(-a2 * c2 * norm[:, :, 1] * norm[:, :, 2], [1, num_vertex, 1]) 151 | Y6 = np.reshape(a2 * c2 * 0.5 / np.sqrt(3.0) * (3 * np.square(norm[:, :, 2]) - 1), [1, num_vertex, 1]) 152 | Y7 = np.reshape(-a2 * c2 * norm[:, :, 0] * norm[:, :, 2], [1, num_vertex, 1]) 153 | Y8 = np.reshape(a2 * c2 * 0.5 * (np.square(norm[:, :, 0]) - np.square(norm[:, :, 1])), [1, num_vertex, 1]) 154 | 155 | Y = np.concatenate([Y0, Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y8], axis=2) 156 | 157 | # Y shape:[batch,N,9]. 158 | 159 | lit_r = np.squeeze(np.matmul(Y, np.expand_dims(gamma[:, 0, :], 2)), 2) # [batch,N,9] * [batch,9,1] = [batch,N] 160 | lit_g = np.squeeze(np.matmul(Y, np.expand_dims(gamma[:, 1, :], 2)), 2) 161 | lit_b = np.squeeze(np.matmul(Y, np.expand_dims(gamma[:, 2, :], 2)), 2) 162 | 163 | # shape:[batch,N,3] 164 | face_color = np.stack([lit_r * face_texture[:, :, 0], lit_g * face_texture[:, :, 1], lit_b * face_texture[:, :, 2]], 165 | axis=2) 166 | lighting = np.stack([lit_r, lit_g, lit_b], axis=2) * 128 167 | 168 | return face_color, lighting 169 | 170 | 171 | # face reconstruction with coeff and BFM model 172 | def Reconstruction(coeff, facemodel): 173 | id_coeff, ex_coeff, tex_coeff, angles, gamma, translation = Split_coeff(coeff) 174 | # compute face shape 175 | face_shape = Shape_formation(id_coeff, ex_coeff, facemodel) 176 | # compute vertex texture(albedo) 177 | face_texture = Texture_formation(tex_coeff, facemodel) 178 | # vertex normal 179 | face_norm = Compute_norm(face_shape, facemodel) 180 | # rotation matrix 181 | rotation = Compute_rotation_matrix(angles) 182 | face_norm_r = np.matmul(face_norm, rotation) 183 | 184 | # compute vertex projection on image plane (with image sized 224*224) 185 | face_projection, z_buffer = Projection_layer(face_shape, rotation, translation) 186 | face_projection = np.stack([face_projection[:, :, 0], 224 - face_projection[:, :, 1]], axis=2) 187 | 188 | # compute 68 landmark on image plane 189 | landmarks_2d = face_projection[:, facemodel.keypoints, :] 190 | 191 | # compute vertex color using SH function lighting approximation 192 | face_color, lighting = Illumination_layer(face_texture, face_norm_r, gamma) 193 | 194 | return face_shape, face_texture, face_color, face_projection, z_buffer, landmarks_2d, translation 195 | 196 | 197 | # face reconstruction with coeff and BFM model 198 | def Reconstruction_rotation(coeff, facemodel, angles): 199 | id_coeff, ex_coeff, tex_coeff, _, gamma, translation = Split_coeff(coeff) 200 | # compute face shape 201 | face_shape = Shape_formation(id_coeff, ex_coeff, facemodel) 202 | # compute vertex texture(albedo) 203 | face_texture = Texture_formation(tex_coeff, facemodel) 204 | # vertex normal 205 | face_norm = Compute_norm(face_shape, facemodel) 206 | # rotation matrix 207 | rotation = Compute_rotation_matrix(angles) 208 | face_norm_r = np.matmul(face_norm, rotation) 209 | 210 | # rotation matrix 211 | face_shape = np.matmul(face_shape, rotation) 212 | 213 | # compute vertex projection on image plane (with image sized 224*224) 214 | face_projection, z_buffer = Projection_layer(face_shape, rotation, translation) 215 | face_projection = np.stack([face_projection[:, :, 0], 224 - face_projection[:, :, 1]], axis=2) 216 | 217 | # compute 68 landmark on image plane 218 | landmarks_2d = face_projection[:, facemodel.keypoints, :] 219 | 220 | # compute vertex color using SH function lighting approximation 221 | 
face_color, lighting = Illumination_layer(face_texture, face_norm_r, gamma) 222 | 223 | return face_shape, face_texture, face_color, face_projection, z_buffer, landmarks_2d 224 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import cv2 3 | import numpy as np 4 | import os 5 | import math 6 | 7 | alignment_handler = None 8 | dlib_detector = None 9 | 10 | def mkdir(dirname): 11 | if(not os.path.isdir(dirname)): 12 | os.makedirs(dirname) 13 | 14 | class MXDetectorHandler: 15 | ''' 16 | face 2D landmark alignment by mxnet, refer to https://github.com/deepinx/deep-face-alignment 17 | ''' 18 | def __init__(self, prefix, epoch, mx, name='model'): 19 | ctx_id = int(os.environ["CUDA_VISIBLE_DEVICES"]) 20 | if (ctx_id >= 0): 21 | ctx = mx.gpu(ctx_id) 22 | else: 23 | ctx = mx.cpu() 24 | 25 | sym, arg_params, aux_params = mx.model.load_checkpoint(os.path.join(prefix, name), epoch) 26 | all_layers = sym.get_internals() 27 | sym = all_layers['heatmap_output'] 28 | image_size = (128, 128) 29 | self.image_size = image_size 30 | model = mx.mod.Module(symbol=sym, context=ctx, label_names=None) 31 | model.bind(for_training=False, data_shapes=[('data', (1, 3, image_size[0], image_size[1]))]) 32 | model.set_params(arg_params, aux_params) 33 | self.model = model 34 | 35 | 36 | def get_mxnet_sat_alignment(model_dir, image): 37 | ''' 38 | Arguments: 39 | model_dir: The folder contains mxnet pretrained model. 40 | image: The image contains at least 1 face inside, we only detect the first face. 41 | Returns: 42 | image: The image input. 43 | img_landmarks: The 68 landmarks' coordinates in image. 44 | img: The face area expand by sat alignment, resize to out_img_size=224. 45 | lmk_cropped: The 68 landmarks' coordinates in img. 46 | center_x: the x position of the face center in image. 47 | center_y: the y position of the face center in image. 48 | ratio: The return image size / original face area size(before resize). 
49 | ''' 50 | global alignment_handler, dlib_detector 51 | 52 | if (alignment_handler is None): 53 | alignment_handler = MXDetectorHandler(prefix=model_dir, epoch=0, mx=mx, name='model-sat') 54 | 55 | import dlib 56 | if (dlib_detector is None): 57 | dlib_detector = dlib.get_frontal_face_detector() 58 | 59 | def crop_expand_dlib(image, rect, ratio=1.5): 60 | ## rect: [left, right, top, bottom] 61 | mean = [(rect[2] + rect[3]) / 2.0, (rect[0] + rect[1]) / 2.0] 62 | ## mean: [y, x] 63 | width = rect[1] - rect[0] 64 | height = rect[3] - rect[2] 65 | 66 | max_ratio = min([(image.shape[0] - mean[0])/(height/2), (image.shape[1] - mean[1])/(width/2), mean[0]/(height/2), mean[1]/(width/2)]) 67 | if(max_ratio=0: 47 | ctx = mx.gpu(ctx_id) 48 | else: 49 | ctx = mx.cpu() 50 | sym, arg_params, aux_params = mx.model.load_checkpoint(os.path.join(prefix, "model"), epoch) 51 | all_layers = sym.get_internals() 52 | sym = all_layers['heatmap_output'] 53 | image_size = (128, 128) 54 | self.image_size = image_size 55 | model = mx.mod.Module(symbol=sym, context=ctx, label_names = None) 56 | model.bind(for_training=False, data_shapes=[('data', (1, 3, image_size[0], image_size[1]))]) 57 | model.set_params(arg_params, aux_params) 58 | self.model = model 59 | 60 | 61 | def face_alignment(image): 62 | import mxnet as mx 63 | global alignment_handler 64 | global MXDetectorHandler_prefix 65 | if(alignment_handler is None): 66 | alignment_handler = MXDetectorHandler(prefix=MXDetectorHandler_prefix, epoch=0, ctx_id=-1, mx=mx) 67 | 68 | import dlib 69 | dlib_detector = dlib.get_frontal_face_detector() 70 | 71 | def crop_expand_dlib(image, rect, ratio=1.5): 72 | ## rect: [left, right, top, bottom] 73 | mean = [(rect[2] + rect[3]) // 2, (rect[0] + rect[1]) // 2] 74 | ## mean: [y, x] 75 | half_crop_size = int((rect[1] + rect[3] - rect[0] - rect[2]) * ratio // 4) 76 | 77 | # padding if the crop area outside of image. 
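# Note: cv2.copyMakeBorder takes its border widths in the order (top, bottom,
# left, right); mean is [y, x] here, so each branch below pads one side of the
# frame with black (value 0) pixels when the expanded square crop would reach
# outside the image.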
78 | if (mean[0] - half_crop_size < 0): 79 | image = cv2.copyMakeBorder(image, 0, 0, half_crop_size - mean[0], 0, cv2.BORDER_CONSTANT, 0) 80 | if (mean[0] + half_crop_size > image.shape[1]): 81 | image = cv2.copyMakeBorder(image, 0, 0, 0, mean[0] + half_crop_size - image.shape[1], cv2.BORDER_CONSTANT, 0) 82 | if (mean[1] - half_crop_size < 0): 83 | image = cv2.copyMakeBorder(image, half_crop_size - mean[1], 0, 0, 0, cv2.BORDER_CONSTANT, 0) 84 | if (mean[1] + half_crop_size > image.shape[0]): 85 | image = cv2.copyMakeBorder(image, 0, mean[1] + half_crop_size - image.shape[0], 0, 0, cv2.BORDER_CONSTANT, 0) 86 | 87 | left = mean[1] - half_crop_size 88 | right = mean[1] + half_crop_size 89 | top = mean[0] - half_crop_size 90 | buttom = mean[0] + half_crop_size 91 | 92 | if (left < 0): 93 | left = 0 94 | if (top < 0): 95 | top = 0 96 | 97 | return image, [left, right, top, buttom] 98 | 99 | def crop_expand_alignment(img, xys, out_img_size=224, ratio=1.3): 100 | xys = np.array(map(lambda x: int(x), xys)) 101 | max_x = max(xys[::2]) 102 | max_y = max(xys[1::2]) 103 | min_x = min(xys[::2]) 104 | min_y = min(xys[1::2]) 105 | width = int((max_x - min_x) * ratio) 106 | height = int((max_y - min_y) * ratio) 107 | height = width 108 | 109 | center_x = (max_x + min_x) // 2 110 | center_y = (max_y + min_y) // 2 111 | 112 | left = center_x - width / 2 113 | top = center_y - height / 2 114 | img = img[top:top + height, left:left + width] 115 | 116 | xys[::2] -= left 117 | xys[1::2] -= top 118 | xys[::2] = xys[::2] * out_img_size / width 119 | xys[1::2] = xys[1::2] * out_img_size / height 120 | 121 | img = cv2.resize(img, (out_img_size, out_img_size)) 122 | xys = np.array(list(map(lambda x: float(x)/out_img_size, xys))) 123 | 124 | return img, xys 125 | 126 | img_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) 127 | rects = dlib_detector(img_gray, 0) 128 | if (len(rects) != 1): 129 | return None 130 | 131 | rect = [rects[0].left(), rects[0].right(), rects[0].top(), rects[0].bottom()] 132 | image, rect = crop_expand_dlib(image, rect) # dlib region is too small 133 | ## rect: [left, right, top, bottom] 134 | 135 | img = cv2.cvtColor(image[rect[2]:rect[3], rect[0]:rect[1]], cv2.COLOR_BGR2RGB) 136 | crop_width = img.shape[1] 137 | crop_height = img.shape[0] 138 | 139 | img = cv2.resize(img, (128, 128)) 140 | img = np.transpose(img, (2, 0, 1)) # 3*128*128, RGB 141 | input_blob = np.zeros((1, 3, 128, 128), dtype=np.uint8) 142 | input_blob[0] = img 143 | data = mx.nd.array(input_blob) 144 | db = mx.io.DataBatch(data=(data,)) 145 | alignment_handler.model.forward(db, is_train=False) 146 | alabel = alignment_handler.model.get_outputs()[-1].asnumpy()[0] 147 | 148 | img_landmarks = [] 149 | for j in xrange(alabel.shape[0]): 150 | a = cv2.resize(alabel[j], (128, 128)) 151 | ind = np.unravel_index(np.argmax(a, axis=None), a.shape) 152 | ## ind: [y, x] 153 | 154 | origin_x = rect[0] + ind[1] * crop_width / 128 155 | origin_y = rect[2] + ind[0] * crop_height / 128 156 | 157 | img_landmarks.append(str(origin_x)) 158 | img_landmarks.append(str(origin_y)) 159 | 160 | image, img_landmarks = crop_expand_alignment(image, img_landmarks) 161 | return image, img_landmarks 162 | 163 | def test_atnet(config_path): 164 | global wav_file 165 | global img_path 166 | img = cv2.imread(img_path) 167 | example_img, example_lmk = face_alignment(img) 168 | 169 | params = YParams(config_path, 'default') 170 | sample_rate = params.mel['sample_rate'] 171 | hop_step = params.mel['hop_step'] 172 | win_length = params.mel['win_length'] 173 | 
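# The audio/video alignment below relies on two ratios:
#   frame_wav_scale  = sample_rate / frame_rate    (PCM samples per video frame)
#   frame_mfcc_scale = frame_wav_scale / hop_step  (MFCC frames per video frame)
# For example, with sample_rate=16000, frame_rate=25 and hop_step=160 (values
# assumed for illustration, not read from config/params.yml), there are 640
# samples and 4 MFCC frames per video frame; the assert below only requires
# frame_mfcc_scale to be an integer.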
frame_rate = params.frame_rate 174 | mean = np.load(params.mean_file) 175 | component = np.load(params.components_file) 176 | 177 | example_lmk = np.dot((example_lmk - mean), component[:,:20]) 178 | example_lmk *= np.array([1.5, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0,2.0,1.0,1.0, 1,1,1,1,1, 1,1,1,1,1]) 179 | example_lmk = np.dot(example_lmk, component[:,:20].T) 180 | 181 | wav_loader = WavLoader(sr=sample_rate) 182 | 183 | pose = np.ones([1000,3], dtype=np.float32)*0.0 184 | ear = np.ones([1000,1], dtype=np.float32)*0.6 185 | ear[40:75,:] = np.ones([35,1], dtype=np.float32)*0.2 186 | 187 | pcm = wav_loader.get_data(wav_file) 188 | 189 | frame_wav_scale = sample_rate / frame_rate 190 | frame_mfcc_scale = frame_wav_scale / hop_step 191 | 192 | assert (frame_mfcc_scale - int(frame_mfcc_scale) == 0), "sample_rate/hop_step must divided by frame_rate." 193 | 194 | frame_mfcc_scale = int(frame_mfcc_scale) 195 | min_len = min(ear.shape[0], pose.shape[0], pcm.shape[0]//frame_wav_scale) 196 | 197 | g1 = tf.Graph() 198 | with g1.as_default(): 199 | 200 | ear = tf.convert_to_tensor(ear[np.newaxis, :min_len, :], dtype=tf.float32) 201 | pose = tf.convert_to_tensor(pose[np.newaxis, :min_len, :], dtype=tf.float32) 202 | seq_len = tf.convert_to_tensor(np.array([min_len]), dtype=tf.int32) 203 | example_landmark = tf.convert_to_tensor(example_lmk[np.newaxis, :], dtype=tf.float32) 204 | 205 | pcm_length = hop_step * (min_len * frame_mfcc_scale - 1) + win_length 206 | if (pcm.shape[0] < pcm_length): 207 | pcm = np.pad(pcm, (0, pcm_length - pcm.shape[0]), 'constant', constant_values=(0)) 208 | elif(pcm.shape[0] > pcm_length): 209 | pcm = pcm[:pcm_length] 210 | mfcc = extract_mfcc(pcm[np.newaxis, :], params) 211 | 212 | atnet = ATNet(config_path) 213 | params = atnet.params 214 | params.batch_size = 1 215 | atnet.set_params(params) 216 | 217 | infer_nodes = atnet.build_inference_op(ear, pose, mfcc, example_landmark, seq_len) 218 | 219 | sess = tf.Session() 220 | sess.run(tf.global_variables_initializer()) 221 | tf.train.Saver().restore(sess, 'ckpt_atnet/atnet-80000') 222 | lmk_seq = sess.run(infer_nodes['LandmarkDecoder']) 223 | save_lmkseq_video(lmk_seq, mean, "atnet.avi", wav_file) 224 | 225 | return example_img, example_lmk, lmk_seq 226 | 227 | def test_vgnet(config_path, example_img, example_landmark, lmk_seq): 228 | example_img = cv2.resize(example_img, (128, 128)).astype(np.float32)[np.newaxis, ...] 
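# The two steps below map the uint8 image range [0, 255] to roughly [-1, 1]
# (divide by 256, then shift and scale by 0.5), the same value range that the
# VGNet tests expect for example_img.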
229 | example_img /= 256.0 230 | example_img = (example_img - 0.5) / 0.5 231 | 232 | params = YParams(config_path, 'default') 233 | 234 | g2 = tf.Graph() 235 | with g2.as_default(): 236 | example_landmark = tf.convert_to_tensor(example_landmark[np.newaxis, :], dtype=tf.float32) 237 | example_img = tf.convert_to_tensor(example_img, dtype=tf.float32) 238 | seq_len = tf.convert_to_tensor(np.array([lmk_seq.shape[1]]), dtype=tf.int32) 239 | lmk_seq = tf.convert_to_tensor((lmk_seq), dtype=tf.float32) 240 | 241 | vgnet = VGNet(config_path) 242 | params = vgnet.params 243 | params.batch_size = 1 244 | vgnet.set_params(params) 245 | 246 | infer_nodes = vgnet.build_inference_op(lmk_seq, example_landmark, example_img, seq_len) 247 | 248 | sess = tf.Session(graph=g2) 249 | sess.run(tf.global_variables_initializer()) 250 | tf.train.Saver().restore(sess, 'ckpt_vgnet/vgnet-70000') 251 | img_seq = sess.run(infer_nodes['Fake_img_seq']) 252 | 253 | save_imgseq_video(img_seq, "vgnet.mp4", wav_file) 254 | 255 | 256 | if (__name__ == '__main__'): 257 | 258 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>") 259 | cmd_parser.add_option('--config_path', type="string", dest="config_path", 260 | help='the config yaml file') 261 | 262 | opts, argv = cmd_parser.parse_args() 263 | 264 | if (opts.config_path is None): 265 | logger.error('Please check your parameters.') 266 | exit(0) 267 | 268 | config_path = opts.config_path 269 | 270 | if (not os.path.exists(config_path)): 271 | logger.error('config_path not exists') 272 | exit(0) 273 | 274 | example_img, example_landmark, lmk_seq = test_atnet(config_path) 275 | test_vgnet(config_path, example_img, example_landmark, lmk_seq) 276 | 277 | 278 | # lmk_seq = [] 279 | # example_image = None 280 | # example_landmark = None 281 | # params = YParams(config_path, 'default') 282 | # mean = np.load(params.mean_file) 283 | # component = np.load(params.components_file) 284 | 285 | # wav_file = '/Users/donglu/Downloads/cctv_cut.wav' 286 | # cap = cv2.VideoCapture('/Users/donglu/Downloads/cctv_cut.mp4') 287 | # if (cap.isOpened()): 288 | # success, image = cap.read() 289 | # idx = 0 290 | # while (success): 291 | # idx += 1 292 | # if(idx==100): 293 | # break 294 | # [h, w, c] = image.shape 295 | # if c > 3: 296 | # image = image[:, :, :3] 297 | # example_img, example_lmk = face_alignment(image) 298 | # example_lmk = np.dot((example_lmk - mean), component[:,:20]) 299 | # example_lmk *= np.array([1.5, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0,2.0,1.0,1.0, 1,1,1,1,1, 1,1,1,1,1]) 300 | # example_lmk = np.dot(example_lmk, component[:,:20].T) 301 | # if(example_image is None): 302 | # example_image = example_img 303 | # if(example_landmark is None): 304 | # example_landmark = example_lmk 305 | # lmk_seq.append(example_lmk) 306 | 307 | # success, image = cap.read() 308 | # cap.release() 309 | # lmk_seq = np.array(lmk_seq)[np.newaxis,...] 310 | # save_lmkseq_video(lmk_seq, mean, "atnet.avi", wav_file) 311 | 312 | # test_vgnet(config_path, example_image, example_landmark, lmk_seq) 313 | 314 | 315 | -------------------------------------------------------------------------------- /voicepuppet/atvgnet/plot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | import numpy as np 4 | import os 5 | import cv2 6 | import subprocess 7 | 8 | 9 | def strokeline_lookup(): 10 | ''' 11 | the strokeline index of 68 points. 
12 | ''' 13 | Mouth = [[48, 49], [49, 50], [50, 51], [51, 52], [52, 53], [53, 54], [54, 55], [55, 56], [56, 57], \ 14 | [57, 58], [58, 59], [59, 48], [60, 61], [61, 62], [62, 63], [63, 64], [64, 65], [65, 66], \ 15 | [66, 67], [67, 60]] 16 | 17 | Nose = [[27, 28], [28, 29], [29, 30], [30, 31], [30, 35], [31, 32], [32, 33], \ 18 | [33, 34], [34, 35], [27, 31], [27, 35]] 19 | 20 | leftBrow = [[17, 18], [18, 19], [19, 20], [20, 21]] 21 | rightBrow = [[22, 23], [23, 24], [24, 25], [25, 26]] 22 | 23 | leftEye = [[36, 37], [37, 38], [38, 39], [39, 40], [40, 41], [36, 41]] 24 | rightEye = [[42, 43], [43, 44], [44, 45], [45, 46], [46, 47], [42, 47]] 25 | 26 | other = [[0, 1], [1, 2], [2, 3], [3, 4], [4, 5], [5, 6], \ 27 | [6, 7], [7, 8], [8, 9], [9, 10], [10, 11], [11, 12], \ 28 | [12, 13], [13, 14], [14, 15], [15, 16]] 29 | 30 | faceLmarkLookups = [] 31 | faceLmarkLookups.append(Mouth) 32 | faceLmarkLookups.append(Nose) 33 | faceLmarkLookups.append(leftBrow) 34 | faceLmarkLookups.append(rightBrow) 35 | faceLmarkLookups.append(leftEye) 36 | faceLmarkLookups.append(rightEye) 37 | faceLmarkLookups.append(other) 38 | return faceLmarkLookups 39 | 40 | 41 | def plot_lmk_seq(save_dir, step, mean, seq_len, real_lmk_seq, lmk_seq): 42 | ''' 43 | merge 128x128 images to a large 9*10 grid picture. 44 | ''' 45 | 46 | ## 9*10 block 47 | block_x = 10 48 | block_y = 9 49 | img_size = 128 50 | 51 | faceLmarkLookups = strokeline_lookup() 52 | 53 | def merge_seq(lmk_seq, big_img, time, h_index): 54 | 55 | for i in range(time): 56 | back_img = np.ones((img_size, img_size), dtype=np.uint8) * 255 57 | lmk = (((lmk_seq[0, i, ...] + mean)/2+0.5) * img_size).astype(np.int32) 58 | for k in range(68): 59 | cv2.circle(back_img, (int(lmk[k * 2]), int(lmk[k * 2 + 1])), 1, [0], -1) 60 | 61 | for part in faceLmarkLookups: 62 | for idx in part: 63 | cv2.line(back_img, (int(lmk[idx[0] * 2]), int(lmk[idx[0] * 2 + 1])), 64 | (int(lmk[idx[1] * 2]), int(lmk[idx[1] * 2 + 1])), (0), 1) 65 | 66 | big_img[(i // block_x + h_index) * img_size: (i // block_x + h_index + 1) * img_size, 67 | (i % block_x) * img_size: (i % block_x + 1) * img_size] = back_img 68 | 69 | return big_img 70 | 71 | ### We only pick the first sequence of the batch, trim length of 30. 72 | if (seq_len[0] > 30): 73 | time = 30 74 | else: 75 | time = seq_len[0] 76 | 77 | big_img = np.zeros((img_size * block_y, img_size * block_x), dtype=np.uint8) 78 | big_img = merge_seq(real_lmk_seq, big_img, time, 0) 79 | big_img = merge_seq(lmk_seq, big_img, time, 3) 80 | 81 | cv2.imwrite('{}/atnet_{}.jpg'.format(save_dir, step), big_img) 82 | 83 | 84 | def plot_image_seq(save_dir, step, mean, seq_len, real_lmk_seq, real_mask_seq, real_img_seq, fake_img_seq, 85 | attention_seq): 86 | ''' 87 | merge 2 sequence of image and attention map to a large image (9*10 grid picture). 88 | ''' 89 | 90 | ## 9*10 block 91 | block_x = 10 92 | block_y = 9 93 | img_size = real_img_seq.shape[2] 94 | 95 | ### We only pick the first sequence of the batch, trim length of 30. 96 | if (seq_len[0] > 30): 97 | time = 30 98 | else: 99 | time = seq_len[0] 100 | 101 | big_img = 255 * np.ones((img_size * block_y, img_size * block_x, 4), dtype=np.uint8) 102 | 103 | for i in range(time): 104 | real_img = (((real_img_seq[0, i, ...] * 0.5) + 0.5) * 256).astype(np.uint8) 105 | fake_img = (((fake_img_seq[0, i, ...] * 0.5) + 0.5) * 256).astype(np.uint8) 106 | real_mask = (((real_mask_seq[0, i, ...] + 1) / 2) * 255).astype(np.uint8) 107 | attention_img = (attention_seq[0, i, ...] 
* 256).astype(np.uint8) 108 | 109 | lmk = (((real_lmk_seq[0, i, ...] + mean)/2+0.5) * img_size).astype(np.int32) 110 | for k in range(68): 111 | cv2.circle(real_img, (int(lmk[k * 2]), int(lmk[k * 2 + 1])), 1, [255, 255, 0], 1) 112 | 113 | real_img = np.concatenate([real_img, real_mask], axis=-1) 114 | 115 | big_img[i // block_x * img_size: (i // block_x + 1) * img_size, 116 | (i % block_x) * img_size: (i % block_x + 1) * img_size, 117 | :] = real_img 118 | 119 | big_img[(i // block_x + 3) * img_size: (i // block_x + 1 + 3) * img_size, 120 | (i % block_x) * img_size: (i % block_x + 1) * img_size, 121 | :-1] = fake_img 122 | 123 | big_img[(i // block_x + 6) * img_size: (i // block_x + 1 + 6) * img_size, 124 | (i % block_x) * img_size: (i % block_x + 1) * img_size, 125 | :] = cv2.merge((attention_img, attention_img, attention_img, attention_img)) 126 | 127 | cv2.imwrite('{}/vgnet_{}.png'.format(save_dir, step), big_img) 128 | 129 | 130 | def save_lmkseq_video(lmk_seq, mean, output_file, wav_file=None): 131 | img_size = 480 132 | seq_len = lmk_seq.shape[1] 133 | fourcc = cv2.VideoWriter_fourcc(*'MJPG') 134 | output_movie = cv2.VideoWriter('temp.avi', fourcc, 25, (img_size, img_size), isColor=False) 135 | faceLmarkLookups = strokeline_lookup() 136 | 137 | for i in range(seq_len): 138 | back_img = np.ones((img_size, img_size), dtype=np.uint8) * 255 139 | lmk = (((lmk_seq[0, i, ...] + mean)/2+0.5) * img_size).astype(np.int32) 140 | for k in range(68): 141 | cv2.circle(back_img, (int(lmk[k * 2]), int(lmk[k * 2 + 1])), 1, [0], -1) 142 | 143 | for part in faceLmarkLookups: 144 | for idx in part: 145 | cv2.line(back_img, (int(lmk[idx[0] * 2]), int(lmk[idx[0] * 2 + 1])), 146 | (int(lmk[idx[1] * 2]), int(lmk[idx[1] * 2 + 1])), (0), 1) 147 | 148 | output_movie.write(back_img) 149 | 150 | if (wav_file is not None): 151 | cmd = 'ffmpeg -y -i temp.avi -i ' + wav_file + ' -c:v copy -c:a aac -strict experimental ' + output_file 152 | subprocess.call(cmd, shell=True) 153 | os.remove('temp.avi') 154 | 155 | 156 | def save_imgseq_video(img_seq, output_file, wav_file=None): 157 | def mkdir(dirname): 158 | if not os.path.isdir(dirname): 159 | os.makedirs(dirname) 160 | 161 | img_size = 128 162 | seq_len = img_seq.shape[1] 163 | mkdir('temp') 164 | 165 | for i in range(seq_len): 166 | real_img = (((img_seq[0, i, ...] 
* 0.5) + 0.5) * 256).astype(np.uint8) 167 | cv2.imwrite('temp/{}.jpg'.format(i), real_img, [int(cv2.IMWRITE_JPEG_QUALITY), 100]) 168 | 169 | if (wav_file is not None): 170 | cmd = 'ffmpeg -i temp/%d.jpg -i ' + wav_file + ' -c:v libx264 -c:a aac -strict experimental -y -vf format=yuv420p ' + output_file 171 | subprocess.call(cmd, shell=True) 172 | cmd = 'rm -rf temp temp.avi' 173 | subprocess.call(cmd, shell=True) 174 | -------------------------------------------------------------------------------- /voicepuppet/atvgnet/test_atnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | """Test for ATNet architectures.""" 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | from optparse import OptionParser 9 | import tensorflow as tf 10 | import numpy as np 11 | import os 12 | from atnet import ATNet 13 | from tinynet import MfccNet 14 | 15 | 16 | class ArchitectureTest(tf.test.TestCase): 17 | 18 | def testATNet(self): 19 | config_path = 'config/params.yml' 20 | with tf.Graph().as_default(): 21 | time = 100 22 | 23 | ### ATNet setting 24 | atnet = ATNet(config_path) 25 | params = atnet.params 26 | params.batch_size = 2 27 | atnet.set_params(params) 28 | 29 | seq_len = np.random.uniform(1, 100, params.batch_size).astype(np.int32) 30 | time = max(seq_len) 31 | 32 | ## landmark: [batch_size, time, 68*2] 33 | landmark = tf.random.uniform([params.batch_size, time, params.landmark_size], minval=-1, maxval=1, 34 | dtype=tf.float32) 35 | ## ears: [batch_size, 1] 36 | ears = tf.random.uniform([params.batch_size, time, 1], minval=0, maxval=1, dtype=tf.float32) 37 | ## poses: [batch_size, 3] 38 | poses = tf.random.uniform([params.batch_size, time, 3], minval=-1, maxval=1, dtype=tf.float32) 39 | ## mfccs: [batch_size, time*frame_mfcc_scale, num_mel_bins] 40 | mfccs = tf.random.uniform([params.batch_size, time * 5, 80], dtype=tf.float32) 41 | ## example_landmark: [batch_size, 68*2] 42 | example_landmark = tf.random.uniform([params.batch_size, params.landmark_size], minval=-1, maxval=1, 43 | dtype=tf.float32) 44 | ## seq_len: [batch_size], in rational size 45 | seq_len = tf.convert_to_tensor(seq_len, dtype=tf.int32) 46 | 47 | def check_nodes(nodes): 48 | ## Test input tensor 49 | self.assertAllEqual(nodes['Landmark'].shape, landmark.shape.as_list()) 50 | self.assertAllEqual(nodes['Ears'].shape, ears.shape.as_list()) 51 | self.assertAllEqual(nodes['Poses'].shape, poses.shape.as_list()) 52 | self.assertAllEqual(nodes['Mfccs'].shape, mfccs.shape.as_list()) 53 | self.assertAllEqual(nodes['Example_landmark'].shape, example_landmark.shape.as_list()) 54 | self.assertAllEqual(nodes['Seq_len'].shape, seq_len.shape.as_list()) 55 | 56 | ## Test MfccEncoder output tensor 57 | self.assertAllEqual(nodes['MfccEncoder'].shape, [params.batch_size, time, params.encode_embedding_size]) 58 | ## Test LandmarkEncoder output tensor 59 | self.assertAllEqual(nodes['LandmarkEncoder'].shape, [params.batch_size, time, params.encode_embedding_size]) 60 | ## Test PoseEncoder output tensor 61 | self.assertAllEqual(nodes['PoseEncoder'].shape, [params.batch_size, time, params.encode_embedding_size]) 62 | ## Test RNNModule output tensor 63 | self.assertAllEqual(nodes['RNNModule'].shape, [params.batch_size, time, params.rnn_hidden_size]) 64 | ## Test LandmarkDecoder output tensor 65 | self.assertAllEqual(nodes['LandmarkDecoder'].shape, [params.batch_size, time, 
params.landmark_size]) 66 | 67 | ## Test LandmarkDecoder output value range 68 | self.assertAllGreaterEqual(nodes['LandmarkDecoder'], -2) 69 | self.assertAllLessEqual(nodes['LandmarkDecoder'], 2) 70 | 71 | ################## 1. Test train stage ################## 72 | nodes = atnet.build_train_op(landmark, ears, poses, mfccs, example_landmark, seq_len) 73 | with self.session() as sess: 74 | sess.run(tf.global_variables_initializer()) 75 | result = sess.run([nodes['Landmark'], nodes['Ears'], nodes['Poses'], nodes['Mfccs'], nodes['Example_landmark'], 76 | nodes['Seq_len'], nodes['MfccEncoder'], nodes['LandmarkEncoder'], nodes['PoseEncoder'], 77 | nodes['RNNModule'], nodes['LandmarkDecoder']]) 78 | 79 | nodes = {} 80 | nodes.update({'Landmark': result[0]}) 81 | nodes.update({'Ears': result[1]}) 82 | nodes.update({'Poses': result[2]}) 83 | nodes.update({'Mfccs': result[3]}) 84 | nodes.update({'Example_landmark': result[4]}) 85 | nodes.update({'Seq_len': result[5]}) 86 | nodes.update({'MfccEncoder': result[6]}) 87 | nodes.update({'LandmarkEncoder': result[7]}) 88 | nodes.update({'PoseEncoder': result[8]}) 89 | nodes.update({'RNNModule': result[9]}) 90 | nodes.update({'LandmarkDecoder': result[10]}) 91 | check_nodes(nodes) 92 | 93 | ################## 2. Test evaluate stage ################## 94 | nodes = atnet.build_eval_op(landmark, ears, poses, mfccs, example_landmark, seq_len) 95 | with self.session() as sess: 96 | sess.run(tf.global_variables_initializer()) 97 | result = sess.run([nodes['Landmark'], nodes['Ears'], nodes['Poses'], nodes['Mfccs'], nodes['Example_landmark'], 98 | nodes['Seq_len'], nodes['MfccEncoder'], nodes['LandmarkEncoder'], nodes['PoseEncoder'], 99 | nodes['RNNModule'], nodes['LandmarkDecoder']]) 100 | 101 | nodes = {} 102 | nodes.update({'Landmark': result[0]}) 103 | nodes.update({'Ears': result[1]}) 104 | nodes.update({'Poses': result[2]}) 105 | nodes.update({'Mfccs': result[3]}) 106 | nodes.update({'Example_landmark': result[4]}) 107 | nodes.update({'Seq_len': result[5]}) 108 | nodes.update({'MfccEncoder': result[6]}) 109 | nodes.update({'LandmarkEncoder': result[7]}) 110 | nodes.update({'PoseEncoder': result[8]}) 111 | nodes.update({'RNNModule': result[9]}) 112 | nodes.update({'LandmarkDecoder': result[10]}) 113 | check_nodes(nodes) 114 | 115 | 116 | if (__name__ == '__main__'): 117 | tf.test.main() 118 | -------------------------------------------------------------------------------- /voicepuppet/atvgnet/test_vgnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | """Test for ATNet architectures.""" 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | import tensorflow as tf 9 | import numpy as np 10 | import os 11 | import random 12 | from vgnet import VGNet 13 | 14 | 15 | class ArchitectureTest(tf.test.TestCase): 16 | 17 | def testVGNet(self): 18 | config_path = 'config/params.yml' 19 | with tf.Graph().as_default(): 20 | img_size = 128 21 | 22 | ### VGNet setting 23 | vgnet = VGNet(config_path) 24 | params = vgnet.params 25 | params.batch_size = 2 26 | vgnet.set_params(params) 27 | 28 | seq_len = np.random.uniform(1, 100, params.batch_size).astype(np.int32) 29 | time = max(seq_len) 30 | 31 | ## real_landmark_seq: [batch_size, time, 68*2] 32 | real_landmark_seq = tf.random.uniform([params.batch_size, time, params.landmark_size], minval=-1, maxval=1, 33 | dtype=tf.float32) 34 | ## real_mask_seq: 
[batch_size, time, img_size, img_size, 1] 35 | real_mask_seq = tf.random.uniform([params.batch_size, time, img_size, img_size, 1], minval=0, maxval=1, 36 | dtype=tf.float32) 37 | ## real_img_seq: [batch_size, time, img_size, img_size, 3] 38 | real_img_seq = tf.random.uniform([params.batch_size, time, img_size, img_size, 3], minval=-1, maxval=1, 39 | dtype=tf.float32) 40 | ## example_landmark: [batch_size, 68*2] 41 | example_landmark = tf.random.uniform([params.batch_size, params.landmark_size], minval=-1, maxval=1, 42 | dtype=tf.float32) 43 | ## example_img: [batch_size, img_size, img_size, 3] 44 | example_img = tf.random.uniform([params.batch_size, img_size, img_size, 3], minval=-1, maxval=1, dtype=tf.float32) 45 | ## seq_len: [batch_size], in rational size 46 | seq_len = tf.convert_to_tensor(seq_len, dtype=tf.int32) 47 | 48 | def check_nodes(nodes): 49 | ## Test input tensors' shape 50 | self.assertAllEqual(nodes['Real_landmark_seq'].shape, real_landmark_seq.shape.as_list()) 51 | self.assertAllEqual(nodes['Real_mask_seq'].shape, real_mask_seq.shape.as_list()) 52 | self.assertAllEqual(nodes['Real_img_seq'].shape, real_img_seq.shape.as_list()) 53 | self.assertAllEqual(nodes['Example_landmark'].shape, example_landmark.shape.as_list()) 54 | self.assertAllEqual(nodes['Example_img'].shape, example_img.shape.as_list()) 55 | self.assertAllEqual(nodes['Seq_len'].shape, seq_len.shape.as_list()) 56 | 57 | ## Test Discriminator tensors' shape 58 | self.assertAllEqual(nodes['Discriminator']['Real_node']['Discriminator']['Decision'].shape, [params.batch_size]) 59 | self.assertAllEqual(nodes['Discriminator']['Real_node']['Discriminator']['LandmarkSeq'].shape, 60 | [params.batch_size, time, params.landmark_size]) 61 | self.assertAllEqual(nodes['Discriminator']['Fake_node']['Discriminator']['Decision'].shape, 62 | [params.batch_size]) 63 | self.assertAllEqual(nodes['Discriminator']['Fake_node']['Discriminator']['LandmarkSeq'].shape, 64 | [params.batch_size, time, params.landmark_size]) 65 | self.assertAllEqual(nodes['Discriminator']['Generator_node']['Generator']['Color'].shape, 66 | [params.batch_size, time, img_size, img_size, 3]) 67 | self.assertAllEqual(nodes['Discriminator']['Generator_node']['Generator']['Attention'].shape, 68 | [params.batch_size, time, img_size, img_size, 1]) 69 | self.assertAllEqual(nodes['Discriminator']['Generator_node']['Generator']['Feature'].shape, 70 | [params.batch_size, time, img_size, img_size, 3]) 71 | 72 | ## Test Generator tensors' shape 73 | self.assertAllEqual(nodes['Generator']['Discriminator_node']['Discriminator']['Decision'].shape, 74 | [params.batch_size]) 75 | self.assertAllEqual(nodes['Generator']['Discriminator_node']['Discriminator']['LandmarkSeq'].shape, 76 | [params.batch_size, time, params.landmark_size]) 77 | self.assertAllEqual(nodes['Generator']['Generator_node']['Generator']['Color'].shape, 78 | [params.batch_size, time, img_size, img_size, 3]) 79 | self.assertAllEqual(nodes['Generator']['Generator_node']['Generator']['Attention'].shape, 80 | [params.batch_size, time, img_size, img_size, 1]) 81 | self.assertAllEqual(nodes['Generator']['Generator_node']['Generator']['Feature'].shape, 82 | [params.batch_size, time, img_size, img_size, 3]) 83 | 84 | ## Test input tensors' value range 85 | self.assertAllGreaterEqual(nodes['Real_landmark_seq'], -1) 86 | self.assertAllLessEqual(nodes['Real_landmark_seq'], 1) 87 | self.assertAllGreaterEqual(nodes['Real_mask_seq'], 0) 88 | self.assertAllLessEqual(nodes['Real_mask_seq'], 1) 89 | 
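# The input-range assertions here and just below simply re-check the tf.random.uniform
# bounds used to build the test tensors; the substantive checks are the output ranges
# further down: Decision/Attention in [0, 1] and Color/Feature in [-1, 1], presumably
# sigmoid- and tanh-activated heads (an inference from the asserted ranges, not stated in this file).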
self.assertAllGreaterEqual(nodes['Real_img_seq'], -1) 90 | self.assertAllLessEqual(nodes['Real_img_seq'], 1) 91 | self.assertAllGreaterEqual(nodes['Example_landmark'], -1) 92 | self.assertAllLessEqual(nodes['Example_landmark'], 1) 93 | self.assertAllGreaterEqual(nodes['Example_img'], -1) 94 | self.assertAllLessEqual(nodes['Example_img'], 1) 95 | self.assertAllGreaterEqual(nodes['Seq_len'], 1) 96 | self.assertAllLessEqual(nodes['Seq_len'], time) 97 | 98 | ## Test Discriminator tensors' value range 99 | self.assertAllGreaterEqual(nodes['Discriminator']['Real_node']['Discriminator']['Decision'], 0) 100 | self.assertAllLessEqual(nodes['Discriminator']['Real_node']['Discriminator']['Decision'], 1) 101 | self.assertAllGreaterEqual(nodes['Discriminator']['Real_node']['Discriminator']['LandmarkSeq'], -2) 102 | self.assertAllLessEqual(nodes['Discriminator']['Real_node']['Discriminator']['LandmarkSeq'], 2) 103 | self.assertAllGreaterEqual(nodes['Discriminator']['Fake_node']['Discriminator']['Decision'], 0) 104 | self.assertAllLessEqual(nodes['Discriminator']['Fake_node']['Discriminator']['Decision'], 1) 105 | self.assertAllGreaterEqual(nodes['Discriminator']['Fake_node']['Discriminator']['LandmarkSeq'], -2) 106 | self.assertAllLessEqual(nodes['Discriminator']['Fake_node']['Discriminator']['LandmarkSeq'], 2) 107 | self.assertAllGreaterEqual(nodes['Discriminator']['Generator_node']['Generator']['Color'], -1) 108 | self.assertAllLessEqual(nodes['Discriminator']['Generator_node']['Generator']['Color'], 1) 109 | self.assertAllGreaterEqual(nodes['Discriminator']['Generator_node']['Generator']['Attention'], 0) 110 | self.assertAllLessEqual(nodes['Discriminator']['Generator_node']['Generator']['Attention'], 1) 111 | self.assertAllGreaterEqual(nodes['Discriminator']['Generator_node']['Generator']['Feature'], -1) 112 | self.assertAllLessEqual(nodes['Discriminator']['Generator_node']['Generator']['Feature'], 1) 113 | 114 | ## Test Generator tensors' value range 115 | self.assertAllGreaterEqual(nodes['Generator']['Discriminator_node']['Discriminator']['Decision'], 0) 116 | self.assertAllLessEqual(nodes['Generator']['Discriminator_node']['Discriminator']['Decision'], 1) 117 | self.assertAllGreaterEqual(nodes['Generator']['Discriminator_node']['Discriminator']['LandmarkSeq'], -2) 118 | self.assertAllLessEqual(nodes['Generator']['Discriminator_node']['Discriminator']['LandmarkSeq'], 2) 119 | self.assertAllGreaterEqual(nodes['Generator']['Generator_node']['Generator']['Color'], -1) 120 | self.assertAllLessEqual(nodes['Generator']['Generator_node']['Generator']['Color'], 1) 121 | self.assertAllGreaterEqual(nodes['Generator']['Generator_node']['Generator']['Attention'], 0) 122 | self.assertAllLessEqual(nodes['Generator']['Generator_node']['Generator']['Attention'], 1) 123 | self.assertAllGreaterEqual(nodes['Generator']['Generator_node']['Generator']['Feature'], -1) 124 | self.assertAllLessEqual(nodes['Generator']['Generator_node']['Generator']['Feature'], 1) 125 | 126 | def walkDict(aDict, key_list, value_list, path=()): 127 | ## visit the nodes dict into key and value list, while keep the hierarchy 128 | for k in aDict: 129 | if type(aDict[k]) != dict: 130 | if ('_grads' in k or '_tvars' in k): 131 | continue 132 | key_list.append(path + (k,)) 133 | value_list.append(aDict[k]) 134 | else: 135 | walkDict(aDict[k], key_list, value_list, path + (k,)) 136 | 137 | ################## 1. 
Test train stage ################## 138 | nodes = vgnet.build_train_op(real_landmark_seq, real_mask_seq, real_img_seq, example_landmark, example_img, 139 | seq_len) 140 | 141 | with self.session() as sess: 142 | sess.run(tf.global_variables_initializer()) 143 | ## visit the nodes dict into key and value list, while keep the hierarchy 144 | key_list = [] 145 | value_list = [] 146 | walkDict(nodes, key_list, value_list) 147 | 148 | result = sess.run(value_list) 149 | 150 | ## replace the tensor in nodes by numpy matrix after sess.run 151 | for i, tensor in enumerate(result): 152 | node = nodes 153 | for key in key_list[i]: 154 | node = node[key] 155 | node = tensor 156 | 157 | ## test the nodes' shapes and values 158 | check_nodes(nodes) 159 | 160 | ################## 2. Test evaluate stage ################## 161 | nodes = vgnet.build_eval_op(real_landmark_seq, real_mask_seq, real_img_seq, example_landmark, example_img, 162 | seq_len) 163 | with self.session() as sess: 164 | sess.run(tf.global_variables_initializer()) 165 | ## visit the nodes dict into key and value list, while keep the hierarchy 166 | key_list = [] 167 | value_list = [] 168 | walkDict(nodes, key_list, value_list) 169 | 170 | result = sess.run(value_list) 171 | 172 | ## replace the tensor in nodes by numpy matrix after sess.run 173 | for i, tensor in enumerate(result): 174 | node = nodes 175 | for key in key_list[i]: 176 | node = node[key] 177 | node = tensor 178 | 179 | ## test the nodes' shapes and values 180 | check_nodes(nodes) 181 | 182 | 183 | if (__name__ == '__main__'): 184 | tf.test.main() 185 | -------------------------------------------------------------------------------- /voicepuppet/atvgnet/train_atnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | import tensorflow as tf 4 | import numpy as np 5 | import os 6 | from optparse import OptionParser 7 | import logging 8 | from atnet import ATNet 9 | from dataset.generator import ATNetDataGenerator 10 | from plot import * 11 | 12 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | def mkdir(dirname): 17 | if not os.path.isdir(dirname): 18 | os.makedirs(dirname) 19 | 20 | 21 | if (__name__ == '__main__'): 22 | 23 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>") 24 | cmd_parser.add_option('--config_path', type="string", dest="config_path", 25 | help='the config yaml file') 26 | 27 | opts, argv = cmd_parser.parse_args() 28 | 29 | if (opts.config_path is None): 30 | logger.error('Please check your parameters.') 31 | exit(0) 32 | 33 | config_path = opts.config_path 34 | 35 | if (not os.path.exists(config_path)): 36 | logger.error('config_path not exists') 37 | exit(0) 38 | 39 | os.environ["CUDA_VISIBLE_DEVICES"] = '1' 40 | 41 | batch_size = 16 42 | ### Generator for training setting 43 | train_generator = ATNetDataGenerator(config_path) 44 | params = train_generator.params 45 | params.dataset_path = params.train_dataset_path 46 | params.batch_size = batch_size 47 | train_generator.set_params(params) 48 | train_dataset = train_generator.get_dataset() 49 | 50 | ### Generator for evaluation setting 51 | eval_generator = ATNetDataGenerator(config_path) 52 | params = eval_generator.params 53 | params.dataset_path = params.eval_dataset_path 54 | params.batch_size = batch_size 55 | eval_generator.set_params(params) 56 | eval_dataset = 
eval_generator.get_dataset() 57 | 58 | sess = tf.Session() 59 | tf.train.start_queue_runners(sess=sess) 60 | 61 | train_iter = train_dataset.make_one_shot_iterator() 62 | eval_iter = eval_dataset.make_one_shot_iterator() 63 | 64 | ### ATNet setting 65 | atnet = ATNet(config_path) 66 | params = atnet.params 67 | epochs = params.training['epochs'] 68 | params.add_hparam('max_to_keep', 10) 69 | params.add_hparam('save_dir', 'ckpt_atnet') 70 | params.add_hparam('save_name', 'atnet') 71 | params.add_hparam('save_step', 1000) 72 | params.add_hparam('eval_step', 1000) 73 | params.add_hparam('summary_step', 100) 74 | params.add_hparam('eval_visual_dir', 'log/eval_atnet') 75 | params.add_hparam('summary_dir', 'log/summary_atnet') 76 | params.batch_size = batch_size 77 | atnet.set_params(params) 78 | mean = np.load(params.mean_file) 79 | 80 | mkdir(params.save_dir) 81 | mkdir(params.eval_visual_dir) 82 | mkdir(params.summary_dir) 83 | 84 | train_nodes = atnet.build_train_op(*train_iter.get_next()) 85 | eval_nodes = atnet.build_eval_op(*eval_iter.get_next()) 86 | sess.run(tf.global_variables_initializer()) 87 | 88 | # Restore from save_dir 89 | if ('checkpoint' in os.listdir(params.save_dir)): 90 | tf.train.Saver().restore(sess, tf.train.latest_checkpoint(params.save_dir)) 91 | 92 | tf.summary.scalar("loss", train_nodes['Loss']) 93 | tf.summary.scalar("lr", train_nodes['Lr']) 94 | grads = train_nodes['Grads'] 95 | tvars = train_nodes['Tvars'] 96 | # Add histograms for gradients. 97 | for i, grad in enumerate(grads): 98 | if grad is not None: 99 | var = tvars[i] 100 | if ('BatchNorm' not in var.op.name): 101 | tf.summary.histogram(var.op.name + '/gradients', grad) 102 | 103 | merge_summary_op = tf.summary.merge_all() 104 | summary_writer = tf.summary.FileWriter(params.summary_dir, graph=sess.graph) 105 | 106 | for i in range(epochs): 107 | ### Run training 108 | result = sess.run([train_nodes['Train_op'], 109 | merge_summary_op, 110 | train_nodes['Loss'], 111 | train_nodes['Lr'], 112 | train_nodes['Global_step'], 113 | train_nodes['Mfccs'], 114 | train_nodes['Poses'], 115 | train_nodes['Ears'], 116 | train_nodes['Seq_len'], 117 | train_nodes['Landmark'], 118 | train_nodes['Example_landmark']]) 119 | _, summary, loss, lr, global_step, mfccs, poses, ears, seq_len, landmark, example_landmark = result 120 | print('Step {}: Loss= {:.3f}, Lr= {:.2e}'.format(global_step, loss, lr)) 121 | 122 | if (global_step % params.summary_step == 0): 123 | summary_writer.add_summary(summary, global_step) 124 | 125 | ### Run evaluation 126 | if (global_step % params.eval_step == 0): 127 | result = sess.run([eval_nodes['Loss'], 128 | eval_nodes['Seq_len'], 129 | eval_nodes['Landmark'], 130 | eval_nodes['LandmarkDecoder']]) 131 | loss, seq_len, real_lmk_seq, lmk_seq = result 132 | 133 | print('\r\nEvaluation >>> Loss= {:.3f}'.format(loss)) 134 | plot_lmk_seq(params.eval_visual_dir, global_step, mean, seq_len, real_lmk_seq, lmk_seq) 135 | 136 | ### Save checkpoint 137 | if (global_step % params.save_step == 0): 138 | tf.train.Saver(max_to_keep=params.max_to_keep, var_list=tf.global_variables()).save(sess, 139 | os.path.join(params.save_dir, 140 | params.save_name), 141 | global_step=global_step) 142 | -------------------------------------------------------------------------------- /voicepuppet/atvgnet/train_vgnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | import tensorflow as tf 4 | import numpy as np 5 | import os 6 | from 
vgnet import VGNet 7 | from dataset.generator import VGNetDataGenerator 8 | from optparse import OptionParser 9 | import logging 10 | from plot import * 11 | 12 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | def mkdir(dirname): 17 | if not os.path.isdir(dirname): 18 | os.makedirs(dirname) 19 | 20 | 21 | if (__name__ == '__main__'): 22 | 23 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>") 24 | cmd_parser.add_option('--config_path', type="string", dest="config_path", 25 | help='the config yaml file') 26 | 27 | opts, argv = cmd_parser.parse_args() 28 | 29 | if (opts.config_path is None): 30 | logger.error('Please check your parameters.') 31 | exit(0) 32 | 33 | config_path = opts.config_path 34 | 35 | if (not os.path.exists(config_path)): 36 | logger.error('config_path not exists') 37 | exit(0) 38 | 39 | os.environ["CUDA_VISIBLE_DEVICES"] = '2' 40 | 41 | batch_size = 4 42 | ### Generator for training setting 43 | train_generator = VGNetDataGenerator(config_path) 44 | params = train_generator.params 45 | params.dataset_path = params.train_dataset_path 46 | params.batch_size = batch_size 47 | train_generator.set_params(params) 48 | train_dataset = train_generator.get_dataset() 49 | 50 | ### Generator for evaluation setting 51 | eval_generator = VGNetDataGenerator(config_path) 52 | params = eval_generator.params 53 | params.dataset_path = params.eval_dataset_path 54 | params.batch_size = batch_size 55 | eval_generator.set_params(params) 56 | eval_dataset = eval_generator.get_dataset() 57 | 58 | sess = tf.Session() 59 | tf.train.start_queue_runners(sess=sess) 60 | 61 | train_iter = train_dataset.make_one_shot_iterator() 62 | eval_iter = eval_dataset.make_one_shot_iterator() 63 | 64 | ### VGNet setting 65 | vgnet = VGNet(config_path) 66 | params = vgnet.params 67 | epochs = params.training['epochs'] 68 | params.add_hparam('max_to_keep', 10) 69 | params.add_hparam('save_dir', 'ckpt_vgnet') 70 | params.add_hparam('save_name', 'vgnet') 71 | params.add_hparam('save_step', 1000) 72 | params.add_hparam('eval_step', 1000) 73 | params.add_hparam('summary_step', 100) 74 | params.add_hparam('alternative', 1000) 75 | params.add_hparam('eval_visual_dir', 'log/eval_vgnet') 76 | params.add_hparam('summary_dir', 'log/summary_vgnet') 77 | params.batch_size = batch_size 78 | vgnet.set_params(params) 79 | mean = np.load(params.mean_file) 80 | 81 | mkdir(params.save_dir) 82 | mkdir(params.eval_visual_dir) 83 | mkdir(params.summary_dir) 84 | 85 | train_nodes = vgnet.build_train_op(*train_iter.get_next()) 86 | eval_nodes = vgnet.build_eval_op(*eval_iter.get_next()) 87 | sess.run(tf.global_variables_initializer()) 88 | 89 | # Restore from save_dir 90 | if ('checkpoint' in os.listdir(params.save_dir)): 91 | tf.train.Saver().restore(sess, tf.train.latest_checkpoint(params.save_dir, latest_filename=None)) 92 | 93 | # Add summary when training 94 | discriminator_summary = [] 95 | discriminator_summary.append(tf.summary.scalar("real_bce_loss", train_nodes['Discriminator']['Real_bce_loss'])) 96 | discriminator_summary.append(tf.summary.scalar("real_lmk_loss", train_nodes['Discriminator']['Real_lmk_loss'])) 97 | discriminator_summary.append(tf.summary.scalar("fake_bce_loss", train_nodes['Discriminator']['Fake_bce_loss'])) 98 | discriminator_summary.append(tf.summary.scalar("fake_lmk_loss", train_nodes['Discriminator']['Fake_lmk_loss'])) 99 | discriminator_summary.append( 100 | 
tf.summary.scalar("discriminator_loss", train_nodes['Discriminator']['Discriminator_loss'])) 101 | 102 | generator_summary = [] 103 | generator_summary.append(tf.summary.scalar("bce_loss", train_nodes['Generator']['Bce_loss'])) 104 | generator_summary.append(tf.summary.scalar("lmk_loss", train_nodes['Generator']['Lmk_loss'])) 105 | generator_summary.append(tf.summary.scalar("pix_loss", train_nodes['Generator']['Pix_loss'])) 106 | generator_summary.append(tf.summary.scalar("generator_loss", train_nodes['Generator']['Generator_loss'])) 107 | 108 | # Add gradient to summary 109 | grads = train_nodes['Discriminator_grads'] 110 | tvars = train_nodes['Discriminator_tvars'] 111 | for i, grad in enumerate(grads): 112 | if grad is not None: 113 | var = tvars[i] 114 | if('BatchNorm' not in var.name): 115 | discriminator_summary.append(tf.summary.histogram(var.op.name + '/gradients', grad)) 116 | 117 | grads = train_nodes['Generator_grads'] 118 | tvars = train_nodes['Generator_tvars'] 119 | for i, grad in enumerate(grads): 120 | if grad is not None: 121 | var = tvars[i] 122 | if('BatchNorm' not in var.name): 123 | generator_summary.append(tf.summary.histogram(var.op.name + '/gradients', grad)) 124 | 125 | discriminator_summary_op = tf.summary.merge(discriminator_summary) 126 | generator_summary_op = tf.summary.merge(generator_summary) 127 | lr_summary_op = tf.summary.scalar("lr", train_nodes['Lr']) 128 | 129 | summary_writer = tf.summary.FileWriter(params.summary_dir, graph=sess.graph) 130 | 131 | # Run epoch 132 | for i in range(epochs): 133 | if ((i // params.alternative) % 2 == 0): 134 | ### Run discriminator training 135 | result = sess.run([train_nodes['Train_discriminator'], 136 | discriminator_summary_op, 137 | train_nodes['Lr'], 138 | train_nodes['Global_step'], 139 | train_nodes['Discriminator']['Real_bce_loss'], 140 | train_nodes['Discriminator']['Real_lmk_loss'], 141 | train_nodes['Discriminator']['Fake_bce_loss'], 142 | train_nodes['Discriminator']['Fake_lmk_loss'], 143 | train_nodes['Discriminator']['Discriminator_loss']]) 144 | _, summary, lr, global_step, real_bce_loss, real_lmk_loss, fake_bce_loss, fake_lmk_loss, discriminator_loss = result 145 | print( 146 | 'Step {}: Lr= {:.2e}, Discriminator_loss= {:.3f}, [Real_bce_loss= {:.3f}, Real_lmk_loss= {:.3f}, Fake_bce_loss= {:.3f}, Fake_lmk_loss= {:.3f}]'.format( 147 | global_step, lr, discriminator_loss, real_bce_loss, real_lmk_loss, fake_bce_loss, fake_lmk_loss)) 148 | 149 | else: 150 | ### Run generator training 151 | result = sess.run([train_nodes['Train_generator'], 152 | generator_summary_op, 153 | train_nodes['Lr'], 154 | train_nodes['Global_step'], 155 | train_nodes['Generator']['Bce_loss'], 156 | train_nodes['Generator']['Lmk_loss'], 157 | train_nodes['Generator']['Pix_loss'], 158 | train_nodes['Generator']['Generator_loss']]) 159 | _, summary, lr, global_step, bce_loss, lmk_loss, pix_loss, generator_loss = result 160 | print( 161 | 'Step {}: Lr= {:.2e}, Generator_loss= {:.3f}, [Bce_loss= {:.3f}, Lmk_loss= {:.3f}, Pix_loss= {:.3f}]'.format( 162 | global_step, lr, 163 | generator_loss, 164 | bce_loss, lmk_loss, pix_loss)) 165 | 166 | if (global_step % params.summary_step == 0): 167 | summary_writer.add_summary(summary, global_step) 168 | 169 | ### Run evaluation 170 | if (global_step % params.eval_step == 0): 171 | result = sess.run([eval_nodes['Real_landmark_seq'], 172 | eval_nodes['Real_mask_seq'], 173 | eval_nodes['Real_img_seq'], 174 | eval_nodes['Example_landmark'], 175 | eval_nodes['Example_img'], 176 | eval_nodes['Seq_len'], 
177 | eval_nodes['Generator']['Fake_img_seq'], 178 | eval_nodes['Generator']['Attention'], 179 | eval_nodes['Generator']['Generator_loss'], 180 | eval_nodes['Discriminator']['Discriminator_loss']]) 181 | real_landmark_seq, real_mask_seq, real_img_seq, example_landmark, example_img, seq_len, fake_img_seq, attention, generator_loss, discriminator_loss = result 182 | 183 | print('\r\nEvaluation >>> Generator_loss= {:.3f}, Discriminator_loss= {:.3f}'.format(generator_loss, 184 | discriminator_loss)) 185 | plot_image_seq(params.eval_visual_dir, global_step, mean, seq_len, real_landmark_seq, real_mask_seq, real_img_seq, 186 | fake_img_seq, attention) 187 | 188 | ### Save checkpoint 189 | if (global_step % params.save_step == 0): 190 | tf.train.Saver(max_to_keep=params.max_to_keep, var_list=tf.global_variables()).save(sess, 191 | os.path.join(params.save_dir, 192 | params.save_name), 193 | global_step=global_step) 194 | -------------------------------------------------------------------------------- /voicepuppet/bfmnet/BUILD: -------------------------------------------------------------------------------- 1 | package( 2 | default_visibility = [ 3 | "//visibility:public", 4 | ], 5 | features = ["-layering_check"], 6 | ) 7 | 8 | py_library( 9 | name = "tinynet", 10 | srcs = ["tinynet.py"], 11 | deps = [ 12 | ], 13 | ) 14 | 15 | py_library( 16 | name = "bfmnet", 17 | srcs = ["bfmnet.py"], 18 | deps = [ 19 | "//config:configure", 20 | ":tinynet", 21 | "//voicepuppet:builder" 22 | ], 23 | ) 24 | 25 | py_binary( 26 | name = "train_bfmnet", 27 | srcs = ["train_bfmnet.py"], 28 | deps = [ 29 | "//utils:bfm_load_data", 30 | "//utils:bfm_visual", 31 | "//utils:reconstruct_mesh", 32 | "//utils:utils", 33 | ":bfmnet", 34 | "//generator:generator" 35 | ], 36 | ) 37 | 38 | py_binary( 39 | name = "infer_bfmnet", 40 | srcs = ["infer_bfmnet.py"], 41 | deps = [ 42 | "//utils:bfm_load_data", 43 | "//utils:bfm_visual", 44 | "//utils:reconstruct_mesh", 45 | "//utils:utils", 46 | ":bfmnet", 47 | "//generator:generator", 48 | "//generator:loader" 49 | ], 50 | ) 51 | -------------------------------------------------------------------------------- /voicepuppet/bfmnet/infer_bfmnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | import tensorflow as tf 4 | import numpy as np 5 | import os 6 | from optparse import OptionParser 7 | import logging 8 | import subprocess 9 | from generator.loader import * 10 | from bfmnet import BFMNet 11 | from generator.generator import DataGenerator 12 | from utils.bfm_load_data import * 13 | from utils.bfm_visual import * 14 | from utils.utils import * 15 | import scipy 16 | 17 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 18 | logger = logging.getLogger(__name__) 19 | 20 | # ######################################################################################################### 21 | # facemodel = BFM('../allmodels') 22 | # def visual_3dface(root, name): 23 | # mkdir('output') 24 | # for file in os.listdir('output'): 25 | # os.system('rm -rf output/{}'.format(file)) 26 | 27 | # bfmcoeff_loader = BFMCoeffLoader() 28 | # bfm_coeff_seq = bfmcoeff_loader.get_data(os.path.join(root, 'bfmcoeff.txt')) 29 | # audio_file = os.path.join(root, 'audio.wav') 30 | # id_coeff = np.mean(bfm_coeff_seq[:, :80], 0) 31 | 32 | # for i in range(bfm_coeff_seq.shape[0]): 33 | # bfm_coeff_seq[i, :80] = id_coeff 34 | 35 | # for i in range(bfm_coeff_seq.shape[0]): 
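# (Note on the commented-out visual_3dface block: the lines above average the first 80 BFM
# coefficients, the identity part in the usual Deep3DFaceReconstruction 257-dim layout, over
# the whole sequence and write that mean back into every frame, so identity stays fixed while
# the remaining per-frame coefficients drive expression and pose; the loop that follows then
# reconstructs and rasterizes one face mesh per frame with mesh_core_cython before ffmpeg
# muxes the rendered frames with the audio track.)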
36 | # face_shape, face_texture, face_color, face_projection, z_buffer, landmarks_2d, _ = Reconstruction( 37 | # bfm_coeff_seq[i:i + 1, ...], facemodel) 38 | # if(i>300): 39 | # break 40 | # shape = np.squeeze(face_shape, (0)) 41 | # color = np.squeeze(face_color, (0)) 42 | # color = np.clip(color, 0, 255).astype(np.int32) 43 | # shape[:, :2] = 112 - shape[:, :2] * 112 44 | # shape *=3 45 | 46 | # img_size = 672 47 | # new_image = np.zeros((img_size * img_size * 3), dtype=np.uint8) 48 | # face_mask = np.zeros((img_size * img_size), dtype=np.uint8) 49 | 50 | # vertices = shape.reshape(-1).astype(np.float32).copy() 51 | # triangles = (facemodel.tri - 1).reshape(-1).astype(np.int32).copy() 52 | # colors = color.reshape(-1).astype(np.float32).copy() 53 | # depth_buffer = (np.zeros((img_size * img_size)) - 99999.0).astype(np.float32) 54 | # mesh_core_cython.render_colors_core(new_image, face_mask, vertices, triangles, colors, depth_buffer, 55 | # facemodel.tri.shape[0], img_size, img_size, 3) 56 | # new_image = new_image.reshape([img_size, img_size, 3]) 57 | 58 | # new_image = cv2.cvtColor(new_image, cv2.COLOR_BGR2RGB) 59 | 60 | # cv2.imwrite('output/{}.jpg'.format(i), new_image) 61 | # print(i) 62 | 63 | # cmd = 'ffmpeg -i output/%d.jpg -i ' + audio_file + ' -c:v libx264 -c:a aac -strict experimental -y {}'.format(name) 64 | # subprocess.call(cmd, shell=True) 65 | 66 | # root = '/media/dong/DiskData/gridcorpus/todir/vid1' 67 | # for folder in os.listdir(root): 68 | # name = os.path.join(root, folder+'.mp4') 69 | # visual_3dface(os.path.join(root, folder), name) 70 | # sys.exit(0) 71 | # ######################################################################################################### 72 | 73 | def alignto_bfm_coeff(model_dir, img, xys): 74 | from PIL import Image 75 | import tensorflow as tf 76 | 77 | def load_graph(graph_filename): 78 | with tf.gfile.GFile(graph_filename, 'rb') as f: 79 | graph_def = tf.GraphDef() 80 | graph_def.ParseFromString(f.read()) 81 | 82 | return graph_def 83 | 84 | # read standard landmarks for preprocessing images 85 | lm3D = load_lm3d(model_dir) 86 | 87 | # build reconstruction model 88 | with tf.Graph().as_default() as graph, tf.device('/cpu:0'): 89 | images = tf.placeholder(name='input_imgs', shape=[None, 224, 224, 3], dtype=tf.float32) 90 | graph_def = load_graph(os.path.join(model_dir, "FaceReconModel.pb")) 91 | tf.import_graph_def(graph_def, name='resnet', input_map={'input_imgs:0': images}) 92 | 93 | # output coefficients of R-Net (dim = 257) 94 | coeff = graph.get_tensor_by_name('resnet/coeff:0') 95 | 96 | with tf.Session() as sess: 97 | ps = map(lambda x: int(x), xys) 98 | 99 | left_eye_x = int(round((ps[72] + ps[74] + ps[76] + ps[78] + ps[80] + ps[82]) / 6)) 100 | left_eye_y = int(round((ps[73] + ps[75] + ps[77] + ps[79] + ps[81] + ps[83]) / 6)) 101 | right_eye_x = int(round((ps[84] + ps[86] + ps[88] + ps[90] + ps[92] + ps[94]) / 6)) 102 | right_eye_y = int(round((ps[85] + ps[87] + ps[89] + ps[91] + ps[93] + ps[95]) / 6)) 103 | nose_x = int(round(ps[60])) 104 | nose_y = int(round(ps[61])) 105 | left_mouse_x = int(round(ps[96])) 106 | left_mouse_y = int(round(ps[97])) 107 | right_mouse_x = int(round(ps[108])) 108 | right_mouse_y = int(round(ps[109])) 109 | 110 | lmk5 = np.array( 111 | [[left_eye_x, left_eye_y], [right_eye_x, right_eye_y], [nose_x, nose_y], [left_mouse_x, left_mouse_y], 112 | [right_mouse_x, right_mouse_y]]) 113 | 114 | image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) 115 | # preprocess input image 116 | input_img, 
lm_new, transform_params = Preprocess(image, lmk5, lm3D) 117 | bfmcoeff = sess.run(coeff, feed_dict={images: input_img}) 118 | return bfmcoeff, input_img, transform_params 119 | 120 | 121 | if (__name__ == '__main__'): 122 | 123 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>") 124 | cmd_parser.add_option('--config_path', type="string", dest="config_path", 125 | help='the config yaml file') 126 | 127 | opts, argv = cmd_parser.parse_args() 128 | 129 | if (opts.config_path is None): 130 | logger.error('Please check your parameters.') 131 | exit(0) 132 | 133 | config_path = opts.config_path 134 | 135 | if (not os.path.exists(config_path)): 136 | logger.error('config_path not exists') 137 | exit(0) 138 | 139 | image_file, audio_file = argv 140 | 141 | os.environ["CUDA_VISIBLE_DEVICES"] = '-1' 142 | mkdir('output') 143 | for file in os.listdir('output'): 144 | os.system('rm -rf output/{}'.format(file)) 145 | 146 | batch_size = 1 147 | ### Generator for inference setting 148 | infer_generator = DataGenerator(config_path) 149 | params = infer_generator.params 150 | params.batch_size = batch_size 151 | infer_generator.set_params(params) 152 | wav_loader = WavLoader(sr=infer_generator.sample_rate) 153 | pcm = wav_loader.get_data(audio_file) 154 | 155 | pad_len = int(1 + pcm.shape[0] / infer_generator.frame_wav_scale) 156 | # calculate the rational length of pcm in order to keep the alignment of mfcc and landmark sequence. 157 | pcm_length = infer_generator.hop_step * (pad_len * infer_generator.frame_mfcc_scale - 1) + infer_generator.win_length 158 | if (pcm.shape[0] < pcm_length): 159 | pcm = np.pad(pcm, (0, pcm_length - pcm.shape[0]), 'constant', constant_values=(0)) 160 | pcm_slice = pcm[:pcm_length][np.newaxis, :] 161 | 162 | ears = np.ones([1, pad_len, 1], dtype=np.float32)*0.9 163 | for i in range(pad_len//2): 164 | ears[0, i, 0] = 0.2 165 | ears = tf.convert_to_tensor(ears, dtype=tf.float32) 166 | mfcc = infer_generator.extract_mfcc(pcm_slice) 167 | img = cv2.imread(image_file) 168 | 169 | _, _, img_cropped, lmk_cropped, center_x, center_y, ratio = get_mxnet_sat_alignment(params.model_dir, img) 170 | bfmcoeff, input_img, transform_params = alignto_bfm_coeff(params.model_dir, img_cropped, lmk_cropped) 171 | ratio *= transform_params[2] 172 | tx = -int(round(transform_params[3] / ratio)) 173 | ty = -int(round(transform_params[4] / ratio)) 174 | 175 | seq_len = tf.convert_to_tensor([pad_len], dtype=tf.int32) 176 | 177 | config = tf.ConfigProto() 178 | config.gpu_options.allow_growth = True 179 | sess = tf.Session(config=config) 180 | 181 | ### BFMNet setting 182 | bfmnet = BFMNet(config_path) 183 | params = bfmnet.params 184 | params.batch_size = batch_size 185 | bfmnet.set_params(params) 186 | facemodel = BFM(params.model_dir) 187 | 188 | infer_nodes = bfmnet.build_inference_op(ears, mfcc, seq_len) 189 | sess.run(tf.global_variables_initializer()) 190 | 191 | # Restore from save_dir 192 | tf.train.Saver().restore(sess, 'ckpt_bfmnet/bfmnet-65000') 193 | 194 | ### Run inference 195 | bfm_coeff_seq = sess.run(infer_nodes['BFMCoeffDecoder']) 196 | bfmcoeff = np.tile(bfmcoeff[:, np.newaxis, :], [1, bfm_coeff_seq.shape[1], 1]) 197 | 198 | bfm_coeff_seq = np.concatenate([bfmcoeff[:, :, :80], bfm_coeff_seq[:, :, :], bfmcoeff[:, :, 144:]], axis=2) 199 | merge_images = [] 200 | 201 | ### step 2: generate tuple image sequence 202 | angles = np.array([[0, 0, 0]], dtype=np.float32) 203 | shift = 0.04 204 | for i in range(bfm_coeff_seq.shape[1]): 205 | angles[0][1] += shift 206 | 
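# The yaw term angles[0][1] is swept back and forth in steps of `shift` (0.04), with the
# direction reversed below once it passes +/-0.8; note that `angles` is not passed to
# Reconstruction() in this loop, so the sweep appears to have no effect on the rendered
# frames here (Reconstruction_rotation() in infer_bfm_pixflow.py is the variant that
# actually consumes the angles).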
if (angles[0][1] > 0.8 or angles[0][1] < -0.8): 207 | shift = -shift 208 | 209 | face_shape, face_texture, face_color, face_projection, z_buffer, landmarks_2d, _ = Reconstruction( 210 | bfm_coeff_seq[0, i:i + 1, ...], facemodel) 211 | 212 | shape = np.squeeze(face_shape, (0)) 213 | color = np.squeeze(face_color, (0)) 214 | color = np.clip(color, 0, 255).astype(np.int32) 215 | shape[:, :2] = 112 - shape[:, :2] * 112 216 | shape *=3 217 | 218 | img_size = 672 219 | new_image = np.zeros((img_size * img_size * 3), dtype=np.uint8) 220 | face_mask = np.zeros((img_size * img_size), dtype=np.uint8) 221 | 222 | vertices = shape.reshape(-1).astype(np.float32).copy() 223 | triangles = (facemodel.tri - 1).reshape(-1).astype(np.int32).copy() 224 | colors = color.reshape(-1).astype(np.float32).copy() 225 | depth_buffer = (np.zeros((img_size * img_size)) - 99999.0).astype(np.float32) 226 | mesh_core_cython.render_colors_core(new_image, face_mask, vertices, triangles, colors, depth_buffer, 227 | facemodel.tri.shape[0], img_size, img_size, 3) 228 | new_image = new_image.reshape([img_size, img_size, 3]) 229 | 230 | new_image = cv2.cvtColor(new_image, cv2.COLOR_BGR2RGB) 231 | 232 | cv2.imwrite('output/{}.jpg'.format(i), new_image) 233 | 234 | cmd = 'ffmpeg -i output/%d.jpg -i ' + audio_file + ' -c:v libx264 -c:a aac -strict experimental -y temp.mp4' 235 | subprocess.call(cmd, shell=True) 236 | -------------------------------------------------------------------------------- /voicepuppet/bfmnet/train_bfmnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | import tensorflow as tf 4 | import numpy as np 5 | import os 6 | from optparse import OptionParser 7 | import logging 8 | import sys 9 | 10 | sys.path.append(os.path.join(os.getcwd(), 'generator')) 11 | sys.path.append(os.path.join(os.getcwd(), 'utils')) 12 | 13 | from bfmnet import BFMNet 14 | from generator import BFMNetDataGenerator 15 | from bfm_load_data import * 16 | from bfm_visual import * 17 | from utils import * 18 | 19 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 20 | logger = logging.getLogger(__name__) 21 | 22 | 23 | if (__name__ == '__main__'): 24 | 25 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>") 26 | cmd_parser.add_option('--config_path', type="string", dest="config_path", 27 | help='the config yaml file') 28 | 29 | opts, argv = cmd_parser.parse_args() 30 | 31 | if (opts.config_path is None): 32 | logger.error('Please check your parameters.') 33 | exit(0) 34 | 35 | config_path = opts.config_path 36 | 37 | if (not os.path.exists(config_path)): 38 | logger.error('config_path not exists') 39 | exit(0) 40 | 41 | os.environ["CUDA_VISIBLE_DEVICES"] = '0' 42 | 43 | batch_size = 4 44 | ### Generator for training setting 45 | train_generator = BFMNetDataGenerator(config_path) 46 | params = train_generator.params 47 | params.dataset_path = params.train_dataset_path 48 | params.batch_size = batch_size 49 | train_generator.set_params(params) 50 | train_dataset = train_generator.get_dataset() 51 | 52 | ### Generator for evaluation setting 53 | eval_generator = BFMNetDataGenerator(config_path) 54 | params = eval_generator.params 55 | params.dataset_path = params.eval_dataset_path 56 | params.batch_size = batch_size 57 | eval_generator.set_params(params) 58 | eval_dataset = eval_generator.get_dataset() 59 | 60 | config = tf.ConfigProto() 61 | config.gpu_options.allow_growth 
= True 62 | sess = tf.Session(config=config) 63 | 64 | tf.train.start_queue_runners(sess=sess) 65 | 66 | train_iter = train_dataset.make_one_shot_iterator() 67 | eval_iter = eval_dataset.make_one_shot_iterator() 68 | 69 | ### BFMNet setting 70 | bfmnet = BFMNet(config_path) 71 | params = bfmnet.params 72 | epochs = params.training['epochs'] 73 | params.add_hparam('max_to_keep', 10) 74 | params.add_hparam('save_dir', 'ckpt_bfmnet') 75 | params.add_hparam('save_name', 'bfmnet') 76 | params.add_hparam('save_step', 5000) 77 | params.add_hparam('eval_step', 1000) 78 | # params.add_hparam('summary_step', 1000) 79 | params.add_hparam('eval_visual_dir', 'log/eval_bfmnet') 80 | # params.add_hparam('summary_dir', 'log/summary_bfmnet') 81 | params.batch_size = batch_size 82 | bfmnet.set_params(params) 83 | facemodel = BFM(params.model_dir) 84 | 85 | mkdir(params.save_dir) 86 | mkdir(params.eval_visual_dir) 87 | # mkdir(params.summary_dir) 88 | 89 | train_nodes = bfmnet.build_train_op(*train_iter.get_next()) 90 | eval_nodes = bfmnet.build_eval_op(*eval_iter.get_next()) 91 | sess.run(tf.global_variables_initializer()) 92 | 93 | # Restore from save_dir 94 | if ('checkpoint' in os.listdir(params.save_dir)): 95 | print('Restore from {}\n'.format(params.save_dir)) 96 | tf.train.Saver().restore(sess, tf.train.latest_checkpoint(params.save_dir)) 97 | 98 | # tf.summary.scalar("loss", train_nodes['Loss']) 99 | # tf.summary.scalar("lr", train_nodes['Lr']) 100 | # grads = train_nodes['Grads'] 101 | # tvars = train_nodes['Tvars'] 102 | # # Add histograms for gradients. 103 | # for i, grad in enumerate(grads): 104 | # if grad is not None: 105 | # var = tvars[i] 106 | # if ('BatchNorm' not in var.op.name): 107 | # tf.summary.histogram(var.op.name + '/gradients', grad) 108 | 109 | # merge_summary_op = tf.summary.merge_all() 110 | # summary_writer = tf.summary.FileWriter(params.summary_dir, graph=sess.graph) 111 | 112 | for i in range(epochs): 113 | ### Run training 114 | result = sess.run([train_nodes['Train_op'], 115 | # merge_summary_op, 116 | train_nodes['Loss'], 117 | train_nodes['Lr'], 118 | train_nodes['Global_step'], 119 | train_nodes['Mfccs'], 120 | train_nodes['Seq_len'], 121 | train_nodes['BFM_coeff_seq'], 122 | train_nodes['Ears']]) 123 | _, loss, lr, global_step, mfccs, seq_len, bfm_coeff_seq, ears = result 124 | print('Step {}: Loss= {:.3f}, Lr= {:.2e}'.format(global_step, loss, lr)) 125 | 126 | # if (global_step % params.summary_step == 0): 127 | # summary_writer.add_summary(summary, global_step) 128 | 129 | ### Run evaluation 130 | if (global_step % params.eval_step == 0): 131 | result = sess.run([eval_nodes['Loss'], 132 | eval_nodes['Seq_len'], 133 | eval_nodes['BFM_coeff_seq'], 134 | eval_nodes['BFMCoeffDecoder']]) 135 | loss, seq_len, real_bfm_coeff_seq, bfm_coeff_seq = result 136 | 137 | print('\r\nEvaluation >>> Loss= {:.3f}'.format(loss)) 138 | plot_bfm_coeff_seq(params.eval_visual_dir, facemodel, global_step, seq_len, real_bfm_coeff_seq, bfm_coeff_seq) 139 | 140 | ### Save checkpoint 141 | if (global_step % params.save_step == 0): 142 | tf.train.Saver(max_to_keep=params.max_to_keep, var_list=tf.global_variables()).save(sess, 143 | os.path.join(params.save_dir, 144 | params.save_name), 145 | global_step=global_step) 146 | -------------------------------------------------------------------------------- /voicepuppet/builder.py: -------------------------------------------------------------------------------- 1 | class ModelBuilder(object): 2 | 3 | def __init__(self): 4 | raise 
NotImplementError('__init__ not implemented.') 5 | 6 | def build_network(self): 7 | raise NotImplementError('build_network not implemented.') 8 | 9 | def __call__(self): 10 | raise NotImplementError('__call__ not implemented.') 11 | -------------------------------------------------------------------------------- /voicepuppet/pixflow/BUILD: -------------------------------------------------------------------------------- 1 | package( 2 | default_visibility = [ 3 | "//visibility:public", 4 | ], 5 | features = ["-layering_check"], 6 | ) 7 | 8 | py_library( 9 | name = "pixflow", 10 | srcs = ["pixflow.py"], 11 | deps = [ 12 | "//config:configure", 13 | "//voicepuppet:builder" 14 | ], 15 | ) 16 | 17 | py_binary( 18 | name = "train_pixflow", 19 | srcs = ["train_pixflow.py"], 20 | deps = [ 21 | "//utils:utils", 22 | ":pixflow", 23 | "//generator:generator", 24 | "//generator:loader" 25 | ], 26 | ) 27 | 28 | py_binary( 29 | name = "infer_pixflow", 30 | srcs = ["infer_pixflow.py"], 31 | deps = [ 32 | "//utils:bfm_load_data", 33 | "//utils:reconstruct_mesh", 34 | "//utils:bfm_visual", 35 | "//utils:utils", 36 | ":pixflow", 37 | "//voicepuppet/bfmnet:bfmnet", 38 | "//generator:generator", 39 | "//generator:loader" 40 | ], 41 | ) 42 | 43 | py_binary( 44 | name = "infer_bfm_pixflow", 45 | srcs = ["infer_bfm_pixflow.py"], 46 | deps = [ 47 | "//utils:bfm_load_data", 48 | "//utils:reconstruct_mesh", 49 | "//utils:bfm_visual", 50 | "//utils:utils", 51 | ":pixflow", 52 | "//voicepuppet/bfmnet:bfmnet", 53 | "//generator:generator", 54 | "//generator:loader" 55 | ], 56 | ) 57 | -------------------------------------------------------------------------------- /voicepuppet/pixflow/infer_bfm_pixflow.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | import tensorflow as tf 4 | import numpy as np 5 | import os 6 | from optparse import OptionParser 7 | import logging 8 | import subprocess 9 | from pixflow import PixFlowNet 10 | from voicepuppet.bfmnet.bfmnet import BFMNet 11 | from generator.loader import * 12 | from generator.generator import DataGenerator 13 | from utils.bfm_load_data import * 14 | from utils.bfm_visual import * 15 | from utils.utils import * 16 | import scipy 17 | import random 18 | 19 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 20 | logger = logging.getLogger(__name__) 21 | 22 | def alignto_bfm_coeff(model_dir, img, xys): 23 | from PIL import Image 24 | import tensorflow as tf 25 | 26 | def load_graph(graph_filename): 27 | with tf.gfile.GFile(graph_filename, 'rb') as f: 28 | graph_def = tf.GraphDef() 29 | graph_def.ParseFromString(f.read()) 30 | 31 | return graph_def 32 | 33 | # read standard landmarks for preprocessing images 34 | lm3D = load_lm3d(model_dir) 35 | 36 | # build reconstruction model 37 | with tf.Graph().as_default() as graph, tf.device('/cpu:0'): 38 | images = tf.placeholder(name='input_imgs', shape=[None, 224, 224, 3], dtype=tf.float32) 39 | graph_def = load_graph(os.path.join(model_dir, "FaceReconModel.pb")) 40 | tf.import_graph_def(graph_def, name='resnet', input_map={'input_imgs:0': images}) 41 | 42 | # output coefficients of R-Net (dim = 257) 43 | coeff = graph.get_tensor_by_name('resnet/coeff:0') 44 | 45 | with tf.Session() as sess: 46 | ps = map(lambda x: int(x), xys) 47 | 48 | left_eye_x = int(round((ps[72] + ps[74] + ps[76] + ps[78] + ps[80] + ps[82]) / 6)) 49 | left_eye_y = int(round((ps[73] + ps[75] + ps[77] + ps[79] + 
ps[81] + ps[83]) / 6)) 50 | right_eye_x = int(round((ps[84] + ps[86] + ps[88] + ps[90] + ps[92] + ps[94]) / 6)) 51 | right_eye_y = int(round((ps[85] + ps[87] + ps[89] + ps[91] + ps[93] + ps[95]) / 6)) 52 | nose_x = int(round(ps[60])) 53 | nose_y = int(round(ps[61])) 54 | left_mouse_x = int(round(ps[96])) 55 | left_mouse_y = int(round(ps[97])) 56 | right_mouse_x = int(round(ps[108])) 57 | right_mouse_y = int(round(ps[109])) 58 | 59 | lmk5 = np.array( 60 | [[left_eye_x, left_eye_y], [right_eye_x, right_eye_y], [nose_x, nose_y], [left_mouse_x, left_mouse_y], 61 | [right_mouse_x, right_mouse_y]]) 62 | 63 | image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) 64 | # preprocess input image 65 | input_img, lm_new, transform_params = Preprocess(image, lmk5, lm3D) 66 | bfmcoeff = sess.run(coeff, feed_dict={images: input_img}) 67 | return bfmcoeff, input_img, transform_params 68 | 69 | angles = np.array([[0, 0, 0]], dtype=np.float32) 70 | shift = 0.005 71 | 72 | def render_face(center_x, center_y, ratio, bfmcoeff, img, transform_params, facemodel): 73 | ratio *= transform_params[2] 74 | tx = -int((transform_params[3] / ratio)) 75 | ty = -int((transform_params[4] / ratio)) 76 | global angles, shift 77 | 78 | # angles[0][0] += shift 79 | # angles[0][1] += shift 80 | # angles[0][2] += shift 81 | # if (angles[0][1] > 0.03 or angles[0][1] < -0.03): 82 | # shift = -shift 83 | 84 | face_shape, face_texture, face_color, face_projection, z_buffer, landmarks_2d = Reconstruction_rotation( 85 | bfmcoeff, facemodel, angles) 86 | face_projection2 = np.concatenate([face_projection, z_buffer], axis=2) 87 | face_projection = np.squeeze(face_projection2, (0)) 88 | 89 | shape = np.squeeze(face_projection2, (0)) 90 | color = np.squeeze(face_color, (0)) 91 | color = np.clip(color, 0, 255).astype(np.int32) 92 | 93 | new_image = np.zeros((224 * 224 * 3), dtype=np.uint8) 94 | face_mask = np.zeros((224 * 224), dtype=np.uint8) 95 | 96 | vertices = shape.reshape(-1).astype(np.float32).copy() 97 | triangles = (facemodel.tri - 1).reshape(-1).astype(np.int32).copy() 98 | colors = color.reshape(-1).astype(np.float32).copy() 99 | depth_buffer = (np.zeros((224 * 224)) - 99999.0).astype(np.float32) 100 | mesh_core_cython.render_colors_core(new_image, face_mask, vertices, triangles, colors, depth_buffer, 101 | facemodel.tri.shape[0], 224, 224, 3) 102 | new_image = new_image.reshape([224, 224, 3]) 103 | 104 | new_image = cv2.cvtColor(new_image, cv2.COLOR_BGR2RGB) 105 | new_image = cv2.resize(new_image, ( 106 | int(round(new_image.shape[0] / ratio)), int(round(new_image.shape[1] / ratio)))) 107 | 108 | back_new_image = np.zeros((img.shape[0], img.shape[1], img.shape[2]), dtype=img.dtype) 109 | center_face_x = new_image.shape[1] // 2 110 | center_face_y = new_image.shape[0] // 2 111 | 112 | ry = center_y - center_face_y + new_image.shape[0] - ty 113 | rx = center_x - center_face_x + new_image.shape[1] - tx 114 | back_new_image[center_y - center_face_y - ty:ry, center_x - center_face_x - tx:rx, :] = new_image 115 | return back_new_image 116 | 117 | 118 | if (__name__ == '__main__'): 119 | 120 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>") 121 | cmd_parser.add_option('--config_path', type="string", dest="config_path", 122 | help='the config yaml file') 123 | 124 | opts, argv = cmd_parser.parse_args() 125 | 126 | if (opts.config_path is None): 127 | logger.error('Please check your parameters.') 128 | exit(0) 129 | 130 | config_path = opts.config_path 131 | 132 | if (not os.path.exists(config_path)): 
133 | logger.error('config_path not exists') 134 | exit(0) 135 | 136 | os.environ["CUDA_VISIBLE_DEVICES"] = '0' 137 | os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true' 138 | 139 | image_file, audio_file = argv 140 | 141 | mkdir('output') 142 | for file in os.listdir('output'): 143 | os.system('rm -rf output/{}'.format(file)) 144 | 145 | batch_size = 1 146 | ### Generator for inference setting 147 | infer_generator = DataGenerator(config_path) 148 | params = infer_generator.params 149 | params.batch_size = batch_size 150 | infer_generator.set_params(params) 151 | wav_loader = WavLoader(sr=infer_generator.sample_rate) 152 | pcm = wav_loader.get_data(audio_file) 153 | facemodel = BFM(params.pretrain_dir) 154 | 155 | pad_len = int(1 + pcm.shape[0] / infer_generator.frame_wav_scale) 156 | # calculate the rational length of pcm in order to keep the alignment of mfcc and landmark sequence. 157 | pcm_length = infer_generator.hop_step * (pad_len * infer_generator.frame_mfcc_scale - 1) + infer_generator.win_length 158 | if (pcm.shape[0] < pcm_length): 159 | pcm = np.pad(pcm, (0, pcm_length - pcm.shape[0]), 'constant', constant_values=(0)) 160 | pcm_slice = pcm[:pcm_length][np.newaxis, :] 161 | 162 | mfcc = infer_generator.extract_mfcc(pcm_slice) 163 | img_size = 512 164 | img = cv2.imread(image_file)[:, :512, :] 165 | img, img_landmarks, img_cropped, lmk_cropped, center_x, center_y, ratio = get_mxnet_sat_alignment(params.pretrain_dir, img) 166 | bfmcoeff, input_img, transform_params = alignto_bfm_coeff(params.pretrain_dir, img_cropped, lmk_cropped) 167 | 168 | img = cv2.cvtColor(cv2.imread(image_file), cv2.COLOR_BGR2RGB).astype(np.float32)/255.0 169 | face3d_refer = img[:, 512:512*2, :] 170 | fg_refer = img[:, :512, :] * img[:, 512*2:, :] 171 | img = img[:, :512, :] 172 | 173 | with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))) as sess: 174 | seq_len = tf.convert_to_tensor([pad_len], dtype=tf.int32) 175 | ear = np.random.rand(1, pad_len, 1).astype(np.float32)/100 176 | ear = tf.convert_to_tensor(ear, dtype=tf.float32) 177 | 178 | with tf.variable_scope('localization'): 179 | ### BFMNet setting 180 | bfmnet = BFMNet(config_path) 181 | params = bfmnet.params 182 | params.batch_size = 1 183 | bfmnet.set_params(params) 184 | 185 | bfmnet_nodes = bfmnet.build_inference_op(ear, mfcc, seq_len) 186 | 187 | with tf.variable_scope('recognition'): 188 | ### Vid2VidNet setting 189 | vid2vidnet = PixFlowNet(config_path) 190 | params = vid2vidnet.params 191 | params.batch_size = 1 192 | vid2vidnet.set_params(params) 193 | 194 | inputs_holder = tf.placeholder(tf.float32, shape=[None, img_size, img_size, 6]) 195 | targets_holder = tf.placeholder(tf.float32, shape=[None, img_size, img_size, 6]) 196 | vid2vid_nodes = vid2vidnet.build_inference_op(inputs_holder, targets_holder) 197 | 198 | variables_to_restore = tf.global_variables() 199 | loc_varlist = {v.name[13:][:-2]: v 200 | for v in variables_to_restore if v.name[:12]=='localization'} 201 | rec_varlist = {v.name[12:][:-2]: v 202 | for v in variables_to_restore if v.name[:11]=='recognition'} 203 | 204 | loc_saver = tf.train.Saver(var_list=loc_varlist) 205 | rec_saver = tf.train.Saver(var_list=rec_varlist) 206 | 207 | sess.run(tf.global_variables_initializer()) 208 | loc_saver.restore(sess, 'ckpt_bfmnet_new3/bfmnet-40000') 209 | rec_saver.restore(sess, 'ckpt_pixflow3/pixflownet-50000') 210 | 211 | ### Run inference 212 | bfm_coeff_seq = sess.run(bfmnet_nodes['BFMCoeffDecoder']) 213 | bfmcoeff = np.tile(bfmcoeff[:, np.newaxis, :], 
[1, bfm_coeff_seq.shape[1], 1]) 214 | 215 | bfm_coeff_seq = np.concatenate([bfmcoeff[:, :, :80], bfm_coeff_seq[:, :, :], bfmcoeff[:, :, 144:]], axis=2) 216 | 217 | inputs = np.zeros([1, img_size, img_size, 6], dtype=np.float32) 218 | inputs[0, ..., 0:3] = face3d_refer 219 | 220 | for i in range(bfm_coeff_seq.shape[1]): 221 | face3d = render_face(center_x+random.randint(0, 0), center_y+random.randint(0, 0), ratio, bfm_coeff_seq[0, i:i + 1, ...], img, transform_params, facemodel) 222 | # cv2.imwrite('output/{}.jpg'.format(i), face3d) 223 | face3d = cv2.cvtColor(face3d, cv2.COLOR_BGR2RGB).astype(np.float32)/255.0 224 | 225 | inputs[0, ..., 0:3] = face3d 226 | 227 | bg_img = np.zeros([1, img_size, img_size, 6], dtype=np.float32) 228 | bg_img[0, ..., :3] = cv2.resize(cv2.imread('background/{}.jpg'.format(i+1)), (img_size, img_size)).astype(np.float32)/255.0 229 | bg_img[0, ..., 3:] = bg_img[0, ..., :3] 230 | 231 | # bg_img = cv2.cvtColor(bg_img, cv2.COLOR_BGR2RGB) 232 | frames = sess.run(vid2vid_nodes['Outputs'], 233 | feed_dict={inputs_holder: inputs, targets_holder: bg_img}) 234 | 235 | cv2.imwrite('output/{}.jpg'.format(i), cv2.cvtColor((frames[0,..., :3]*255).astype(np.uint8), cv2.COLOR_BGR2RGB)) 236 | 237 | # image_loader = ImageLoader() 238 | # for index in range(4, 195): 239 | # img = image_loader.get_data(os.path.join('/media/dong/DiskData/gridcorpus/todir_vid2vid/vid1/05', '{}.jpg'.format(index))) 240 | # face3d = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)[:, img_size:img_size*2, :] 241 | 242 | # inputs[0, ..., 3:6] = inputs[0, ..., 6:9] 243 | # inputs[0, ..., 6:9] = face3d 244 | 245 | # frames, last = sess.run([vid2vid_nodes['Outputs'], vid2vid_nodes['Outputs_FG']], 246 | # feed_dict={inputs_holder: inputs, fg_inputs_holder: fg_inputs, targets_holder: np.tile(bg_img, (1, 1, 3))[np.newaxis, ...]}) 247 | # fg_inputs[0, ..., 3:6] = last 248 | 249 | # cv2.imwrite('output/{}.jpg'.format(index), cv2.cvtColor((last[0,...]*255).astype(np.uint8), cv2.COLOR_BGR2RGB)) 250 | -------------------------------------------------------------------------------- /voicepuppet/pixflow/infer_pixflow.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | import tensorflow as tf 4 | import numpy as np 5 | import os 6 | from optparse import OptionParser 7 | import logging 8 | import subprocess 9 | from pixflow import PixFlowNet 10 | from voicepuppet.bfmnet.bfmnet import BFMNet 11 | from generator.loader import * 12 | from generator.generator import DataGenerator 13 | from utils.bfm_load_data import * 14 | from utils.bfm_visual import * 15 | from utils.utils import * 16 | import scipy 17 | 18 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | if (__name__ == '__main__'): 23 | 24 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>") 25 | cmd_parser.add_option('--config_path', type="string", dest="config_path", 26 | help='the config yaml file') 27 | 28 | opts, argv = cmd_parser.parse_args() 29 | 30 | if (opts.config_path is None): 31 | logger.error('Please check your parameters.') 32 | exit(0) 33 | 34 | config_path = opts.config_path 35 | 36 | if (not os.path.exists(config_path)): 37 | logger.error('config_path not exists') 38 | exit(0) 39 | 40 | os.environ["CUDA_VISIBLE_DEVICES"] = '0' 41 | os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true' 42 | 43 | mkdir('output') 44 | for file in 
os.listdir('output'): 45 | os.system('rm -rf output/{}'.format(file)) 46 | 47 | batch_size = 1 48 | img_size = 512 49 | image_loader = ImageLoader() 50 | root = '/media/dong/DiskData/gridcorpus/todir_vid2vid/vid1/05' 51 | bg_img = np.zeros([1, img_size, img_size, 6], dtype=np.float32) 52 | bg_img[0, ..., :3] = cv2.resize(cv2.imread('/home/dong/Downloads/bg.jpg'), (img_size, img_size)).astype(np.float32)/255.0 53 | bg_img[0, ..., 3:] = bg_img[0, ..., :3] 54 | 55 | with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))) as sess: 56 | with tf.variable_scope('recognition'): 57 | ### Vid2VidNet setting 58 | vid2vidnet = PixFlowNet(config_path) 59 | params = vid2vidnet.params 60 | params.batch_size = 1 61 | vid2vidnet.set_params(params) 62 | 63 | inputs_holder = tf.placeholder(tf.float32, shape=[None, img_size, img_size, 6]) 64 | targets_holder = tf.placeholder(tf.float32, shape=[None, img_size, img_size, 6]) 65 | vid2vid_nodes = vid2vidnet.build_inference_op(inputs_holder, targets_holder) 66 | 67 | variables_to_restore = tf.global_variables() 68 | rec_varlist = {v.name[12:][:-2]: v 69 | for v in variables_to_restore if v.name[:11]=='recognition'} 70 | 71 | rec_saver = tf.train.Saver(var_list=rec_varlist) 72 | 73 | sess.run(tf.global_variables_initializer()) 74 | rec_saver.restore(sess, 'ckpt_pixflow3/pixflownet-60000') 75 | 76 | inputs = np.zeros([1, img_size, img_size, 6], dtype=np.float32) 77 | 78 | img = image_loader.get_data(os.path.join(root, '{}.jpg'.format(10))) 79 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 80 | inputs[0, :, :, 0:3] = img[:, img_size:img_size*2, :] 81 | 82 | for index in range(195): 83 | img = image_loader.get_data(os.path.join(root, '{}.jpg'.format(index))) 84 | if (img is not None): 85 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 86 | inputs[0, ..., 3:6] = img[:, img_size:img_size*2, :] 87 | 88 | frames = sess.run(vid2vid_nodes['Outputs'], 89 | feed_dict={inputs_holder: inputs, targets_holder: bg_img}) 90 | 91 | cv2.imwrite('output/_{}.jpg'.format(index), cv2.cvtColor((frames[0,...,3:]*255).astype(np.uint8), cv2.COLOR_BGR2RGB)) 92 | # cv2.imshow('', last[0, ...]) 93 | # cv2.waitKey(0) 94 | 95 | 96 | # cv2.imwrite('output/_{}.jpg'.format(i), cv2.cvtColor((frames[0,...]*255).astype(np.uint8), cv2.COLOR_BGR2RGB)) 97 | 98 | # cmd = 'ffmpeg -i output/_%d.jpg -i ' + audio_file + ' -c:v libx264 -c:a aac -strict experimental -y temp2.mp4' 99 | # subprocess.call(cmd, shell=True) 100 | 101 | # cmd = 'ffmpeg -i output/%d.jpg -i ' + audio_file + ' -c:v libx264 -c:a aac -strict experimental -y temp.mp4' 102 | # subprocess.call(cmd, shell=True) 103 | -------------------------------------------------------------------------------- /voicepuppet/pixflow/train_pixflow.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | import tensorflow as tf 4 | import numpy as np 5 | import os 6 | from optparse import OptionParser 7 | import logging 8 | from pixflow import PixFlowNet 9 | from generator.generator import PixFlowDataGenerator 10 | from utils.utils import * 11 | 12 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | if (__name__ == '__main__'): 17 | 18 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>") 19 | cmd_parser.add_option('--config_path', type="string", dest="config_path", 20 | help='the config yaml file') 21 | 22 | opts, argv 
= cmd_parser.parse_args() 23 | 24 | if (opts.config_path is None): 25 | logger.error('Please check your parameters.') 26 | exit(0) 27 | 28 | config_path = opts.config_path 29 | 30 | if (not os.path.exists(config_path)): 31 | logger.error('config_path not exists') 32 | exit(0) 33 | 34 | os.environ["CUDA_VISIBLE_DEVICES"] = '0' 35 | 36 | batch_size = 3 37 | ### Generator for training setting 38 | train_generator = PixFlowDataGenerator(config_path) 39 | params = train_generator.params 40 | params.dataset_path = params.train_dataset_path 41 | params.batch_size = batch_size 42 | train_generator.set_params(params) 43 | train_dataset = train_generator.get_dataset() 44 | 45 | config = tf.ConfigProto() 46 | config.gpu_options.allow_growth = True 47 | sess = tf.Session(config=config) 48 | tf.train.start_queue_runners(sess=sess) 49 | 50 | train_iter = train_dataset.make_one_shot_iterator() 51 | 52 | # inputs, fg_inputs, targets, masks = sess.run(train_iter.get_next()) 53 | # inp1 = cv2.cvtColor((inputs[0,...,0:3]*255).astype(np.uint8), cv2.COLOR_BGR2RGB) 54 | # inp2 = cv2.cvtColor((inputs[0,...,3:6]*255).astype(np.uint8), cv2.COLOR_BGR2RGB) 55 | # fg_inputs1 = cv2.cvtColor((fg_inputs[0,...,0:3]*255).astype(np.uint8), cv2.COLOR_BGR2RGB) 56 | # fg_inputs2 = cv2.cvtColor((fg_inputs[0,...,3:6]*255).astype(np.uint8), cv2.COLOR_BGR2RGB) 57 | # targets1 = cv2.cvtColor((targets[0,...,0:3]*255).astype(np.uint8), cv2.COLOR_BGR2RGB) 58 | # targets2 = cv2.cvtColor((targets[0,...,3:6]*255).astype(np.uint8), cv2.COLOR_BGR2RGB) 59 | # masks1 = cv2.cvtColor((masks[0,...,0:3]*255).astype(np.uint8), cv2.COLOR_BGR2RGB) 60 | # masks2 = cv2.cvtColor((masks[0,...,3:6]*255).astype(np.uint8), cv2.COLOR_BGR2RGB) 61 | 62 | # cv2.imwrite('to/inp1.jpg', inp1) 63 | # cv2.imwrite('to/inp2.jpg', inp2) 64 | # cv2.imwrite('to/fg_inputs1.jpg', fg_inputs1) 65 | # cv2.imwrite('to/fg_inputs2.jpg', fg_inputs2) 66 | # cv2.imwrite('to/targets1.jpg', targets1) 67 | # cv2.imwrite('to/targets2.jpg', targets2) 68 | # cv2.imwrite('to/masks1.jpg', masks1) 69 | # cv2.imwrite('to/masks2.jpg', masks2) 70 | # sys.exit(0) 71 | 72 | 73 | ### Vid2VidNet setting 74 | vid2vidnet = PixFlowNet(config_path) 75 | params = vid2vidnet.params 76 | epochs = params.training['epochs'] 77 | params.add_hparam('max_to_keep', 2) 78 | params.add_hparam('save_dir', 'ckpt_pixflow') 79 | params.add_hparam('save_name', 'pixflownet') 80 | params.add_hparam('save_step', 5000) 81 | params.add_hparam('summary_step', 100) 82 | params.add_hparam('summary_dir', 'log/summary_pixflow') 83 | params.batch_size = batch_size 84 | vid2vidnet.set_params(params) 85 | 86 | mkdir(params.save_dir) 87 | mkdir(params.summary_dir) 88 | 89 | train_nodes = vid2vidnet.build_train_op(*train_iter.get_next()) 90 | sess.run(tf.global_variables_initializer()) 91 | 92 | # Restore from save_dir 93 | if ('checkpoint' in os.listdir(params.save_dir)): 94 | tf.train.Saver().restore(sess, tf.train.latest_checkpoint(params.save_dir)) 95 | 96 | tf.summary.scalar("discriminator_loss", train_nodes['Discrim_loss']) 97 | tf.summary.scalar("generator_loss_GAN", train_nodes['Gen_loss_GAN']) 98 | tf.summary.scalar("generator_loss_L1", train_nodes['Gen_loss_L1']) 99 | 100 | with tf.name_scope("inputs_summary"): 101 | tf.summary.image("inputs", tf.image.convert_image_dtype(train_nodes['Inputs'][... ,3:6], dtype=tf.uint8)) 102 | 103 | with tf.name_scope("targets_summary"): 104 | tf.summary.image("targets", tf.image.convert_image_dtype(train_nodes['FG_Inputs'][... 
,3:6], dtype=tf.uint8)) 105 | 106 | with tf.name_scope("outputs_summary"): 107 | tf.summary.image("outputs", tf.image.convert_image_dtype(train_nodes['Outputs'], dtype=tf.uint8)) 108 | 109 | with tf.name_scope("alpha_summary"): 110 | tf.summary.image("alphas", tf.image.convert_image_dtype(train_nodes['Alphas'], dtype=tf.uint8)) 111 | 112 | # Add histograms for gradients. 113 | for grad, var in train_nodes['Discrim_grads_and_vars'] + train_nodes['Gen_grads_and_vars']: 114 | if(grad is not None): 115 | tf.summary.histogram(var.op.name, grad) 116 | 117 | merge_summary_op = tf.summary.merge_all() 118 | summary_writer = tf.summary.FileWriter(params.summary_dir, graph=sess.graph) 119 | 120 | for i in range(epochs): 121 | ### Run training 122 | result = sess.run([train_nodes['Train_op'], 123 | merge_summary_op, 124 | train_nodes['Gen_loss_GAN'], 125 | train_nodes['Gen_loss_L1'], 126 | train_nodes['Discrim_loss'], 127 | train_nodes['Lr'], 128 | train_nodes['Global_step']]) 129 | _, summary, gen_loss_GAN, gen_loss_L1, discrim_loss, lr, global_step = result 130 | if(global_step % params.summary_step==0): 131 | print('Step {}, Lr= {:.2e}: \n\tgen_loss_GAN= {:.3f}, \n\tgen_loss_L1= {:.3f}, \n\tdiscrim_loss= {:.3f}'.format(global_step, lr, gen_loss_GAN, gen_loss_L1, discrim_loss)) 132 | summary_writer.add_summary(summary, global_step) 133 | 134 | ### Save checkpoint 135 | if (global_step % params.save_step == 0): 136 | tf.train.Saver(max_to_keep=params.max_to_keep, var_list=tf.global_variables()).save(sess, 137 | os.path.join(params.save_dir, 138 | params.save_name), 139 | global_step=global_step) 140 | -------------------------------------------------------------------------------- /voicepuppet/pixrefer/BUILD: -------------------------------------------------------------------------------- 1 | package( 2 | default_visibility = [ 3 | "//visibility:public", 4 | ], 5 | features = ["-layering_check"], 6 | ) 7 | 8 | py_library( 9 | name = "vgg_simple", 10 | srcs = ["vgg_simple.py"], 11 | deps = [ 12 | "//config:configure", 13 | "//voicepuppet:builder" 14 | ], 15 | ) 16 | 17 | py_library( 18 | name = "pixrefer", 19 | srcs = ["pixrefer.py"], 20 | deps = [ 21 | "//config:configure", 22 | "//voicepuppet:builder", 23 | ":vgg_simple" 24 | ], 25 | ) 26 | 27 | py_binary( 28 | name = "train_pixrefer", 29 | srcs = ["train_pixrefer.py"], 30 | deps = [ 31 | "//utils:utils", 32 | ":pixrefer", 33 | "//generator:generator", 34 | "//generator:loader" 35 | ], 36 | ) 37 | 38 | py_binary( 39 | name = "infer_pixrefer", 40 | srcs = ["infer_pixrefer.py"], 41 | deps = [ 42 | "//utils:bfm_load_data", 43 | "//utils:reconstruct_mesh", 44 | "//utils:bfm_visual", 45 | "//utils:utils", 46 | ":pixrefer", 47 | "//voicepuppet/bfmnet:bfmnet", 48 | "//generator:generator", 49 | "//generator:loader" 50 | ], 51 | ) 52 | 53 | py_binary( 54 | name = "infer_bfmvid", 55 | srcs = ["infer_bfmvid.py"], 56 | deps = [ 57 | "//utils:bfm_load_data", 58 | "//utils:reconstruct_mesh", 59 | "//utils:bfm_visual", 60 | "//utils:utils", 61 | ":pixrefer", 62 | "//voicepuppet/bfmnet:bfmnet", 63 | "//generator:generator", 64 | "//generator:loader" 65 | ], 66 | ) 67 | -------------------------------------------------------------------------------- /voicepuppet/pixrefer/infer_bfmvid.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | import tensorflow as tf 4 | import numpy as np 5 | import os 6 | from optparse import OptionParser 7 | import logging 8 | import subprocess 9 
| import scipy 10 | import random 11 | import sys 12 | 13 | sys.path.append(os.path.join(os.getcwd(), 'utils')) 14 | 15 | from pixrefer import PixReferNet 16 | from voicepuppet.bfmnet.bfmnet import BFMNet 17 | from generator.loader import * 18 | from generator.generator import DataGenerator 19 | from bfm_load_data import * 20 | from bfm_visual import * 21 | from utils import * 22 | 23 | bfmcoeff_loader = BFMCoeffLoader() 24 | # vid_bfmcoeff = bfmcoeff_loader.get_data('/media/dong/DiskData/gridcorpus/todir/bilibili/4_16/bfmcoeff.txt') 25 | 26 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 27 | logger = logging.getLogger(__name__) 28 | 29 | def alignto_bfm_coeff(model_dir, img, xys): 30 | from PIL import Image 31 | import tensorflow as tf 32 | 33 | def load_graph(graph_filename): 34 | with tf.gfile.GFile(graph_filename, 'rb') as f: 35 | graph_def = tf.GraphDef() 36 | graph_def.ParseFromString(f.read()) 37 | 38 | return graph_def 39 | 40 | # read standard landmarks for preprocessing images 41 | lm3D = load_lm3d(model_dir) 42 | 43 | # build reconstruction model 44 | with tf.Graph().as_default() as graph, tf.device('/cpu:0'): 45 | images = tf.placeholder(name='input_imgs', shape=[None, 224, 224, 3], dtype=tf.float32) 46 | graph_def = load_graph(os.path.join(model_dir, "FaceReconModel.pb")) 47 | tf.import_graph_def(graph_def, name='resnet', input_map={'input_imgs:0': images}) 48 | 49 | # output coefficients of R-Net (dim = 257) 50 | coeff = graph.get_tensor_by_name('resnet/coeff:0') 51 | 52 | with tf.Session() as sess: 53 | ps = list(map(lambda x: int(x), xys)) 54 | 55 | left_eye_x = int(round((ps[72] + ps[74] + ps[76] + ps[78] + ps[80] + ps[82]) / 6)) 56 | left_eye_y = int(round((ps[73] + ps[75] + ps[77] + ps[79] + ps[81] + ps[83]) / 6)) 57 | right_eye_x = int(round((ps[84] + ps[86] + ps[88] + ps[90] + ps[92] + ps[94]) / 6)) 58 | right_eye_y = int(round((ps[85] + ps[87] + ps[89] + ps[91] + ps[93] + ps[95]) / 6)) 59 | nose_x = int(round(ps[60])) 60 | nose_y = int(round(ps[61])) 61 | left_mouse_x = int(round(ps[96])) 62 | left_mouse_y = int(round(ps[97])) 63 | right_mouse_x = int(round(ps[108])) 64 | right_mouse_y = int(round(ps[109])) 65 | 66 | lmk5 = np.array( 67 | [[left_eye_x, left_eye_y], [right_eye_x, right_eye_y], [nose_x, nose_y], [left_mouse_x, left_mouse_y], 68 | [right_mouse_x, right_mouse_y]]) 69 | 70 | image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) 71 | # preprocess input image 72 | input_img, lm_new, transform_params = Preprocess(image, lmk5, lm3D) 73 | bfmcoeff = sess.run(coeff, feed_dict={images: input_img}) 74 | return bfmcoeff, input_img, transform_params 75 | 76 | angles = np.array([[0, 0, 0]], dtype=np.float32) 77 | shift = 0.005 78 | 79 | def render_face(center_x, center_y, ratio, bfmcoeff, img, transform_params, facemodel): 80 | ratio *= transform_params[2] 81 | tx = -int((transform_params[3] / ratio)) 82 | ty = -int((transform_params[4] / ratio)) 83 | global angles, shift 84 | 85 | angles[0][0] += shift 86 | angles[0][1] += shift 87 | angles[0][2] += shift 88 | if (angles[0][1] > 0.03 or angles[0][1] < -0.03): 89 | shift = -shift 90 | 91 | face_shape, face_texture, face_color, face_projection, z_buffer, landmarks_2d = Reconstruction_rotation( 92 | bfmcoeff, facemodel, angles) 93 | face_projection2 = np.concatenate([face_projection, z_buffer], axis=2) 94 | face_projection = np.squeeze(face_projection2, (0)) 95 | 96 | shape = np.squeeze(face_projection2, (0)) 97 | color = np.squeeze(face_color, (0)) 98 | 
color = np.clip(color, 0, 255).astype(np.int32) 99 | 100 | new_image = np.zeros((224 * 224 * 3), dtype=np.uint8) 101 | face_mask = np.zeros((224 * 224), dtype=np.uint8) 102 | 103 | vertices = shape.reshape(-1).astype(np.float32).copy() 104 | triangles = (facemodel.tri - 1).reshape(-1).astype(np.int32).copy() 105 | colors = color.reshape(-1).astype(np.float32).copy() 106 | depth_buffer = (np.zeros((224 * 224)) - 99999.0).astype(np.float32) 107 | mesh_core_cython.render_colors_core(new_image, face_mask, vertices, triangles, colors, depth_buffer, 108 | facemodel.tri.shape[0], 224, 224, 3) 109 | new_image = new_image.reshape([224, 224, 3]) 110 | 111 | new_image = cv2.cvtColor(new_image, cv2.COLOR_BGR2RGB) 112 | new_image = cv2.resize(new_image, ( 113 | int(round(new_image.shape[0] / ratio)), int(round(new_image.shape[1] / ratio)))) 114 | 115 | back_new_image = np.zeros((img.shape[0], img.shape[1], img.shape[2]), dtype=img.dtype) 116 | center_face_x = new_image.shape[1] // 2 117 | center_face_y = new_image.shape[0] // 2 118 | 119 | ry = center_y - center_face_y + new_image.shape[0] - ty 120 | rx = center_x - center_face_x + new_image.shape[1] - tx 121 | back_new_image[center_y - center_face_y - ty:ry, center_x - center_face_x - tx:rx, :] = new_image 122 | return back_new_image 123 | 124 | 125 | if (__name__ == '__main__'): 126 | 127 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>") 128 | cmd_parser.add_option('--config_path', type="string", dest="config_path", 129 | help='the config yaml file') 130 | 131 | opts, argv = cmd_parser.parse_args() 132 | 133 | if (opts.config_path is None): 134 | logger.error('Please check your parameters.') 135 | exit(0) 136 | 137 | config_path = opts.config_path 138 | 139 | if (not os.path.exists(config_path)): 140 | logger.error('config_path not exists') 141 | exit(0) 142 | 143 | os.environ["CUDA_VISIBLE_DEVICES"] = '0' 144 | os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true' 145 | 146 | image_file, audio_file = argv 147 | 148 | mkdir('output') 149 | for file in os.listdir('output'): 150 | os.system('rm -rf output/{}'.format(file)) 151 | 152 | batch_size = 1 153 | ### Generator for inference setting 154 | infer_generator = DataGenerator(config_path) 155 | params = infer_generator.params 156 | params.batch_size = batch_size 157 | infer_generator.set_params(params) 158 | wav_loader = WavLoader(sr=infer_generator.sample_rate) 159 | pcm = wav_loader.get_data(audio_file) 160 | facemodel = BFM(params.model_dir) 161 | 162 | pad_len = int(1 + pcm.shape[0] / infer_generator.frame_wav_scale) 163 | # calculate the rational length of pcm in order to keep the alignment of mfcc and landmark sequence. 
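  # (The pcm_length below is the standard no-padding STFT relation hop_step*(N-1) + win_length
  #  with N = pad_len * frame_mfcc_scale, i.e. just enough samples to produce N analysis frames,
  #  so each of the pad_len video frames maps to exactly frame_mfcc_scale MFCC frames.)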
164 | pcm_length = infer_generator.hop_step * (pad_len * infer_generator.frame_mfcc_scale - 1) + infer_generator.win_length 165 | if (pcm.shape[0] < pcm_length): 166 | pcm = np.pad(pcm, (0, pcm_length - pcm.shape[0]), 'constant', constant_values=(0)) 167 | pcm_slice = pcm[:pcm_length][np.newaxis, :] 168 | 169 | mfcc = infer_generator.extract_mfcc(pcm_slice) 170 | img_size = 512 171 | img = cv2.imread(image_file)[:, :512, :] 172 | img, img_landmarks, img_cropped, lmk_cropped, center_x, center_y, ratio = get_mxnet_sat_alignment(params.model_dir, img) 173 | bfmcoeff, input_img, transform_params = alignto_bfm_coeff(params.model_dir, img_cropped, lmk_cropped) 174 | 175 | img = cv2.cvtColor(cv2.imread(image_file), cv2.COLOR_BGR2RGB).astype(np.float32)/255.0 176 | face3d_refer = img[:, 512:512*2, :] 177 | fg_refer = img[:, :512, :] * img[:, 512*2:, :] 178 | img = img[:, :512, :] 179 | 180 | with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))) as sess: 181 | seq_len = tf.convert_to_tensor([pad_len], dtype=tf.int32) 182 | ear = np.random.rand(1, pad_len, 1).astype(np.float32)/100 183 | ear = tf.convert_to_tensor(ear, dtype=tf.float32) 184 | 185 | with tf.variable_scope('bfm_scope'): 186 | ### BFMNet setting 187 | bfmnet = BFMNet(config_path) 188 | params = bfmnet.params 189 | params.batch_size = 1 190 | bfmnet.set_params(params) 191 | 192 | bfmnet_nodes = bfmnet.build_inference_op(ear, mfcc, seq_len) 193 | 194 | with tf.variable_scope('vid_scope'): 195 | ### Vid2VidNet setting 196 | vid2vidnet = PixReferNet(config_path) 197 | params = vid2vidnet.params 198 | params.batch_size = 1 199 | params.add_hparam('is_training', False) 200 | vid2vidnet.set_params(params) 201 | 202 | inputs_holder = tf.placeholder(tf.float32, shape=[None, img_size, img_size, 6]) 203 | fg_inputs_holder = tf.placeholder(tf.float32, shape=[None, img_size, img_size, 3]) 204 | targets_holder = tf.placeholder(tf.float32, shape=[None, img_size, img_size, 3]) 205 | vid2vid_nodes = vid2vidnet.build_inference_op(inputs_holder, fg_inputs_holder, targets_holder) 206 | 207 | variables_to_restore = tf.global_variables() 208 | bfm_varlist = {v.name[len('bfm_scope')+1:][:-2]: v 209 | for v in variables_to_restore if v.name[:len('bfm_scope')]=='bfm_scope'} 210 | vid_varlist = {v.name[len('vid_scope')+1:][:-2]: v 211 | for v in variables_to_restore if v.name[:len('vid_scope')]=='vid_scope'} 212 | 213 | bfm_saver = tf.train.Saver(var_list=bfm_varlist) 214 | vid_saver = tf.train.Saver(var_list=vid_varlist) 215 | 216 | sess.run(tf.global_variables_initializer()) 217 | bfm_saver.restore(sess, 'ckpt_bfmnet/bfmnet-65000') 218 | vid_saver.restore(sess, 'ckpt_pixrefer/pixrefernet-20000') 219 | 220 | # ### Run inference 221 | bfm_coeff_seq = sess.run(bfmnet_nodes['BFMCoeffDecoder']) 222 | # bfm_coeff_seq = vid_bfmcoeff[np.newaxis, :, 80:144] 223 | bfmcoeff = np.tile(bfmcoeff[:, np.newaxis, :], [1, bfm_coeff_seq.shape[1], 1]) 224 | bfm_coeff_seq = np.concatenate([bfmcoeff[:, :, :80], bfm_coeff_seq, bfmcoeff[:, :, 144:]], axis=2) 225 | 226 | inputs = np.zeros([1, img_size, img_size, 6], dtype=np.float32) 227 | fg_inputs = np.zeros([1, img_size, img_size, 3], dtype=np.float32) 228 | inputs[0, ..., 0:3] = face3d_refer 229 | fg_inputs[0, ..., 0:3] = fg_refer 230 | 231 | for i in range(bfm_coeff_seq.shape[1]):# 232 | face3d = render_face(center_x+random.randint(-0, 0), center_y+random.randint(-0, 0), ratio, bfm_coeff_seq[0, i:i + 1, ...], img, transform_params, facemodel) 233 | # cv2.imwrite('output/{}.jpg'.format(i), face3d) 
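      # Scale the rendered face to float32 in [0, 1] and write it into channels 3:6 of
      # `inputs`; together with the reference render already stored in channels 0:3 it forms
      # the 6-channel conditioning tensor fed to PixReferNet through inputs_holder.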
234 | face3d = cv2.cvtColor(face3d, cv2.COLOR_BGR2RGB).astype(np.float32)/255.0 235 | 236 | inputs[0, ..., 3:6] = face3d 237 | 238 | bg_img = cv2.resize(cv2.imread('background/{}.jpg'.format(i%100+1)), (img_size, img_size)).astype(np.float32)/255.0 239 | bg_img = cv2.cvtColor(bg_img, cv2.COLOR_BGR2RGB) 240 | frames, last = sess.run([vid2vid_nodes['Outputs'], vid2vid_nodes['Outputs_FG']], 241 | feed_dict={inputs_holder: inputs, fg_inputs_holder: fg_inputs, targets_holder: bg_img[np.newaxis, ...]}) 242 | 243 | cv2.imwrite('output/{}.jpg'.format(i), cv2.cvtColor((frames[0,...]*255).astype(np.uint8), cv2.COLOR_BGR2RGB)) 244 | 245 | cmd = 'ffmpeg -i output/%d.jpg -i ' + audio_file + ' -c:v libx264 -c:a aac -strict experimental -y output.mp4' 246 | subprocess.call(cmd, shell=True) 247 | 248 | # image_loader = ImageLoader() 249 | # for index in range(4, 195): 250 | # img = image_loader.get_data(os.path.join('/media/dong/DiskData/gridcorpus/todir_vid2vid/vid1/05', '{}.jpg'.format(index))) 251 | # face3d = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)[:, img_size:img_size*2, :] 252 | 253 | # inputs[0, ..., 3:6] = inputs[0, ..., 6:9] 254 | # inputs[0, ..., 6:9] = face3d 255 | 256 | # frames, last = sess.run([vid2vid_nodes['Outputs'], vid2vid_nodes['Outputs_FG']], 257 | # feed_dict={inputs_holder: inputs, fg_inputs_holder: fg_inputs, targets_holder: np.tile(bg_img, (1, 1, 3))[np.newaxis, ...]}) 258 | # fg_inputs[0, ..., 3:6] = last 259 | 260 | # cv2.imwrite('output/{}.jpg'.format(index), cv2.cvtColor((last[0,...]*255).astype(np.uint8), cv2.COLOR_BGR2RGB)) 261 | -------------------------------------------------------------------------------- /voicepuppet/pixrefer/infer_pixrefer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | import tensorflow as tf 4 | import numpy as np 5 | import os 6 | from optparse import OptionParser 7 | import logging 8 | import subprocess 9 | from pixrefer import PixReferNet 10 | from voicepuppet.bfmnet.bfmnet import BFMNet 11 | from generator.loader import * 12 | from generator.generator import DataGenerator 13 | from utils.bfm_load_data import * 14 | from utils.bfm_visual import * 15 | from utils.utils import * 16 | import scipy 17 | 18 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | if (__name__ == '__main__'): 23 | 24 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>") 25 | cmd_parser.add_option('--config_path', type="string", dest="config_path", 26 | help='the config yaml file') 27 | 28 | opts, argv = cmd_parser.parse_args() 29 | 30 | if (opts.config_path is None): 31 | logger.error('Please check your parameters.') 32 | exit(0) 33 | 34 | config_path = opts.config_path 35 | 36 | if (not os.path.exists(config_path)): 37 | logger.error('config_path not exists') 38 | exit(0) 39 | 40 | os.environ["CUDA_VISIBLE_DEVICES"] = '0' 41 | os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true' 42 | 43 | mkdir('output') 44 | for file in os.listdir('output'): 45 | os.system('rm -rf output/{}'.format(file)) 46 | 47 | batch_size = 1 48 | img_size = 512 49 | image_loader = ImageLoader() 50 | root = '/media/dong/DiskData/gridcorpus/todir_vid2vid/vid1/05' 51 | bg_img = cv2.resize(cv2.imread('/home/dong/Downloads/bg.jpg'), (img_size, img_size)).astype(np.float32)/255.0 52 | 53 | with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))) as sess: 54 | 
with tf.variable_scope('recognition'): 55 | ### Vid2VidNet setting 56 | vid2vidnet = PixReferNet(config_path) 57 | params = vid2vidnet.params 58 | params.batch_size = 1 59 | params.add_hparam('is_training', False) 60 | vid2vidnet.set_params(params) 61 | 62 | inputs_holder = tf.placeholder(tf.float32, shape=[None, img_size, img_size, 6]) 63 | fg_inputs_holder = tf.placeholder(tf.float32, shape=[None, img_size, img_size, 3]) 64 | targets_holder = tf.placeholder(tf.float32, shape=[None, img_size, img_size, 3]) 65 | vid2vid_nodes = vid2vidnet.build_inference_op(inputs_holder, fg_inputs_holder, targets_holder) 66 | 67 | variables_to_restore = tf.global_variables() 68 | rec_varlist = {v.name[12:][:-2]: v 69 | for v in variables_to_restore if v.name[:11]=='recognition'} 70 | 71 | rec_saver = tf.train.Saver(var_list=rec_varlist) 72 | 73 | sess.run(tf.global_variables_initializer()) 74 | rec_saver.restore(sess, 'ckpt_pixrefer/pixrefernet-20000') 75 | 76 | inputs = np.zeros([1, img_size, img_size, 6], dtype=np.float32) 77 | fg_inputs = np.zeros([1, img_size, img_size, 3], dtype=np.float32) 78 | 79 | img = image_loader.get_data(os.path.join(root, '{}.jpg'.format(0))) 80 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 81 | inputs[0, :, :, 0:3] = img[:, img_size:img_size*2, :] 82 | fg_inputs[0, :, :, 0:3] = img[:, :img_size, :] * img[:, img_size*2:, :] 83 | 84 | for index in range(4, 195): 85 | img = image_loader.get_data(os.path.join(root, '{}.jpg'.format(index))) 86 | if (img is not None): 87 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 88 | inputs[0, ..., 3:6] = img[:, img_size:img_size*2, :] 89 | 90 | frames, last = sess.run([vid2vid_nodes['Outputs'], vid2vid_nodes['Outputs_FG']], 91 | feed_dict={inputs_holder: inputs, fg_inputs_holder: fg_inputs, targets_holder: bg_img[np.newaxis, ...]}) 92 | 93 | cv2.imwrite('output/_{}.jpg'.format(index), cv2.cvtColor((frames[0,...]*255).astype(np.uint8), cv2.COLOR_BGR2RGB)) 94 | # cv2.imshow('', last[0, ...]) 95 | # cv2.waitKey(0) 96 | 97 | 98 | # cv2.imwrite('output/_{}.jpg'.format(i), cv2.cvtColor((frames[0,...]*255).astype(np.uint8), cv2.COLOR_BGR2RGB)) 99 | 100 | # cmd = 'ffmpeg -i output/_%d.jpg -i ' + audio_file + ' -c:v libx264 -c:a aac -strict experimental -y temp2.mp4' 101 | # subprocess.call(cmd, shell=True) 102 | 103 | # cmd = 'ffmpeg -i output/%d.jpg -i ' + audio_file + ' -c:v libx264 -c:a aac -strict experimental -y temp.mp4' 104 | # subprocess.call(cmd, shell=True) 105 | -------------------------------------------------------------------------------- /voicepuppet/pixrefer/train_pixrefer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | import tensorflow as tf 4 | import numpy as np 5 | import os 6 | from optparse import OptionParser 7 | import logging 8 | from pixrefer import PixReferNet 9 | from generator.generator import PixReferDataGenerator 10 | from utils.utils import * 11 | 12 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | if (__name__ == '__main__'): 17 | 18 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>") 19 | cmd_parser.add_option('--config_path', type="string", dest="config_path", 20 | help='the config yaml file') 21 | 22 | opts, argv = cmd_parser.parse_args() 23 | 24 | if (opts.config_path is None): 25 | logger.error('Please check your parameters.') 26 | exit(0) 27 | 28 | config_path = opts.config_path 29 
| 30 | if (not os.path.exists(config_path)): 31 | logger.error('config_path not exists') 32 | exit(0) 33 | 34 | os.environ["CUDA_VISIBLE_DEVICES"] = '0' 35 | 36 | batch_size = 2 37 | ### Generator for training setting 38 | train_generator = PixReferDataGenerator(config_path) 39 | params = train_generator.params 40 | params.dataset_path = params.train_dataset_path 41 | params.batch_size = batch_size 42 | train_generator.set_params(params) 43 | train_dataset = train_generator.get_dataset() 44 | 45 | config = tf.ConfigProto() 46 | config.gpu_options.allow_growth = True 47 | sess = tf.Session(config=config) 48 | tf.train.start_queue_runners(sess=sess) 49 | 50 | train_iter = train_dataset.make_one_shot_iterator() 51 | 52 | # inputs, fg_inputs, targets, masks = sess.run(train_iter.get_next()) 53 | # inp1 = cv2.cvtColor((inputs[0,...,0:3]*255).astype(np.uint8), cv2.COLOR_BGR2RGB) 54 | # inp2 = cv2.cvtColor((inputs[0,...,3:6]*255).astype(np.uint8), cv2.COLOR_BGR2RGB) 55 | # fg1 = cv2.cvtColor((fg_inputs[0, ...]*255).astype(np.uint8), cv2.COLOR_BGR2RGB) 56 | # targets1 = cv2.cvtColor((targets[0, ...]*255).astype(np.uint8), cv2.COLOR_BGR2RGB) 57 | # masks1 = cv2.cvtColor((masks[0, ...]*255).astype(np.uint8), cv2.COLOR_BGR2RGB) 58 | 59 | # cv2.imwrite('to/inp1.jpg', inp1) 60 | # cv2.imwrite('to/inp2.jpg', inp2) 61 | # cv2.imwrite('to/fg1.jpg', fg1) 62 | # cv2.imwrite('to/targets1.jpg', targets1) 63 | # cv2.imwrite('to/masks1.jpg', masks1) 64 | # sys.exit(0) 65 | 66 | 67 | ### Vid2VidNet setting 68 | vid2vidnet = PixReferNet(config_path) 69 | params = vid2vidnet.params 70 | epochs = params.training['epochs'] 71 | params.add_hparam('max_to_keep', 2) 72 | params.add_hparam('save_dir', 'ckpt_pixrefer') 73 | params.add_hparam('save_name', 'pixrefernet') 74 | params.add_hparam('save_step', 5000) 75 | params.add_hparam('summary_step', 100) 76 | params.add_hparam('summary_dir', 'log/summary_pixrefer') 77 | params.batch_size = batch_size 78 | params.add_hparam('is_training', True) 79 | params.sess = sess 80 | params.vgg_model_path = os.path.join(params.model_dir, 'vgg_16.ckpt') 81 | vid2vidnet.set_params(params) 82 | 83 | mkdir(params.save_dir) 84 | mkdir(params.summary_dir) 85 | 86 | train_nodes = vid2vidnet.build_train_op(*train_iter.get_next()) 87 | # sess.run(tf.global_variables_initializer()) 88 | 89 | all_var = tf.global_variables() 90 | init_var = [v for v in all_var if 'vgg_16' not in v.name] 91 | init = tf.variables_initializer(var_list=init_var) 92 | sess.run(init) 93 | 94 | # # Restore from save_dir 95 | # if ('checkpoint' in os.listdir(params.save_dir)): 96 | # variables_to_restore = tf.trainable_variables() 97 | # varlist = {v.name[:-2]: v for v in variables_to_restore if v.name[:6]!='vgg_16'} 98 | # print(varlist) 99 | # tf.train.Saver(varlist).restore(sess, tf.train.latest_checkpoint(params.save_dir)) 100 | 101 | tf.summary.scalar("discriminator_loss", train_nodes['Discrim_loss']) 102 | tf.summary.scalar("generator_loss_GAN", train_nodes['Gen_loss_GAN']) 103 | tf.summary.scalar("generator_loss_L1", train_nodes['Gen_loss_L1']) 104 | 105 | with tf.name_scope("inputs1_summary"): 106 | tf.summary.image("inputs1", tf.image.convert_image_dtype(train_nodes['Inputs'][... 
,3:6], dtype=tf.uint8)) 107 | 108 | with tf.name_scope("targets_summary"): 109 | tf.summary.image("targets", tf.image.convert_image_dtype(train_nodes['Targets'], dtype=tf.uint8)) 110 | 111 | with tf.name_scope("outputs_summary"): 112 | tf.summary.image("outputs", tf.image.convert_image_dtype(train_nodes['Outputs'], dtype=tf.uint8)) 113 | 114 | with tf.name_scope("alpha_summary"): 115 | tf.summary.image("alphas", tf.image.convert_image_dtype(train_nodes['Alphas'], dtype=tf.uint8)) 116 | 117 | with tf.name_scope("inputs0_summary"): 118 | tf.summary.image("inputs0", tf.image.convert_image_dtype(train_nodes['Inputs'][:,:,:,:3], dtype=tf.uint8)) 119 | 120 | # with tf.name_scope("fg_inputs0_summary"): 121 | # tf.summary.image("fg_inputs0", tf.image.convert_image_dtype(train_nodes['FGInputs'], dtype=tf.uint8)) 122 | 123 | # with tf.name_scope("inputs_fg_summary"): 124 | # tf.summary.image("inputs_fg", tf.image.convert_image_dtype(train_nodes['Inputs'][:,:,:,:3], dtype=tf.uint8)) 125 | 126 | # # Add histograms for gradients. 127 | # for grad, var in train_nodes['Discrim_grads_and_vars'] + train_nodes['Gen_grads_and_vars']: 128 | # if(grad is not None): 129 | # tf.summary.histogram(var.op.name + "/gradients", grad) 130 | 131 | merge_summary_op = tf.summary.merge_all() 132 | summary_writer = tf.summary.FileWriter(params.summary_dir, graph=sess.graph) 133 | 134 | for i in range(epochs): 135 | ### Run training 136 | result = sess.run([train_nodes['Train_op'], 137 | merge_summary_op, 138 | train_nodes['Gen_loss_GAN'], 139 | train_nodes['Gen_loss_L1'], 140 | train_nodes['Discrim_loss'], 141 | train_nodes['Lr'], 142 | train_nodes['Global_step']]) 143 | _, summary, gen_loss_GAN, gen_loss_L1, discrim_loss, lr, global_step = result 144 | if(global_step % params.summary_step==0): 145 | print('Step {}, Lr= {:.2e}: \n\tgen_loss_GAN= {:.3f}, \n\tgen_loss_L1= {:.3f}, \n\tdiscrim_loss= {:.3f}'.format(global_step, lr, gen_loss_GAN, gen_loss_L1, discrim_loss)) 146 | summary_writer.add_summary(summary, global_step) 147 | 148 | ### Save checkpoint 149 | if (global_step % params.save_step == 0): 150 | tf.train.Saver(max_to_keep=params.max_to_keep, var_list=tf.global_variables()).save(sess, 151 | os.path.join(params.save_dir, 152 | params.save_name), 153 | global_step=global_step) 154 | -------------------------------------------------------------------------------- /voicepuppet/pixrefer/vgg_simple.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | slim = tf.contrib.slim 4 | 5 | 6 | def vgg_arg_scope(weight_decay=0.0005): 7 | """Defines the VGG arg scope. 8 | Args: 9 | weight_decay: The l2 regularization coefficient. 10 | Returns: 11 | An arg_scope. 12 | """ 13 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 14 | activation_fn=tf.nn.relu, 15 | weights_regularizer=slim.l2_regularizer(weight_decay), 16 | biases_initializer=tf.zeros_initializer()): 17 | with slim.arg_scope([slim.conv2d], padding='SAME') as arg_sc: 18 | return arg_sc 19 | 20 | 21 | def vgg_a(inputs, 22 | num_classes=1000, 23 | is_training=True, 24 | dropout_keep_prob=0.5, 25 | spatial_squeeze=True, 26 | scope='vgg_a', 27 | fc_conv_padding='VALID', 28 | global_pool=False): 29 | """Oxford Net VGG 11-Layers version A Example. 30 | Note: All the fully_connected layers have been transformed to conv2d layers. 31 | To use in classification mode, resize input to 224x224. 32 | Args: 33 | inputs: a tensor of size [batch_size, height, width, channels]. 
34 | num_classes: number of predicted classes. If 0 or None, the logits layer is 35 | omitted and the input features to the logits layer are returned instead. 36 | is_training: whether or not the model is being trained. 37 | dropout_keep_prob: the probability that activations are kept in the dropout 38 | layers during training. 39 | spatial_squeeze: whether or not should squeeze the spatial dimensions of the 40 | outputs. Useful to remove unnecessary dimensions for classification. 41 | scope: Optional scope for the variables. 42 | fc_conv_padding: the type of padding to use for the fully connected layer 43 | that is implemented as a convolutional layer. Use 'SAME' padding if you 44 | are applying the network in a fully convolutional manner and want to 45 | get a prediction map downsampled by a factor of 32 as an output. 46 | Otherwise, the output prediction map will be (input / 32) - 6 in case of 47 | 'VALID' padding. 48 | global_pool: Optional boolean flag. If True, the input to the classification 49 | layer is avgpooled to size 1x1, for any input size. (This is not part 50 | of the original VGG architecture.) 51 | Returns: 52 | net: the output of the logits layer (if num_classes is a non-zero integer), 53 | or the input to the logits layer (if num_classes is 0 or None). 54 | end_points: a dict of tensors with intermediate activations. 55 | """ 56 | with tf.variable_scope(scope, 'vgg_a', [inputs]) as sc: 57 | end_points_collection = sc.original_name_scope + '_end_points' 58 | # Collect outputs for conv2d, fully_connected and max_pool2d. 59 | with slim.arg_scope([slim.conv2d, slim.max_pool2d], 60 | outputs_collections=end_points_collection): 61 | net = slim.repeat(inputs, 1, slim.conv2d, 64, [3, 3], scope='conv1') 62 | net = slim.max_pool2d(net, [2, 2], scope='pool1') 63 | net = slim.repeat(net, 1, slim.conv2d, 128, [3, 3], scope='conv2') 64 | net = slim.max_pool2d(net, [2, 2], scope='pool2') 65 | net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3], scope='conv3') 66 | net = slim.max_pool2d(net, [2, 2], scope='pool3') 67 | net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv4') 68 | net = slim.max_pool2d(net, [2, 2], scope='pool4') 69 | net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv5') 70 | net = slim.max_pool2d(net, [2, 2], scope='pool5') 71 | 72 | # Use conv2d instead of fully_connected layers. 73 | net = slim.conv2d(net, 4096, [7, 7], padding=fc_conv_padding, scope='fc6') 74 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 75 | scope='dropout6') 76 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7') 77 | # Convert end_points_collection into a end_point dict. 
78 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 79 | if global_pool: 80 | net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='global_pool') 81 | end_points['global_pool'] = net 82 | if num_classes: 83 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 84 | scope='dropout7') 85 | net = slim.conv2d(net, num_classes, [1, 1], 86 | activation_fn=None, 87 | normalizer_fn=None, 88 | scope='fc8') 89 | if spatial_squeeze: 90 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed') 91 | end_points[sc.name + '/fc8'] = net 92 | return net, end_points 93 | vgg_a.default_image_size = 224 94 | 95 | 96 | def vgg_16(inputs, 97 | num_classes=1000, 98 | is_training=False, 99 | dropout_keep_prob=0.5, 100 | spatial_squeeze=True, 101 | scope='vgg_16', 102 | fc_conv_padding='VALID', 103 | global_pool=False, 104 | reuse=False): 105 | """Oxford Net VGG 16-Layers version D Example. 106 | Note: All the fully_connected layers have been transformed to conv2d layers. 107 | To use in classification mode, resize input to 224x224. 108 | Args: 109 | inputs: a tensor of size [batch_size, height, width, channels]. 110 | num_classes: number of predicted classes. If 0 or None, the logits layer is 111 | omitted and the input features to the logits layer are returned instead. 112 | is_training: whether or not the model is being trained. 113 | dropout_keep_prob: the probability that activations are kept in the dropout 114 | layers during training. 115 | spatial_squeeze: whether or not should squeeze the spatial dimensions of the 116 | outputs. Useful to remove unnecessary dimensions for classification. 117 | scope: Optional scope for the variables. 118 | fc_conv_padding: the type of padding to use for the fully connected layer 119 | that is implemented as a convolutional layer. Use 'SAME' padding if you 120 | are applying the network in a fully convolutional manner and want to 121 | get a prediction map downsampled by a factor of 32 as an output. 122 | Otherwise, the output prediction map will be (input / 32) - 6 in case of 123 | 'VALID' padding. 124 | global_pool: Optional boolean flag. If True, the input to the classification 125 | layer is avgpooled to size 1x1, for any input size. (This is not part 126 | of the original VGG architecture.) 127 | Returns: 128 | net: the output of the logits layer (if num_classes is a non-zero integer), 129 | or the input to the logits layer (if num_classes is 0 or None). 130 | end_points: a dict of tensors with intermediate activations. 131 | """ 132 | with tf.variable_scope(scope, 'vgg_16', [inputs], reuse=reuse) as sc: 133 | out = [] 134 | end_points_collection = sc.original_name_scope + '_end_points' 135 | # Collect outputs for conv2d, fully_connected and max_pool2d. 
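    # Note: unlike the stock slim VGG-16, this network stops after conv4 and returns the
    # conv1..conv4 activations (out1..out4) plus the variable scopes to exclude when restoring
    # the pretrained vgg_16.ckpt; the multi-scale features are presumably used as a perceptual
    # loss inside PixReferNet (see params.vgg_model_path in train_pixrefer.py).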
136 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], 137 | outputs_collections=end_points_collection): 138 | net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1') 139 | 140 | # with tf.variable_scope('relu1'): 141 | out1 = net 142 | 143 | net = slim.max_pool2d(net, [2, 2], scope='pool1') 144 | net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2') 145 | 146 | # with tf.variable_scope('relu2'): 147 | # out = tf.add(net, tf.zeros_like(net), name='conv2_2') 148 | out2 = net 149 | 150 | net = slim.max_pool2d(net, [2, 2], scope='pool2') 151 | net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3') 152 | 153 | # with tf.variable_scope('relu3'): 154 | out3 = net 155 | 156 | net = slim.max_pool2d(net, [2, 2], scope='pool3') 157 | net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4') 158 | 159 | out4 = net 160 | exclude = ['vgg_16/fc6', 'vgg_16/pool4','vgg_16/conv5','vgg_16/pool5','vgg_16/fc7','vgg_16/global_pool','vgg_16/fc8/squeezed','vgg_16/fc8'] 161 | 162 | return out1, out2, out3, out4, exclude 163 | vgg_16.default_image_size = 224 164 | --------------------------------------------------------------------------------
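A minimal usage sketch, not taken from this repository, of how the truncated vgg_16 above can drive a multi-scale perceptual loss; the function name build_vgg_loss, the per-scale weights and the sibling-module import are assumptions made for illustration.

import tensorflow as tf
from vgg_simple import vgg_16

def build_vgg_loss(generated, target, weights=(1.0, 1.0, 1.0, 1.0)):
  # Both images pass through the same VGG-16 trunk; reuse=True shares the variables.
  g1, g2, g3, g4, _ = vgg_16(generated)
  t1, t2, t3, t4, exclude = vgg_16(target, reuse=True)
  # Weighted L1 distance between the conv1..conv4 activations of the two images.
  loss = 0.0
  for w, g, t in zip(weights, (g1, g2, g3, g4), (t1, t2, t3, t4)):
    loss += w * tf.reduce_mean(tf.abs(g - t))
  # `exclude` lists the scopes that have no weights in the pretrained vgg_16.ckpt.
  return loss, exclude

The pretrained weights could then be restored with a Saver built from slim.get_variables_to_restore(exclude=exclude), which matches how train_pixrefer.py initialises every variable except those under vgg_16.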