├── .gitignore
├── LICENSE
├── README.md
├── WORKSPACE
├── background
│   ├── 1.jpg
│   ├── 10.jpg
│   ├── 100.jpg
│   ├── 11.jpg
│   ├── 12.jpg
│   ├── 13.jpg
│   ├── 14.jpg
│   ├── 15.jpg
│   ├── 16.jpg
│   ├── 17.jpg
│   ├── 18.jpg
│   ├── 19.jpg
│   ├── 2.jpg
│   ├── 20.jpg
│   ├── 21.jpg
│   ├── 22.jpg
│   ├── 23.jpg
│   ├── 24.jpg
│   ├── 25.jpg
│   ├── 26.jpg
│   ├── 27.jpg
│   ├── 28.jpg
│   ├── 29.jpg
│   ├── 3.jpg
│   ├── 30.jpg
│   ├── 31.jpg
│   ├── 32.jpg
│   ├── 33.jpg
│   ├── 34.jpg
│   ├── 35.jpg
│   ├── 36.jpg
│   ├── 37.jpg
│   ├── 38.jpg
│   ├── 39.jpg
│   ├── 4.jpg
│   ├── 40.jpg
│   ├── 41.jpg
│   ├── 42.jpg
│   ├── 43.jpg
│   ├── 44.jpg
│   ├── 45.jpg
│   ├── 46.jpg
│   ├── 47.jpg
│   ├── 48.jpg
│   ├── 49.jpg
│   ├── 5.jpg
│   ├── 50.jpg
│   ├── 51.jpg
│   ├── 52.jpg
│   ├── 53.jpg
│   ├── 54.jpg
│   ├── 55.jpg
│   ├── 56.jpg
│   ├── 57.jpg
│   ├── 58.jpg
│   ├── 59.jpg
│   ├── 6.jpg
│   ├── 60.jpg
│   ├── 61.jpg
│   ├── 62.jpg
│   ├── 63.jpg
│   ├── 64.jpg
│   ├── 65.jpg
│   ├── 66.jpg
│   ├── 67.jpg
│   ├── 68.jpg
│   ├── 69.jpg
│   ├── 7.jpg
│   ├── 70.jpg
│   ├── 71.jpg
│   ├── 72.jpg
│   ├── 73.jpg
│   ├── 74.jpg
│   ├── 75.jpg
│   ├── 76.jpg
│   ├── 77.jpg
│   ├── 78.jpg
│   ├── 79.jpg
│   ├── 8.jpg
│   ├── 80.jpg
│   ├── 81.jpg
│   ├── 82.jpg
│   ├── 83.jpg
│   ├── 84.jpg
│   ├── 85.jpg
│   ├── 86.jpg
│   ├── 87.jpg
│   ├── 88.jpg
│   ├── 89.jpg
│   ├── 9.jpg
│   ├── 90.jpg
│   ├── 91.jpg
│   ├── 92.jpg
│   ├── 93.jpg
│   ├── 94.jpg
│   ├── 95.jpg
│   ├── 96.jpg
│   ├── 97.jpg
│   ├── 98.jpg
│   └── 99.jpg
├── config
│   ├── BUILD
│   ├── configure.py
│   └── params.yml
├── datasets
│   ├── BUILD
│   ├── make_data_from_GRID.py
│   ├── makelist_bfm.py
│   ├── makelist_pixrefer.py
│   └── models.py
├── generator
│   ├── BUILD
│   ├── generator.py
│   ├── loader.py
│   └── test_generator.py
├── res
│   ├── 1.png
│   ├── 2.png
│   └── 3.jpg
├── sample
│   ├── 22.jpg
│   └── test.aac
├── utils
│   ├── BUILD
│   ├── bfm_load_data.py
│   ├── bfm_visual.py
│   ├── cython
│   │   ├── mesh_core.cpp
│   │   ├── mesh_core.h
│   │   ├── mesh_core_cython.pyx
│   │   └── setup.py
│   ├── reconstruct_mesh.py
│   └── utils.py
└── voicepuppet
    ├── BUILD
    ├── atvgnet
    │   ├── BUILD
    │   ├── __init__.py
    │   ├── atnet.py
    │   ├── backbone.py
    │   ├── infer.py
    │   ├── plot.py
    │   ├── test_atnet.py
    │   ├── test_vgnet.py
    │   ├── tinynet.py
    │   ├── train_atnet.py
    │   ├── train_vgnet.py
    │   └── vgnet.py
    ├── bfmnet
    │   ├── BUILD
    │   ├── bfmnet.py
    │   ├── infer_bfmnet.py
    │   ├── tinynet.py
    │   └── train_bfmnet.py
    ├── builder.py
    ├── pixflow
    │   ├── BUILD
    │   ├── infer_bfm_pixflow.py
    │   ├── infer_pixflow.py
    │   ├── pixflow.py
    │   └── train_pixflow.py
    └── pixrefer
        ├── BUILD
        ├── infer_bfmvid.py
        ├── infer_pixrefer.py
        ├── pixrefer.py
        ├── train_pixrefer.py
        └── vgg_simple.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 DongLu
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # VoicePuppet #
4 | - This repository provides a general pipeline to automatically generate a speaking actor from voice input.
5 | - For a quick impression, there is a [short video](https://youtu.be/h24MGPTTw5M) that demonstrates it.
6 |
7 | ## The architecture of the network ##
8 | - The network is composed of two parts. The first, BFMNet (Basel Face Model network), predicts the 3D face coefficients of each frame from a fixed-stride window of the waveform. The second, PixReferNet, redraws the real face foreground using the rasterized face produced by rendering the 3D face coefficients from the previous step.
9 |
10 |
11 | *(figure: BFMNet component)*
12 |
13 | *(figure: PixReferNet component)*
14 |
29 | ## Run the prediction pipeline ##
30 | ------------------------
31 |
32 | 1. Download the pretrained model and required models.
33 | Baidu Disk: [[ckpt.zip](https://pan.baidu.com/s/1cVIVFhSsEA1MbgqL7H7mMw), code: a6pn], [[allmodels.zip](https://pan.baidu.com/s/11FKHjGjnPtD2c7Ttg-mXng), code: brfh]
34 | or Google Drive: [[ckpt.zip](https://drive.google.com/file/d/1RgMSQUL2pzvwCWGgnkvwxHxHeEnZ7FlN/view?usp=sharing)], [[allmodels.zip](https://drive.google.com/file/d/1Z1Pm39sp977nED_HHZtvn5glRrmiThwB/view?usp=sharing)]
35 | Extract `ckpt.zip` into `ckpt_bfmnet` and `ckpt_pixrefer`, and extract `allmodels.zip` into the current root dir
36 | 2. `cd utils/cython` && `python3 setup.py install`
37 | 3. Install the ffmpeg tool if you want to merge the PNG sequence and the audio file into a video container such as mp4 (see the sketch after this list).
38 | 4. `python3 voicepuppet/pixrefer/infer_bfmvid.py --config_path config/params.yml sample/22.jpg sample/test.aac`
39 |
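
For step 3, here is a minimal sketch (not part of the repository) of merging the generated PNG sequence and the audio track into an mp4 by calling ffmpeg from Python. The frame filename pattern and output directory are assumptions; adjust them to wherever the inference script writes its frames.

```python
import subprocess

# Merge a numbered PNG sequence and an audio file into an mp4 with ffmpeg.
# Assumptions: frames are named 0.png, 1.png, ... inside `frame_dir`, at 25 fps
# (the frame_rate in config/params.yml). Adjust the pattern to the real output.
def merge_frames_and_audio(frame_dir, audio_path, out_path, fps=25):
    cmd = [
        "ffmpeg", "-y",
        "-framerate", str(fps),
        "-i", f"{frame_dir}/%d.png",   # numbered frame pattern (assumption)
        "-i", audio_path,
        "-c:v", "libx264",
        "-pix_fmt", "yuv420p",
        "-shortest",                   # stop at the shorter of video/audio
        out_path,
    ]
    subprocess.run(cmd, check=True)

# Example:
# merge_frames_and_audio("output_frames", "sample/test.aac", "result.mp4")
```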
40 | ## Run the training pipeline ##
41 | ------------------------
42 |
43 | #### Requirements ####
44 |
45 | - tensorflow>=1.14.0
46 | - pytorch>=1.4.0, only for data preparation (face foreground segmentation and matting)
47 | - mxnet>=1.5.1, only for data preparation (face alignment)
48 | Tip: you can use other models, such as dlib, to do the same labeling instead
49 |
50 | #### Data preparation ####
51 |
52 | 1. Check your `config/params.yml` to make sure the dataset folder follows the specified structure (same as the [grid dataset](http://spandh.dcs.shef.ac.uk/gridcorpus/)); you can extend the dataset by adding common video files in the same folder structure
53 | ```
54 | |- srcdir/
55 | | |- s10/
56 | | |- video/
57 | | |- mpg_6000/
58 | | |- bbab8n.mpg
59 | | |- bbab9s.mpg
60 | | |- bbac1a.mpg
61 | | |- ...
62 | | |- s8/
63 | | |- video/
64 | | |- mpg_6000/
65 | | |- bbae5n.mpg
66 | | |- bbae6s.mpg
67 | | |- bbae7p.mpg
68 | | |- ...
69 | ```
70 | 2. Extract the audio stream from the mpg video files; `todir` is the output folder where the labels will be stored.
71 | `python3 datasets/make_data_from_GRID.py --gpu 0 --step 2 srcdir todir`
72 |
73 | 3. Face detection and alignment
74 | `python3 datasets/make_data_from_GRID.py --gpu 0 --step 3 srcdir todir ./allmodels`
75 |
76 | 4. 3D face reconstruction
77 | `python3 datasets/make_data_from_GRID.py --gpu 0 --step 4 todir ./allmodels`
78 |
79 | 5. The above steps will take several hours to finish; afterwards you'll find `*.jpg, landmark.txt, audio.wav, bfmcoeff.txt` in each output subfolder. The labels `audio.wav` and `bfmcoeff.txt` are used for BFMNet training; the others are only temporary files.
80 | ```
81 | |- todir/
82 | | |- s10/
83 | | |- bbab8n/
84 | | |- landmark.txt
85 | | |- audio.wav
86 | | |- bfmcoeff.txt
87 | | |- 0.jpg
88 | | |- 1.jpg
89 | | |- ...
90 | | |- bbab9s/
91 | | |- ...
92 | | |- s8/
93 | | |- bbae5n/
94 | | |- landmark.txt
95 | | |- audio.wav
96 | | |- bfmcoeff.txt
97 | | |- 0.jpg
98 | | |- 1.jpg
99 | | |- ...
100 | | |- bbae6s/
101 | | |- ...
102 | ```
103 | 6. Face (human foreground) segmentation and matting for PixReferNet training. Before invoking the Python script, make sure the width and height of the video are equal (1:1 aspect ratio). In general, 3-5 minutes of video is enough to train the PixReferNet network; note that the trained model will only work for this specific person.
104 | `python3 datasets/make_data_from_GRID.py --gpu 0 --step 6 src_dir to_dvp_dir ./allmodels`
105 | The `src_dir` has the same folder structure as [step 1 in Data preparation]. When the above step finishes, you will find `*.jpg` files in the subfolders, like this
106 |
107 |

108 |
109 |
110 | #### Train BFMNet ####
111 |
112 | 1. Prepare the train and eval txt files; check that the `root_path` parameter in `config/params.yml` points to the output folder of [step 1 in Data preparation]
113 | `python3 datasets/makelist_bfm.py --config_path config/params.yml`
114 | 2. Train the model
115 | `python3 voicepuppet/bfmnet/train_bfmnet.py --config_path config/params.yml`
116 | 3. Watch the evaluation images every 1000 steps in `log/eval_bfmnet`; the upper row is the target sequence and the lower row is the predicted sequence.
117 |
118 |

119 |
120 |
121 | #### Train PixReferNet ####
122 |
123 | 1. Prepare the train and eval txt files; check that the `root_path` parameter in `config/params.yml` points to the output folder of [step 6 in Data preparation]
124 | `python3 datasets/makelist_pixrefer.py --config_path config/params.yml`
125 | 2. Train the model
126 | `python3 voicepuppet/pixrefer/train_pixrefer.py --config_path config/params.yml`
127 | 3. Use TensorBoard to watch the training process
128 | `tensorboard --logdir=log/summary_pixrefer`
129 |
130 | ## Acknowledgement ##
131 | 1. The face alignment model is based on [Deepinx's work](https://github.com/deepinx/deep-face-alignment); it is more stable than dlib.
132 | 2. The 3D face reconstruction model is based on [Microsoft's work](https://github.com/microsoft/Deep3DFaceReconstruction)
133 | 3. The image segmentation model is based on [gasparian's work](https://github.com/gasparian/PicsArtHack-binary-segmentation)
134 | 4. The image matting model is based on [foamliu's work](https://github.com/foamliu/Deep-Image-Matting)
135 |
--------------------------------------------------------------------------------
/WORKSPACE:
--------------------------------------------------------------------------------
1 | workspace(name = "voicepuppet")
2 |
--------------------------------------------------------------------------------
/background/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/1.jpg
--------------------------------------------------------------------------------
/background/10.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/10.jpg
--------------------------------------------------------------------------------
/background/100.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/100.jpg
--------------------------------------------------------------------------------
/background/11.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/11.jpg
--------------------------------------------------------------------------------
/background/12.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/12.jpg
--------------------------------------------------------------------------------
/background/13.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/13.jpg
--------------------------------------------------------------------------------
/background/14.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/14.jpg
--------------------------------------------------------------------------------
/background/15.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/15.jpg
--------------------------------------------------------------------------------
/background/16.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/16.jpg
--------------------------------------------------------------------------------
/background/17.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/17.jpg
--------------------------------------------------------------------------------
/background/18.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/18.jpg
--------------------------------------------------------------------------------
/background/19.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/19.jpg
--------------------------------------------------------------------------------
/background/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/2.jpg
--------------------------------------------------------------------------------
/background/20.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/20.jpg
--------------------------------------------------------------------------------
/background/21.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/21.jpg
--------------------------------------------------------------------------------
/background/22.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/22.jpg
--------------------------------------------------------------------------------
/background/23.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/23.jpg
--------------------------------------------------------------------------------
/background/24.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/24.jpg
--------------------------------------------------------------------------------
/background/25.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/25.jpg
--------------------------------------------------------------------------------
/background/26.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/26.jpg
--------------------------------------------------------------------------------
/background/27.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/27.jpg
--------------------------------------------------------------------------------
/background/28.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/28.jpg
--------------------------------------------------------------------------------
/background/29.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/29.jpg
--------------------------------------------------------------------------------
/background/3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/3.jpg
--------------------------------------------------------------------------------
/background/30.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/30.jpg
--------------------------------------------------------------------------------
/background/31.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/31.jpg
--------------------------------------------------------------------------------
/background/32.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/32.jpg
--------------------------------------------------------------------------------
/background/33.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/33.jpg
--------------------------------------------------------------------------------
/background/34.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/34.jpg
--------------------------------------------------------------------------------
/background/35.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/35.jpg
--------------------------------------------------------------------------------
/background/36.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/36.jpg
--------------------------------------------------------------------------------
/background/37.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/37.jpg
--------------------------------------------------------------------------------
/background/38.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/38.jpg
--------------------------------------------------------------------------------
/background/39.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/39.jpg
--------------------------------------------------------------------------------
/background/4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/4.jpg
--------------------------------------------------------------------------------
/background/40.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/40.jpg
--------------------------------------------------------------------------------
/background/41.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/41.jpg
--------------------------------------------------------------------------------
/background/42.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/42.jpg
--------------------------------------------------------------------------------
/background/43.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/43.jpg
--------------------------------------------------------------------------------
/background/44.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/44.jpg
--------------------------------------------------------------------------------
/background/45.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/45.jpg
--------------------------------------------------------------------------------
/background/46.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/46.jpg
--------------------------------------------------------------------------------
/background/47.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/47.jpg
--------------------------------------------------------------------------------
/background/48.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/48.jpg
--------------------------------------------------------------------------------
/background/49.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/49.jpg
--------------------------------------------------------------------------------
/background/5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/5.jpg
--------------------------------------------------------------------------------
/background/50.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/50.jpg
--------------------------------------------------------------------------------
/background/51.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/51.jpg
--------------------------------------------------------------------------------
/background/52.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/52.jpg
--------------------------------------------------------------------------------
/background/53.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/53.jpg
--------------------------------------------------------------------------------
/background/54.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/54.jpg
--------------------------------------------------------------------------------
/background/55.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/55.jpg
--------------------------------------------------------------------------------
/background/56.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/56.jpg
--------------------------------------------------------------------------------
/background/57.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/57.jpg
--------------------------------------------------------------------------------
/background/58.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/58.jpg
--------------------------------------------------------------------------------
/background/59.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/59.jpg
--------------------------------------------------------------------------------
/background/6.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/6.jpg
--------------------------------------------------------------------------------
/background/60.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/60.jpg
--------------------------------------------------------------------------------
/background/61.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/61.jpg
--------------------------------------------------------------------------------
/background/62.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/62.jpg
--------------------------------------------------------------------------------
/background/63.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/63.jpg
--------------------------------------------------------------------------------
/background/64.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/64.jpg
--------------------------------------------------------------------------------
/background/65.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/65.jpg
--------------------------------------------------------------------------------
/background/66.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/66.jpg
--------------------------------------------------------------------------------
/background/67.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/67.jpg
--------------------------------------------------------------------------------
/background/68.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/68.jpg
--------------------------------------------------------------------------------
/background/69.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/69.jpg
--------------------------------------------------------------------------------
/background/7.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/7.jpg
--------------------------------------------------------------------------------
/background/70.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/70.jpg
--------------------------------------------------------------------------------
/background/71.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/71.jpg
--------------------------------------------------------------------------------
/background/72.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/72.jpg
--------------------------------------------------------------------------------
/background/73.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/73.jpg
--------------------------------------------------------------------------------
/background/74.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/74.jpg
--------------------------------------------------------------------------------
/background/75.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/75.jpg
--------------------------------------------------------------------------------
/background/76.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/76.jpg
--------------------------------------------------------------------------------
/background/77.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/77.jpg
--------------------------------------------------------------------------------
/background/78.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/78.jpg
--------------------------------------------------------------------------------
/background/79.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/79.jpg
--------------------------------------------------------------------------------
/background/8.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/8.jpg
--------------------------------------------------------------------------------
/background/80.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/80.jpg
--------------------------------------------------------------------------------
/background/81.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/81.jpg
--------------------------------------------------------------------------------
/background/82.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/82.jpg
--------------------------------------------------------------------------------
/background/83.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/83.jpg
--------------------------------------------------------------------------------
/background/84.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/84.jpg
--------------------------------------------------------------------------------
/background/85.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/85.jpg
--------------------------------------------------------------------------------
/background/86.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/86.jpg
--------------------------------------------------------------------------------
/background/87.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/87.jpg
--------------------------------------------------------------------------------
/background/88.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/88.jpg
--------------------------------------------------------------------------------
/background/89.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/89.jpg
--------------------------------------------------------------------------------
/background/9.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/9.jpg
--------------------------------------------------------------------------------
/background/90.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/90.jpg
--------------------------------------------------------------------------------
/background/91.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/91.jpg
--------------------------------------------------------------------------------
/background/92.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/92.jpg
--------------------------------------------------------------------------------
/background/93.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/93.jpg
--------------------------------------------------------------------------------
/background/94.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/94.jpg
--------------------------------------------------------------------------------
/background/95.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/95.jpg
--------------------------------------------------------------------------------
/background/96.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/96.jpg
--------------------------------------------------------------------------------
/background/97.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/97.jpg
--------------------------------------------------------------------------------
/background/98.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/98.jpg
--------------------------------------------------------------------------------
/background/99.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/background/99.jpg
--------------------------------------------------------------------------------
/config/BUILD:
--------------------------------------------------------------------------------
1 | package(
2 | default_visibility = [
3 | "//visibility:public",
4 | ],
5 | features = ["-layering_check"],
6 | )
7 |
8 | py_library(
9 | name = "configure",
10 | srcs = ["configure.py"],
11 | deps = [
12 | ],
13 | )
14 |
--------------------------------------------------------------------------------
/config/configure.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | import yaml
4 | from tensorflow.contrib.training import HParams
5 |
6 |
7 | class YParams(HParams):
8 | def __init__(self, yaml_fn, config_name):
9 | HParams.__init__(self)
10 | with open(yaml_fn) as fp:
11 | for k, v in yaml.load(fp, Loader=yaml.FullLoader)[config_name].items():
12 | self.add_hparam(k, v)
13 |
--------------------------------------------------------------------------------
/config/params.yml:
--------------------------------------------------------------------------------
1 | default: &DEFAULT
2 | train_dataset_path: config/train.txt
3 | eval_dataset_path: config/eval.txt
4 |
5 | root_path: /media/dong/DiskData/gridcorpus/todir # used by makelist_*
6 | # root_path: /media/dong/DiskData/gridcorpus/todir_vid2vid
7 | train_by_eval: 9 # train/eval
8 |
9 | sample_file: # used by generator
10 | landmark_name: landmark.txt
11 | wav_name: audio.wav
12 | bfmcoeff_name: bfmcoeff.txt
13 |
14 | model_dir: ./allmodels
15 |
16 | mel:
17 | sample_rate: 16000
18 | num_mel_bins: 80
19 | win_length: 512
20 | fft_length: 512
21 | hop_step: 128
22 |
23 | frame_rate: 25
24 |
25 | training:
26 | epochs: 100000
27 | drop_rate: 0.25
28 | learning_rate: 0.001
29 | max_grad_norm: 50
30 | decay_steps: 1000
31 | decay_rate: 0.95
32 |
--------------------------------------------------------------------------------
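
As a small usage sketch (not part of the repository), the `YParams` class from `config/configure.py` above loads the `default` section of this file; each top-level YAML key becomes an attribute. Assuming it is run from the repository root:

```python
from config.configure import YParams

# Load the `default` section of config/params.yml into an HParams object.
params = YParams('config/params.yml', 'default')

print(params.root_path)                  # /media/dong/DiskData/gridcorpus/todir
print(params.frame_rate)                 # 25
print(params.mel['sample_rate'])         # 16000
print(params.training['learning_rate'])  # 0.001
```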
/datasets/BUILD:
--------------------------------------------------------------------------------
1 | package(
2 | default_visibility = [
3 | "//visibility:public",
4 | ],
5 | features = ["-layering_check"],
6 | )
7 |
8 | py_library(
9 | name = "models",
10 | srcs = ["models.py"],
11 | deps = [
12 | ],
13 | )
14 |
15 | py_binary(
16 | name = "makelist_bfm",
17 | srcs = ["makelist_bfm.py"],
18 | deps = [
19 | "//config:configure"
20 | ],
21 | )
22 |
23 | py_binary(
24 | name = "makelist_pixrefer",
25 | srcs = ["makelist_pixrefer.py"],
26 | deps = [
27 | "//config:configure"
28 | ],
29 | )
30 |
31 | py_binary(
32 | name = "make_data_from_GRID",
33 | srcs = ["make_data_from_GRID.py"],
34 | deps = [
35 | "//utils:bfm_load_data",
36 | "//utils:reconstruct_mesh",
37 | "//utils:utils",
38 | ":models"
39 | ],
40 | )
--------------------------------------------------------------------------------
/datasets/makelist_bfm.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | from optparse import OptionParser
4 | import json
5 | import logging
6 | import sys
7 |
8 | sys.path.append(os.getcwd())
9 | from config.configure import YParams
10 |
11 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
12 | logger = logging.getLogger(__name__)
13 |
14 |
15 | def write_dataset(params):
16 | train_dataset_path = params.train_dataset_path
17 | eval_dataset_path = params.eval_dataset_path
18 | root_path = params.root_path
19 | train_by_eval = params.train_by_eval
20 |
21 | landmark_name = params.sample_file['landmark_name']
22 | wav_name = params.sample_file['wav_name']
23 | bfmcoeff_name = params.sample_file['bfmcoeff_name']
24 |
25 | sample_index = 0
26 |
27 | with open(train_dataset_path, "w") as train_file:
28 | with open(eval_dataset_path, "w") as eval_file:
29 | for root, subdirs, files in os.walk(root_path):
30 | if not subdirs:
31 | if (os.path.exists(os.path.join(root, landmark_name)) and
32 | os.path.exists(os.path.join(root, wav_name)) and
33 | os.path.exists(os.path.join(root, bfmcoeff_name))):
34 |
35 | logger.info('Processing {}'.format(root))
36 | count = 0
37 | for file in files:
38 | if (file.endswith('.jpg')):
39 | count += 1
40 |
41 | sample_index += 1
42 | if (sample_index % (train_by_eval + 1) == 0):
43 | eval_file.write("{}|{}\n".format(root, count))
44 | else:
45 | train_file.write("{}|{}\n".format(root, count))
46 |
47 |
48 | if (__name__ == '__main__'):
49 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>")
50 | cmd_parser.add_option('--config_path', type="string", dest="config_path",
51 | help='the config json file')
52 |
53 | opts, argv = cmd_parser.parse_args()
54 |
55 | if (not opts.config_path is None):
56 | config_path = opts.config_path
57 |
58 | if (not os.path.exists(config_path)):
59 | logger.error('config_path not exists')
60 | exit(0)
61 |
62 | params = YParams(config_path, 'default')
63 | write_dataset(params)
64 | else:
65 | print('Please check your parameters.')
66 |
--------------------------------------------------------------------------------
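
Each line written by `write_dataset` above has the form `sample_dir|frame_count`. A minimal sketch (the helper name is hypothetical, not part of the repository) of reading the generated list back:

```python
# Parse the list produced by makelist_bfm.py: one "sample_dir|frame_count" per line.
def read_dataset_list(list_path):
    samples = []
    with open(list_path) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            sample_dir, count = line.rsplit('|', 1)
            samples.append((sample_dir, int(count)))
    return samples

# Example:
# train_samples = read_dataset_list('config/train.txt')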
/datasets/makelist_pixrefer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | from optparse import OptionParser
4 | import json
5 | import logging
6 | import sys
7 |
8 | sys.path.append(os.getcwd())
9 | from config.configure import YParams
10 |
11 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
12 | logger = logging.getLogger(__name__)
13 |
14 |
15 | def write_dataset(params):
16 | train_dataset_path = params.train_dataset_path
17 | eval_dataset_path = params.eval_dataset_path
18 | root_path = params.root_path
19 | train_by_eval = params.train_by_eval
20 |
21 | sample_index = 0
22 |
23 | with open(train_dataset_path, "w") as train_file:
24 | with open(eval_dataset_path, "w") as eval_file:
25 | for root, subdirs, files in os.walk(root_path):
26 | if not subdirs:
27 | logger.info('Processing {}'.format(root))
28 | count = 0
29 | for file in files:
30 | if (file.endswith('.jpg')):
31 | count += 1
32 |
33 | sample_index += 1
34 | if (sample_index % (train_by_eval + 1) == 0):
35 | eval_file.write("{}|{}\n".format(root, count))
36 | else:
37 | train_file.write("{}|{}\n".format(root, count))
38 |
39 |
40 | if (__name__ == '__main__'):
41 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>")
42 | cmd_parser.add_option('--config_path', type="string", dest="config_path",
43 | help='the config json file')
44 |
45 | opts, argv = cmd_parser.parse_args()
46 |
47 | if (not opts.config_path is None):
48 | config_path = opts.config_path
49 |
50 | if (not os.path.exists(config_path)):
51 | logger.error('config_path not exists')
52 | exit(0)
53 |
54 | params = YParams(config_path, 'default')
55 | write_dataset(params)
56 | else:
57 | print('Please check your parameters.')
58 |
--------------------------------------------------------------------------------
/generator/BUILD:
--------------------------------------------------------------------------------
1 | package(
2 | default_visibility = [
3 | "//visibility:public",
4 | ],
5 | features = ["-layering_check"],
6 | )
7 |
8 | py_library(
9 | name = "loader",
10 | srcs = ["loader.py"],
11 | deps = [
12 | ],
13 | )
14 |
15 | py_library(
16 | name = "generator",
17 | srcs = ["generator.py"],
18 | deps = [
19 | ":loader",
20 | "//config:configure"
21 | ],
22 | )
23 |
24 | py_library(
25 | name = "test_generator",
26 | srcs = ["test_generator.py"],
27 | deps = [
28 | ":generator"
29 | ],
30 | )
31 |
--------------------------------------------------------------------------------
/generator/loader.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import librosa
4 | import cv2
5 | from scipy.io import wavfile
6 | import resampy
7 |
8 |
9 | class Loader:
10 | ### root_path: None if the file_path is full path
11 | def __init__(self, root_path=None):
12 | self.root_path = root_path
13 |
14 | ### load txt data, each line split by comma, default float format
15 | ### file_path: file name in root_path, or full path.
16 | ### return: numpy array(float32)
17 | def get_text_data(self, file_path):
18 | if (self.root_path):
19 | file_path = os.path.join(self.root_path, file_path)
20 |
21 | with open(file_path) as f:
22 | lines = f.readlines()
23 | data_list = []
24 | for line in lines:
25 | pts = line.strip().split(',')
26 | if (len(pts) != 0):
27 | pts = list(map(lambda x: np.float32(x), pts))
28 | data_list.append(np.array(pts))
29 |
30 | return np.array(data_list)
31 |
32 | ### load binary data of pickle format.
33 | ### file_path: file name in root_path, or full path.
34 | ### return: numpy array(float32)
35 | def get_bin_data(self, file_path):
36 | if (self.root_path):
37 | file_path = os.path.join(self.root_path, file_path)
38 |
39 | if (file_path.endswith('.npy') or file_path.endswith('.npz')):
40 | data = np.load(file_path)
41 | return data
42 |
43 |
44 | class EarLoader(Loader):
45 |
46 | def get_data(self, file_path):
47 | data = self.get_text_data(file_path)
48 | return data
49 |
50 |
51 | class PoseLoader(Loader):
52 |
53 | def get_data(self, file_path):
54 | data = self.get_text_data(file_path)
55 | return data
56 |
57 |
58 | class LandmarkLoader(Loader):
59 | def __init__(self, root_path=None, norm_size=128):
60 | Loader.__init__(self, root_path)
61 | self.norm_size = norm_size
62 |
63 | def get_data(self, file_path):
64 | data = self.get_text_data(file_path).astype(np.float32)
65 | data /= self.norm_size
66 | return data
67 |
68 |
69 | class BFMCoeffLoader(Loader):
70 |
71 | def get_data(self, file_path):
72 | data = self.get_text_data(file_path)
73 | return data
74 |
75 |
76 | class ImageLoader(Loader):
77 | def __init__(self, root_path=None, resize=None):
78 | Loader.__init__(self, root_path)
79 | self.resize = resize
80 |
81 | def get_data(self, file_path):
82 | if (self.root_path):
83 | file_path = os.path.join(self.root_path, file_path)
84 |
85 | data = cv2.imread(file_path).astype(np.float32)
86 | if (self.resize is not None):
87 | data = cv2.resize(data, (self.resize[0], self.resize[1]))
88 | data /= 255.0
89 | return data
90 |
91 |
92 | class WavLoader(Loader):
93 | def __init__(self, root_path=None, sr=16000):
94 | self.sr = sr
95 | Loader.__init__(self, root_path)
96 |
97 | def get_data(self, file_path):
98 | if (self.root_path):
99 | file_path = os.path.join(self.root_path, file_path)
100 |
101 | data, _ = librosa.load(file_path, sr=self.sr)
102 | return data
103 |
104 |
105 | class AudioLoader(Loader):
106 | def __init__(self, root_path=None, sr=16000):
107 | self.sr = sr
108 | Loader.__init__(self, root_path)
109 |
110 | def get_data(self, file_path):
111 | if (self.root_path):
112 | file_path = os.path.join(self.root_path, file_path)
113 |
114 | rate, data = wavfile.read(file_path)
115 | if data.ndim != 1:
116 | data = data[:,0]
117 |
118 | data = resampy.resample(data.astype(np.float32), rate, self.sr)
119 | return data
120 |
--------------------------------------------------------------------------------
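
A short usage sketch of the loaders above (file paths are placeholders; assumes it is run from the repository root so `generator` is importable):

```python
from generator.loader import WavLoader, ImageLoader, LandmarkLoader

wav_loader = WavLoader(sr=16000)             # librosa-based, resampled to 16 kHz
pcm = wav_loader.get_data('audio.wav')       # 1-D float32 waveform

img_loader = ImageLoader(resize=(128, 128))  # pixel values scaled to [0, 1]
img = img_loader.get_data('0.jpg')           # float32 array of shape (128, 128, 3)

lm_loader = LandmarkLoader(norm_size=128)    # landmark coordinates divided by 128
landmarks = lm_loader.get_data('landmark.txt')
```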
/generator/test_generator.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import os
3 | import numpy as np
4 | from generator import ATNetDataGenerator
5 | from generator import VGNetDataGenerator
6 |
7 |
8 | class GeneratorTest(tf.test.TestCase):
9 |
10 | def testATNetGenerator(self):
11 | config_path = 'config/params.yml'
12 | batch_size = 2
13 | landmark_size = 136
14 | ### Generator for training setting
15 | generator = ATNetDataGenerator(config_path)
16 | params = generator.params
17 | params.dataset_path = params.train_dataset_path
18 | params.batch_size = batch_size
19 | generator.set_params(params)
20 | dataset = generator.get_dataset()
21 |
22 | sess = tf.Session()
23 | tf.train.start_queue_runners(sess=sess)
24 |
25 | iterator = dataset.make_one_shot_iterator()
26 | landmark, ears, poses, mfccs, example_landmark, seq_len = sess.run(iterator.get_next())
27 |
28 | frame_mfcc_scale = params.mel['sample_rate'] / params.frame_rate / params.mel['hop_step']
29 |
30 | assert (frame_mfcc_scale - int(frame_mfcc_scale) == 0), "sample_rate/hop_step must divided by frame_rate."
31 |
32 | ## Test seq_len value range
33 | self.assertAllGreaterEqual(seq_len, params.min_squence_len)
34 | self.assertAllLessEqual(seq_len, params.max_squence_len)
35 |
36 | max_seq_len = np.max(seq_len)
37 |
38 | ## Test seq_len shape, [batch_size]
39 | self.assertAllEqual(seq_len.shape, [params.batch_size])
40 | ## Test landmark shape, [batch_size, padding_time, landmark_size]
41 | self.assertAllEqual(landmark.shape, [params.batch_size, max_seq_len, landmark_size])
42 | ## Test ears shape, [batch_size, padding_time, 1]
43 | self.assertAllEqual(ears.shape, [params.batch_size, max_seq_len, 1])
44 | ## Test poses shape, [batch_size, padding_time, 3]
45 | self.assertAllEqual(poses.shape, [params.batch_size, max_seq_len, 3])
46 | ## Test mfccs shape, [batch_size, padding_time, num_mel_bins]
47 | self.assertAllEqual(mfccs.shape, [params.batch_size, max_seq_len * frame_mfcc_scale, params.mel['num_mel_bins']])
48 | ## Test example_landmark shape, [batch_size, landmark_size]
49 | self.assertAllEqual(example_landmark.shape, [params.batch_size, landmark_size])
50 |
51 | ## Test the range of value, landmark [-1, 1]
52 | self.assertAllGreaterEqual(landmark, -1)
53 | self.assertAllLessEqual(landmark, 1)
54 | self.assertAllGreaterEqual(example_landmark, -1)
55 | self.assertAllLessEqual(example_landmark, 1)
56 |
57 | ## Test the range of value, ears [0, 1]
58 | self.assertAllGreaterEqual(ears, 0)
59 | self.assertAllLessEqual(ears, 1)
60 |
61 | ## Test the range of value, poses [-1, 1]
62 | self.assertAllGreaterEqual(poses, -1)
63 | self.assertAllLessEqual(poses, 1)
64 |
65 | def testVGNetGenerator(self):
66 | config_path = 'config/params.yml'
67 | batch_size = 2
68 | landmark_size = 136
69 | ### Generator for training setting
70 | generator = VGNetDataGenerator(config_path)
71 | params = generator.params
72 | params.dataset_path = params.train_dataset_path
73 | params.batch_size = batch_size
74 | generator.set_params(params)
75 | dataset = generator.get_dataset()
76 |
77 | sess = tf.Session()
78 | tf.train.start_queue_runners(sess=sess)
79 |
80 | iterator = dataset.make_one_shot_iterator()
81 | real_landmark_seq, real_mask_seq, real_img_seq, example_landmark, example_img, seq_len = sess.run(
82 | iterator.get_next())
83 |
84 | ## Test seq_len value range
85 | self.assertAllGreaterEqual(seq_len, params.min_squence_len)
86 | self.assertAllLessEqual(seq_len, params.max_squence_len)
87 |
88 | max_seq_len = np.max(seq_len)
89 |
90 | ## Test seq_len shape, [batch_size]
91 | self.assertAllEqual(seq_len.shape, [params.batch_size])
92 | ## Test real_landmark_seq shape, [batch_size, padding_time, landmark_size]
93 | self.assertAllEqual(real_landmark_seq.shape, [params.batch_size, max_seq_len, landmark_size])
94 | ## Test real_mask_seq shape, [batch_size, padding_time, img_height, img_width, 1]
95 | self.assertAllEqual(real_mask_seq.shape, [params.batch_size, max_seq_len, params.img_size, params.img_size, 1])
96 | ## Test real_img_seq shape, [batch_size, padding_time, img_height, img_width, 3]
97 | self.assertAllEqual(real_img_seq.shape, [params.batch_size, max_seq_len, params.img_size, params.img_size, 3])
98 | ## Test example_landmark shape, [batch_size, 136]
99 | self.assertAllEqual(example_landmark.shape, [params.batch_size, landmark_size])
100 | ## Test example_img shape, [batch_size, img_height, img_width, 3]
101 | self.assertAllEqual(example_img.shape, [params.batch_size, params.img_size, params.img_size, 3])
102 |
103 | ## Test the range of value, real_landmark_seq [-1, 1]
104 | self.assertAllGreaterEqual(real_landmark_seq, -1)
105 | self.assertAllLessEqual(real_landmark_seq, 1)
106 | self.assertAllGreaterEqual(example_landmark, -1)
107 | self.assertAllLessEqual(example_landmark, 1)
108 |
109 | ## Test the range of value, real_mask_seq [0, 1]
110 | self.assertAllGreaterEqual(real_mask_seq, 0)
111 | self.assertAllLessEqual(real_mask_seq, 1)
112 |
113 | ## Test the range of value, real_img_seq [-1, 1]
114 | self.assertAllGreaterEqual(real_img_seq, -1)
115 | self.assertAllLessEqual(real_img_seq, 1)
116 | self.assertAllGreaterEqual(example_img, -1)
117 | self.assertAllLessEqual(example_img, 1)
118 |
119 |
120 | if (__name__ == '__main__'):
121 | tf.test.main()
122 |
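The shape assertions above hinge on the audio/video alignment: sample_rate / frame_rate / hop_step must be an integer so that every video frame maps to a whole number of MFCC frames. A minimal worked sketch with hypothetical values (the actual settings live in config/params.yml):

# Hypothetical values; the real ones come from config/params.yml.
sample_rate = 16000   # audio samples per second
frame_rate = 25       # video frames per second
hop_step = 160        # audio samples between consecutive MFCC frames

frame_wav_scale = sample_rate / frame_rate       # 640 audio samples per video frame
frame_mfcc_scale = frame_wav_scale / hop_step    # 4 MFCC frames per video frame
assert frame_mfcc_scale == int(frame_mfcc_scale)

# A padded batch of max_seq_len video frames therefore carries
# max_seq_len * int(frame_mfcc_scale) MFCC frames, which is exactly the
# second dimension the test asserts on `mfccs`.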
--------------------------------------------------------------------------------
/res/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/res/1.png
--------------------------------------------------------------------------------
/res/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/res/2.png
--------------------------------------------------------------------------------
/res/3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/res/3.jpg
--------------------------------------------------------------------------------
/sample/22.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/sample/22.jpg
--------------------------------------------------------------------------------
/sample/test.aac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taylorlu/voicepuppet/a0d3ca3296aca15abbfe75663a1bf682fb491efa/sample/test.aac
--------------------------------------------------------------------------------
/utils/BUILD:
--------------------------------------------------------------------------------
1 | package(
2 | default_visibility = [
3 | "//visibility:public",
4 | ],
5 | features = ["-layering_check"],
6 | )
7 |
8 | py_library(
9 | name = "bfm_load_data",
10 | srcs = ["bfm_load_data.py"],
11 | deps = [
12 | ],
13 | )
14 |
15 | py_library(
16 | name = "reconstruct_mesh",
17 | srcs = ["reconstruct_mesh.py"],
18 | deps = [
19 | ],
20 | )
21 |
22 | py_library(
23 | name = "bfm_visual",
24 | srcs = ["bfm_visual.py"],
25 | deps = [
26 | ],
27 | )
28 |
29 | py_library(
30 | name = "utils",
31 | srcs = ["utils.py"],
32 | deps = [
33 | ],
34 | )
--------------------------------------------------------------------------------
/utils/bfm_load_data.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from PIL import Image
3 | from scipy.io import loadmat, savemat
4 | from array import array
5 | import os
6 |
7 |
8 | # define facemodel for reconstruction
9 | class BFM():
10 | def __init__(self, model_dir='BFM'):
11 | model_path = os.path.join(model_dir, 'BFM_model_front.mat')
12 | model = loadmat(model_path)
13 | self.meanshape = model['meanshape'] # mean face shape
14 | self.idBase = model['idBase'] # identity basis
15 | self.exBase = model['exBase'] # expression basis
16 | self.meantex = model['meantex'] # mean face texture
17 | self.texBase = model['texBase'] # texture basis
18 | self.point_buf = model[
19 | 'point_buf'] # adjacent face index for each vertex, starts from 1 (only used for calculating face normal)
20 | self.tri = model['tri'] # vertex index for each triangle face, starts from 1
21 | self.keypoints = np.squeeze(model['keypoints']).astype(np.int32) - 1 # 68 face landmark index, starts from 0
22 |
23 |
24 | # load expression basis
25 | def LoadExpBasis(model_dir='BFM'):
26 | n_vertex = 53215
27 | Expbin = open(os.path.join(model_dir, 'Exp_Pca.bin'), 'rb')
28 | exp_dim = array('i')
29 | exp_dim.fromfile(Expbin, 1)
30 | expMU = array('f')
31 | expPC = array('f')
32 | expMU.fromfile(Expbin, 3 * n_vertex)
33 | expPC.fromfile(Expbin, 3 * exp_dim[0] * n_vertex)
34 |
35 | expPC = np.array(expPC)
36 | expPC = np.reshape(expPC, [exp_dim[0], -1])
37 | expPC = np.transpose(expPC)
38 |
39 | expEV = np.loadtxt(os.path.join(model_dir, 'std_exp.txt'))
40 |
41 | return expPC, expEV
42 |
43 |
44 | # transfer original BFM09 to our face model
45 | def transferBFM09(model_dir='BFM'):
46 | original_BFM = loadmat(os.path.join(model_dir, '01_MorphableModel.mat'))
47 | shapePC = original_BFM['shapePC'] # shape basis
48 | shapeEV = original_BFM['shapeEV'] # corresponding eigen value
49 | shapeMU = original_BFM['shapeMU'] # mean face
50 | texPC = original_BFM['texPC'] # texture basis
51 | texEV = original_BFM['texEV'] # eigen value
52 | texMU = original_BFM['texMU'] # mean texture
53 |
54 | expPC, expEV = LoadExpBasis()
55 |
56 | # transfer BFM09 to our face model
57 |
58 | idBase = shapePC * np.reshape(shapeEV, [-1, 199])
59 | idBase = idBase / 1e5 # unify the scale to decimeter
60 | idBase = idBase[:, :80] # use only first 80 basis
61 |
62 | exBase = expPC * np.reshape(expEV, [-1, 79])
63 | exBase = exBase / 1e5 # unify the scale to decimeter
64 | exBase = exBase[:, :64] # use only first 64 basis
65 |
66 | texBase = texPC * np.reshape(texEV, [-1, 199])
67 | texBase = texBase[:, :80] # use only first 80 basis
68 |
69 | # our face model is cropped and aligned to face landmarks, and contains only 35709 vertices.
70 | # original BFM09 contains 53490 vertices, and the expression basis provided by JuYong contains 53215 vertices.
71 | # thus we select the corresponding vertices to get our face model.
72 |
73 | index_exp = loadmat(os.path.join(model_dir, 'BFM_front_idx.mat'))
74 | index_exp = index_exp['idx'].astype(np.int32) - 1 # starts from 0 (to 53215)
75 |
76 | index_shape = loadmat(os.path.join(model_dir, 'BFM_exp_idx.mat'))
77 | index_shape = index_shape['trimIndex'].astype(np.int32) - 1 # starts from 0 (to 53490)
78 | index_shape = index_shape[index_exp]
79 |
80 | idBase = np.reshape(idBase, [-1, 3, 80])
81 | idBase = idBase[index_shape, :, :]
82 | idBase = np.reshape(idBase, [-1, 80])
83 |
84 | texBase = np.reshape(texBase, [-1, 3, 80])
85 | texBase = texBase[index_shape, :, :]
86 | texBase = np.reshape(texBase, [-1, 80])
87 |
88 | exBase = np.reshape(exBase, [-1, 3, 64])
89 | exBase = exBase[index_exp, :, :]
90 | exBase = np.reshape(exBase, [-1, 64])
91 |
92 | meanshape = np.reshape(shapeMU, [-1, 3]) / 1e5
93 | meanshape = meanshape[index_shape, :]
94 | meanshape = np.reshape(meanshape, [1, -1])
95 |
96 | meantex = np.reshape(texMU, [-1, 3])
97 | meantex = meantex[index_shape, :]
98 | meantex = np.reshape(meantex, [1, -1])
99 |
100 | # other info contains triangles, region used for computing photometric loss,
101 | # region used for skin texture regularization, and 68 landmarks index etc.
102 | other_info = loadmat(os.path.join(model_dir, 'facemodel_info.mat'))
103 | frontmask2_idx = other_info['frontmask2_idx']
104 | skinmask = other_info['skinmask']
105 | keypoints = other_info['keypoints']
106 | point_buf = other_info['point_buf']
107 | tri = other_info['tri']
108 | tri_mask2 = other_info['tri_mask2']
109 |
110 | # save our face model
111 | savemat(os.path.join(model_dir, 'BFM_model_front.mat'),
112 | {'meanshape': meanshape, 'meantex': meantex, 'idBase': idBase, 'exBase': exBase, 'texBase': texBase,
113 | 'tri': tri, 'point_buf': point_buf, 'tri_mask2': tri_mask2 \
114 | , 'keypoints': keypoints, 'frontmask2_idx': frontmask2_idx, 'skinmask': skinmask})
115 |
116 |
117 | # load landmarks for standard face, which is used for image preprocessing
118 | def load_lm3d(model_dir='BFM'):
119 | Lm3D = loadmat(os.path.join(model_dir, 'similarity_Lm3D_all.mat'))
120 | Lm3D = Lm3D['lm']
121 |
122 | # calculate 5 facial landmarks using 68 landmarks
123 | lm_idx = np.array([31, 37, 40, 43, 46, 49, 55]) - 1
124 | Lm3D = np.stack(
125 | [Lm3D[lm_idx[0], :], np.mean(Lm3D[lm_idx[[1, 2]], :], 0), np.mean(Lm3D[lm_idx[[3, 4]], :], 0), Lm3D[lm_idx[5], :],
126 | Lm3D[lm_idx[6], :]], axis=0)
127 | Lm3D = Lm3D[[1, 2, 0, 3, 4], :]
128 |
129 | return Lm3D
130 |
131 |
132 | # save 3D face to obj file
133 | def save_obj(path, v, f, c):
134 | with open(path, 'w') as file:
135 | for i in range(len(v)):
136 | file.write('v %f %f %f %f %f %f\n' % (v[i, 0], v[i, 1], v[i, 2], c[i, 0], c[i, 1], c[i, 2]))
137 | # file.write('v %f %f %f\n'%(v[i,0],v[i,1],v[i,2]))
138 |
139 | file.write('\n')
140 |
141 | for i in range(len(f)):
142 | file.write('f %d %d %d\n' % (f[i, 0], f[i, 1], f[i, 2]))
143 |
144 | file.close()
145 |
146 |
147 | # calculate translation and scale by solving a least squares problem
148 | def POS(xp, x):
149 | npts = xp.shape[1]
150 |
151 | A = np.zeros([2 * npts, 8])
152 |
153 | A[0:2 * npts - 1:2, 0:3] = x.transpose()
154 | A[0:2 * npts - 1:2, 3] = 1
155 |
156 | A[1:2 * npts:2, 4:7] = x.transpose()
157 | A[1:2 * npts:2, 7] = 1
158 |
159 | b = np.reshape(xp.transpose(), [2 * npts, 1])
160 |
161 | k, _, _, _ = np.linalg.lstsq(A, b)
162 |
163 | R1 = k[0:3]
164 | R2 = k[4:7]
165 | sTx = k[3]
166 | sTy = k[7]
167 | s = (np.linalg.norm(R1) + np.linalg.norm(R2)) / 2
168 | t = np.stack([sTx, sTy], axis=0)
169 |
170 | return t, s
171 |
172 |
173 | def process_img(img, lm, t, s):
174 | w0, h0 = img.size
175 | img = img.transform(img.size, Image.AFFINE, (1, 0, t[0] - w0 / 2, 0, 1, h0 / 2 - t[1]))
176 | w = (w0 / s * 102).astype(np.int32)
177 | h = (h0 / s * 102).astype(np.int32)
178 | img = img.resize((w, h), resample=Image.BILINEAR)
179 | lm = np.stack([lm[:, 0] - t[0] + w0 / 2, lm[:, 1] - t[1] + h0 / 2], axis=1) / s * 102
180 |
181 | # crop the image to 224*224 from image center
182 | left = (w / 2 - 112).astype(np.int32)
183 | right = left + 224
184 | up = (h / 2 - 112).astype(np.int32)
185 | below = up + 224
186 |
187 | img = img.crop((left, up, right, below))
188 | img = np.array(img)
189 | img = img[:, :, ::-1]
190 | img = np.expand_dims(img, 0)
191 | lm = lm - np.reshape(np.array([(w / 2 - 112), (h / 2 - 112)]), [1, 2])
192 |
193 | return img, lm, t[0] - w0 / 2, h0 / 2 - t[1]
194 |
195 |
196 | # resize and crop input images before sending to the R-Net
197 | def Preprocess(img, lm, lm3D):
198 | w0, h0 = img.size
199 |
200 | # change from image plane coordinates to 3D space coordinates (X-Y plane)
201 | lm = np.stack([lm[:, 0], h0 - 1 - lm[:, 1]], axis=1)
202 |
203 | # calculate translation and scale factors using 5 facial landmarks and standard landmarks
204 | t, s = POS(lm.transpose(), lm3D.transpose())
205 | # print('t = {}, s = {}'.format(t,s))
206 |
207 | # processing the image
208 | img_new, lm_new, t0, t1 = process_img(img, lm, t, s)
209 | lm_new = np.stack([lm_new[:, 0], 223 - lm_new[:, 1]], axis=1)
210 | trans_params = np.array([w0, h0, 102.0 / s, t0, t1])
211 |
212 | return img_new, lm_new, trans_params
213 |
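A minimal usage sketch of the helpers above, assuming the converted BFM_model_front.mat already exists in a local BFM/ directory and utils/ is on the Python path; the 5-point landmark array is a placeholder that would normally come from a face detector:

import numpy as np
from PIL import Image
from bfm_load_data import BFM, load_lm3d, Preprocess

facemodel = BFM('BFM')           # needs BFM/BFM_model_front.mat (see transferBFM09 above)
lm3D = load_lm3d('BFM')          # 5 x 3 standard reference landmarks

img = Image.open('sample/22.jpg')
# 5 facial landmarks in image-plane pixel coordinates, shape [5, 2]; placeholder values.
lm5 = np.array([[70., 112.], [150., 112.], [110., 150.], [85., 185.], [140., 185.]])

img224, lm_new, trans_params = Preprocess(img, lm5, lm3D)
print(img224.shape)              # (1, 224, 224, 3), channels flipped to BGR by process_img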
--------------------------------------------------------------------------------
/utils/bfm_visual.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | from PIL import Image
4 | import os
5 | import sys
6 |
7 | sys.path.append(os.path.join(os.getcwd(), 'utils'))
8 | from bfm_load_data import *
9 | from reconstruct_mesh import *
10 | import mesh_core_cython
11 |
12 |
13 | def isPointInTri(point, tri_points):
14 | ''' Judge whether the point is in the triangle
15 | Method:
16 | http://blackpawn.com/texts/pointinpoly/
17 | Args:
18 | point: [u, v] or [x, y]
19 | tri_points: three vertices(2d points) of a triangle. 2 coords x 3 vertices
20 | Returns:
21 | bool: true for in triangle
22 | '''
23 | tp = tri_points
24 |
25 | # vectors
26 | v0 = tp[:, 2] - tp[:, 0]
27 | v1 = tp[:, 1] - tp[:, 0]
28 | v2 = point - tp[:, 0]
29 |
30 | # dot products
31 | dot00 = np.dot(v0.T, v0)
32 | dot01 = np.dot(v0.T, v1)
33 | dot02 = np.dot(v0.T, v2)
34 | dot11 = np.dot(v1.T, v1)
35 | dot12 = np.dot(v1.T, v2)
36 |
37 | # barycentric coordinates
38 | if dot00 * dot11 - dot01 * dot01 == 0:
39 | inverDeno = 0
40 | else:
41 | inverDeno = 1 / (dot00 * dot11 - dot01 * dot01)
42 |
43 | u = (dot11 * dot02 - dot01 * dot12) * inverDeno
44 | v = (dot00 * dot12 - dot01 * dot02) * inverDeno
45 |
46 | # check if point in triangle
47 | return (u >= 0) & (v >= 0) & (u + v < 1)
48 |
49 |
50 | def render_texture(vertices, colors, triangles, h, w, c=3):
51 | ''' render mesh by z buffer
52 | Args:
53 | vertices: 3 x nver
54 | colors: 3 x nver
55 | triangles: 3 x ntri
56 | h: height
57 | w: width
58 | '''
59 | # initial
60 | image = np.zeros((h, w, c), dtype=np.uint8)
61 |
62 | depth_buffer = np.zeros([h, w]) - 999999.
63 | # triangle depth: approximate the depth by the average z value of the vertices (v0, v1, v2), since they are close to each other
64 | tri_depth = (vertices[2, triangles[0, :]] + vertices[2, triangles[1, :]] + vertices[2, triangles[2, :]]) / 3.
65 | tri_tex = (colors[:, triangles[0, :]] + colors[:, triangles[1, :]] + colors[:, triangles[2, :]]) / 3.
66 |
67 | for i in range(triangles.shape[1]):
68 | tri = triangles[:, i] # 3 vertex indices
69 |
70 | # the inner bounding box
71 | umin = max(int(np.ceil(np.min(vertices[0, tri]))), 0)
72 | umax = min(int(np.floor(np.max(vertices[0, tri]))), w - 1)
73 |
74 | vmin = max(int(np.ceil(np.min(vertices[1, tri]))), 0)
75 | vmax = min(int(np.floor(np.max(vertices[1, tri]))), h - 1)
76 |
77 | if umax < umin or vmax < vmin:
78 | continue
79 |
80 | for u in range(umin, umax + 1):
81 | for v in range(vmin, vmax + 1):
82 | if tri_depth[i] > depth_buffer[v, u] and isPointInTri([u, v], vertices[:2, tri]):
83 | depth_buffer[v, u] = tri_depth[i]
84 | image[v, u, :] = tri_tex[:, i]
85 | return image
86 |
87 |
88 | def plot_bfm_coeff_seq(save_dir, facemodel, step, seq_len, real_bfm_coeff_seq, bfm_coeff_seq, id_coeff=None, texture_coeff=None):
89 | ## 9*10 block
90 | block_x = 10
91 | block_y = 9
92 | img_size = 224
93 |
94 | def merge_seq(bfm_coeff_seq, big_img, time, h_index):
95 |
96 | for i in range(time):
97 | face_shape, face_texture, face_color, face_projection, z_buffer, landmarks_2d, translation = Reconstruction(
98 | bfm_coeff_seq[0, i:i + 1, ...], facemodel)
99 |
100 | face_projection2 = np.concatenate([face_projection, z_buffer], axis=2)
101 | face_projection = np.squeeze(face_projection2, (0))
102 |
103 | shape = np.squeeze(face_projection2, (0))
104 | color = np.squeeze(face_color, (0))
105 | color = np.clip(color, 0, 255).astype(np.int32)
106 |
107 | new_image = np.zeros((224 * 224 * 3), dtype=np.uint8)
108 | face_mask = np.zeros((224 * 224), dtype=np.uint8)
109 |
110 | vertices = shape.reshape(-1).astype(np.float32).copy()
111 | triangles = (facemodel.tri - 1).reshape(-1).astype(np.int32).copy()
112 | colors = color.reshape(-1).astype(np.float32).copy()
113 | depth_buffer = (np.zeros((224 * 224)) - 99999.0).astype(np.float32)
114 | mesh_core_cython.render_colors_core(new_image, face_mask, vertices, triangles, colors, depth_buffer,
115 | facemodel.tri.shape[0], 224, 224, 3)
116 | new_image = new_image.reshape([224, 224, 3])
117 |
118 |
119 | # shape = np.squeeze(face_shape, (0))
120 | # color = np.squeeze(face_color, (0))
121 | # color = np.clip(color, 0, 255).astype(np.int32)
122 | # shape[:, :2] = 112 - shape[:, :2] * 112
123 |
124 | # new_image = render_texture(shape.T, color.T, (facemodel.tri - 1).astype(int).T, 224, 224, c=3)
125 | new_image = cv2.cvtColor(new_image, cv2.COLOR_BGR2RGB)
126 |
127 | big_img[(i // block_x + h_index) * img_size: (i // block_x + h_index + 1) * img_size,
128 | (i % block_x) * img_size: (i % block_x + 1) * img_size] = new_image
129 |
130 | return big_img
131 |
132 | ### We only pick the first sequence of the batch, trim length of 30.
133 | if (seq_len[0] > 30):
134 | time = 30
135 | else:
136 | time = seq_len[0]
137 |
144 | big_img = np.zeros((img_size * block_y, img_size * block_x, 3), dtype=np.uint8)
145 | big_img = merge_seq(real_bfm_coeff_seq, big_img, time, 0)
146 |
147 | if(id_coeff is None or texture_coeff is None):
148 | bfm_coeff_seq = np.concatenate([real_bfm_coeff_seq[:, :, :80], bfm_coeff_seq[:, :, :], real_bfm_coeff_seq[:, :, 144:]], axis=2)
149 | else:
150 | bfm_coeff_seq = np.concatenate([np.tile(id_coeff, (1, real_bfm_coeff_seq.shape[1], 1)), bfm_coeff_seq[:, :, :], np.tile(texture_coeff, (1, real_bfm_coeff_seq.shape[1], 1)), real_bfm_coeff_seq[:, :, 224:]], axis=2)
151 |
152 | big_img = merge_seq(bfm_coeff_seq, big_img, time, 3)
153 |
154 | cv2.imwrite('{}/bfmnet_{}.jpg'.format(save_dir, step), big_img)
155 |
156 |
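render_texture above is a pure-numpy z-buffer rasterizer kept as a slow fallback for the Cython kernel (see the commented-out call in merge_seq). A self-contained sketch that rasterizes a single synthetic triangle, assuming the module imports cleanly (it loads mesh_core_cython at import time):

import numpy as np
from bfm_visual import render_texture

vertices = np.array([[10., 50., 30.],   # x of the 3 vertices
                     [10., 10., 50.],   # y
                     [ 0.,  0.,  0.]])  # z (depth)
colors = np.full((3, 3), 255.)          # white; one RGB column per vertex
triangles = np.array([[0], [1], [2]])   # 3 x ntri vertex indices, a single triangle

image = render_texture(vertices, colors, triangles, h=64, w=64, c=3)
print(image.shape, image.max())         # (64, 64, 3); 255 inside the triangle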
--------------------------------------------------------------------------------
/utils/cython/mesh_core.h:
--------------------------------------------------------------------------------
1 | #ifndef MESH_CORE_HPP_
2 | #define MESH_CORE_HPP_
3 |
4 | #include <stdio.h>
5 | #include <cmath>
6 | #include <algorithm>
7 | #include <string>
8 | #include <iostream>
9 | #include <fstream>
10 |
11 | using namespace std;
12 |
13 | class point
14 | {
15 | public:
16 | float x;
17 | float y;
18 |
19 | float dot(point p)
20 | {
21 | return this->x * p.x + this->y * p.y;
22 | }
23 |
24 | point operator-(const point& p)
25 | {
26 | point np;
27 | np.x = this->x - p.x;
28 | np.y = this->y - p.y;
29 | return np;
30 | }
31 |
32 | point operator+(const point& p)
33 | {
34 | point np;
35 | np.x = this->x + p.x;
36 | np.y = this->y + p.y;
37 | return np;
38 | }
39 |
40 | point operator*(float s)
41 | {
42 | point np;
43 | np.x = s * this->x;
44 | np.y = s * this->y;
45 | return np;
46 | }
47 | };
48 |
49 |
50 | bool isPointInTri(point p, point p0, point p1, point p2, int h, int w);
51 | void get_point_weight(float* weight, point p, point p0, point p1, point p2);
52 |
53 | void _get_normal_core(
54 | float* normal, float* tri_normal, int* triangles,
55 | int ntri);
56 |
57 | void _rasterize_triangles_core(
58 | float* vertices, int* triangles,
59 | float* depth_buffer, int* triangle_buffer, float* barycentric_weight,
60 | int nver, int ntri,
61 | int h, int w);
62 |
63 | void _render_colors_core(
64 | unsigned char* image, unsigned char *face_mask, float* vertices, int* triangles,
65 | float* colors,
66 | float* depth_buffer,
67 | int ntri,
68 | int h, int w, int c);
69 |
70 | void _render_texture_core(
71 | float* image, float* vertices, int* triangles,
72 | float* texture, float* tex_coords, int* tex_triangles,
73 | float* depth_buffer,
74 | int nver, int tex_nver, int ntri,
75 | int h, int w, int c,
76 | int tex_h, int tex_w, int tex_c,
77 | int mapping_type);
78 |
79 | #endif
--------------------------------------------------------------------------------
/utils/cython/mesh_core_cython.pyx:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | cimport numpy as np
3 | from libcpp.string cimport string
4 |
5 | # use the Numpy-C-API from Cython
6 | np.import_array()
7 |
8 | # cdefine the signature of our c function
9 | cdef extern from "mesh_core.h":
10 | void _rasterize_triangles_core(
11 | float* vertices, int* triangles,
12 | float* depth_buffer, int* triangle_buffer, float* barycentric_weight,
13 | int nver, int ntri,
14 | int h, int w)
15 |
16 | void _render_colors_core(
17 | unsigned char* image, unsigned char *face_mask, float* vertices, int* triangles,
18 | float* colors,
19 | float* depth_buffer,
20 | int ntri,
21 | int h, int w, int c)
22 |
23 | void _render_texture_core(
24 | float* image, float* vertices, int* triangles,
25 | float* texture, float* tex_coords, int* tex_triangles,
26 | float* depth_buffer,
27 | int nver, int tex_nver, int ntri,
28 | int h, int w, int c,
29 | int tex_h, int tex_w, int tex_c,
30 | int mapping_type)
31 |
32 | void _get_normal_core(
33 | float* normal, float* tri_normal, int* triangles,
34 | int ntri)
35 |
36 | void _write_obj_with_colors_texture(string filename, string mtl_name,
37 | float* vertices, int* triangles, float* colors, float* uv_coords,
38 | int nver, int ntri, int ntexver)
39 |
40 | def get_normal_core(np.ndarray[float, ndim=2, mode = "c"] normal not None,
41 | np.ndarray[float, ndim=2, mode = "c"] tri_normal not None,
42 | np.ndarray[int, ndim=2, mode="c"] triangles not None,
43 | int ntri
44 | ):
45 | _get_normal_core(
46 | np.PyArray_DATA(normal), np.PyArray_DATA(tri_normal), np.PyArray_DATA(triangles),
47 | ntri)
48 |
49 | def rasterize_triangles_core(
50 | np.ndarray[float, ndim=2, mode = "c"] vertices not None,
51 | np.ndarray[int, ndim=2, mode="c"] triangles not None,
52 | np.ndarray[float, ndim=2, mode = "c"] depth_buffer not None,
53 | np.ndarray[int, ndim=2, mode = "c"] triangle_buffer not None,
54 | np.ndarray[float, ndim=2, mode = "c"] barycentric_weight not None,
55 | int nver, int ntri,
56 | int h, int w
57 | ):
58 | _rasterize_triangles_core(
59 | np.PyArray_DATA(vertices), np.PyArray_DATA(triangles),
60 | np.PyArray_DATA(depth_buffer), np.PyArray_DATA(triangle_buffer), np.PyArray_DATA(barycentric_weight),
61 | nver, ntri,
62 | h, w)
63 |
64 | def render_colors_core(np.ndarray[unsigned char, ndim=1, mode = "c"] image not None,
65 | np.ndarray[unsigned char, ndim=1, mode = "c"] face_mask not None,
66 | np.ndarray[float, ndim=1, mode = "c"] vertices not None,
67 | np.ndarray[int, ndim=1, mode="c"] triangles not None,
68 | np.ndarray[float, ndim=1, mode = "c"] colors not None,
69 | np.ndarray[float, ndim=1, mode = "c"] depth_buffer not None,
70 | int ntri,
71 | int h, int w, int c
72 | ):
73 | _render_colors_core(
74 | np.PyArray_DATA(image), np.PyArray_DATA(face_mask), np.PyArray_DATA(vertices), np.PyArray_DATA(triangles),
75 | np.PyArray_DATA(colors),
76 | np.PyArray_DATA(depth_buffer),
77 | ntri,
78 | h, w, c)
79 |
80 | def render_texture_core(np.ndarray[float, ndim=3, mode = "c"] image not None,
81 | np.ndarray[float, ndim=2, mode = "c"] vertices not None,
82 | np.ndarray[int, ndim=2, mode="c"] triangles not None,
83 | np.ndarray[float, ndim=3, mode = "c"] texture not None,
84 | np.ndarray[float, ndim=2, mode = "c"] tex_coords not None,
85 | np.ndarray[int, ndim=2, mode="c"] tex_triangles not None,
86 | np.ndarray[float, ndim=2, mode = "c"] depth_buffer not None,
87 | int nver, int tex_nver, int ntri,
88 | int h, int w, int c,
89 | int tex_h, int tex_w, int tex_c,
90 | int mapping_type
91 | ):
92 | _render_texture_core(
93 | np.PyArray_DATA(image), np.PyArray_DATA(vertices), np.PyArray_DATA(triangles),
94 | np.PyArray_DATA(texture), np.PyArray_DATA(tex_coords), np.PyArray_DATA(tex_triangles),
95 | np.PyArray_DATA(depth_buffer),
96 | nver, tex_nver, ntri,
97 | h, w, c,
98 | tex_h, tex_w, tex_c,
99 | mapping_type)
100 |
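A hedged usage sketch of render_colors_core, mirroring how utils/bfm_visual.py calls it: all buffers are flat, C-contiguous arrays, and the extension must first be built in this directory with `python setup.py build_ext -i`. The single test triangle below is synthetic:

import numpy as np
import mesh_core_cython

h, w, c = 224, 224, 3
# One triangle: flattened per-vertex (x, y, z) positions, vertex indices and RGB colors.
vertices = np.array([[10., 10., 0.], [200., 10., 0.], [100., 200., 0.]],
                    dtype=np.float32).reshape(-1)
triangles = np.array([0, 1, 2], dtype=np.int32)
colors = np.array([[0., 255., 0.]] * 3, dtype=np.float32).reshape(-1)

image = np.zeros(h * w * c, dtype=np.uint8)
face_mask = np.zeros(h * w, dtype=np.uint8)
depth_buffer = (np.zeros(h * w) - 99999.0).astype(np.float32)

mesh_core_cython.render_colors_core(image, face_mask, vertices, triangles, colors,
                                    depth_buffer, 1, h, w, c)
image = image.reshape([h, w, c])        # rasterized triangle; untouched pixels stay 0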
--------------------------------------------------------------------------------
/utils/cython/setup.py:
--------------------------------------------------------------------------------
1 | '''
2 | python setup.py build_ext -i
3 | to compile
4 | '''
5 |
6 | # setup.py
7 | from distutils.core import setup, Extension
8 | from Cython.Build import cythonize
9 | from Cython.Distutils import build_ext
10 | from distutils.sysconfig import get_python_lib
11 | import numpy
12 |
13 | setup(
14 | name='mesh_core_cython',
15 | cmdclass={'build_ext': build_ext},
16 | ext_modules=[Extension("mesh_core_cython",
17 | sources=["mesh_core_cython.pyx", "mesh_core.cpp"],
18 | language='c++',
19 | include_dirs=[get_python_lib(), numpy.get_include()])],
20 | )
21 |
--------------------------------------------------------------------------------
/utils/reconstruct_mesh.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | # input: coeff with shape [1,257]
5 | def Split_coeff(coeff):
6 | id_coeff = coeff[:, :80] # identity(shape) coeff of dim 80
7 | ex_coeff = coeff[:, 80:144] # expression coeff of dim 64
8 | tex_coeff = coeff[:, 144:224] # texture(albedo) coeff of dim 80
9 | angles = coeff[:, 224:227] # Euler angles (x,y,z) for rotation, of dim 3
10 | gamma = coeff[:, 227:254] # lighting coeff for 3 channel SH function of dim 27
11 | translation = coeff[:, 254:] # translation coeff of dim 3
12 |
13 | return id_coeff, ex_coeff, tex_coeff, angles, gamma, translation
14 |
15 |
16 | # compute face shape with identity and expression coeff, based on BFM model
17 | # input: id_coeff with shape [1,80]
18 | # ex_coeff with shape [1,64]
19 | # output: face_shape with shape [1,N,3], N is number of vertices
20 | def Shape_formation(id_coeff, ex_coeff, facemodel):
21 | face_shape = np.einsum('ij,aj->ai', facemodel.idBase, id_coeff) + \
22 | np.einsum('ij,aj->ai', facemodel.exBase, ex_coeff) + \
23 | facemodel.meanshape
24 |
25 | face_shape = np.reshape(face_shape, [1, -1, 3])
26 | # re-center face shape
27 | face_shape = face_shape - np.mean(np.reshape(facemodel.meanshape, [1, -1, 3]), axis=1, keepdims=True)
28 |
29 | return face_shape
30 |
31 |
32 | # compute vertex normal using one-ring neighborhood
33 | # input: face_shape with shape [1,N,3]
34 | # output: v_norm with shape [1,N,3]
35 | def Compute_norm(face_shape, facemodel):
36 | face_id = facemodel.tri # vertex index for each triangle face, with shape [F,3], F is number of faces
37 | point_id = facemodel.point_buf # adjacent face index for each vertex, with shape [N,8], N is the number of vertices
38 | shape = face_shape
39 | face_id = (face_id - 1).astype(np.int32)
40 | point_id = (point_id - 1).astype(np.int32)
41 | v1 = shape[:, face_id[:, 0], :]
42 | v2 = shape[:, face_id[:, 1], :]
43 | v3 = shape[:, face_id[:, 2], :]
44 | e1 = v1 - v2
45 | e2 = v2 - v3
46 | face_norm = np.cross(e1, e2) # compute normal for each face
47 | face_norm = np.concatenate([face_norm, np.zeros([1, 1, 3])],
48 | axis=1) # concat face_normal with a zero vector at the end
49 | v_norm = np.sum(face_norm[:, point_id, :], axis=2) # compute vertex normal using one-ring neighborhood
50 | v_norm = v_norm / np.expand_dims(np.linalg.norm(v_norm, axis=2), 2) # normalize normal vectors
51 |
52 | return v_norm
53 |
54 |
55 | # compute vertex texture(albedo) with tex_coeff
56 | # input: tex_coeff with shape [1,80]
57 | # output: face_texture with shape [1,N,3], RGB order, range from 0-255
58 | def Texture_formation(tex_coeff, facemodel):
59 | face_texture = np.einsum('ij,aj->ai', facemodel.texBase, tex_coeff) + facemodel.meantex
60 | face_texture = np.reshape(face_texture, [1, -1, 3])
61 |
62 | return face_texture
63 |
64 |
65 | # compute rotation matrix based on 3 Euler angles
66 | # input: angles with shape [1,3]
67 | # output: rotation matrix with shape [1,3,3]
68 | def Compute_rotation_matrix(angles):
69 | angle_x = angles[:, 0][0]
70 | angle_y = angles[:, 1][0]
71 | angle_z = angles[:, 2][0]
72 |
73 | # compute rotation matrix for X,Y,Z axis respectively
74 | rotation_X = np.array([1.0, 0, 0, \
75 | 0, np.cos(angle_x), -np.sin(angle_x), \
76 | 0, np.sin(angle_x), np.cos(angle_x)])
77 | rotation_Y = np.array([np.cos(angle_y), 0, np.sin(angle_y), \
78 | 0, 1, 0, \
79 | -np.sin(angle_y), 0, np.cos(angle_y)])
80 | rotation_Z = np.array([np.cos(angle_z), -np.sin(angle_z), 0, \
81 | np.sin(angle_z), np.cos(angle_z), 0, \
82 | 0, 0, 1])
83 |
84 | rotation_X = np.reshape(rotation_X, [1, 3, 3])
85 | rotation_Y = np.reshape(rotation_Y, [1, 3, 3])
86 | rotation_Z = np.reshape(rotation_Z, [1, 3, 3])
87 |
88 | rotation = np.matmul(np.matmul(rotation_Z, rotation_Y), rotation_X)
89 | rotation = np.transpose(rotation, axes=[0, 2, 1]) # transpose row and column (dimension 1 and 2)
90 |
91 | return rotation
92 |
93 |
94 | # project 3D face onto image plane
95 | # input: face_shape with shape [1,N,3]
96 | # rotation with shape [1,3,3]
97 | # translation with shape [1,3]
98 | # output: face_projection with shape [1,N,2]
99 | # z_buffer with shape [1,N,1]
100 | def Projection_layer(face_shape, rotation, translation, focal=1015.0,
101 | center=112.0): # we choose the focal length and camera position empirically
102 |
103 | camera_pos = np.reshape(np.array([0.0, 0.0, 10.0]), [1, 1, 3]) # camera position
104 | reverse_z = np.reshape(np.array([1.0, 0, 0, 0, 1, 0, 0, 0, -1.0]), [1, 3, 3])
105 |
106 | p_matrix = np.concatenate([[focal], [0.0], [center], [0.0], [focal], [center], [0.0], [0.0], [1.0]],
107 | axis=0) # projection matrix
108 | p_matrix = np.reshape(p_matrix, [1, 3, 3])
109 |
110 | # calculate face position in camera space
111 | face_shape_r = np.matmul(face_shape, rotation)
112 | face_shape_t = face_shape_r + np.reshape(translation, [1, 1, 3])
113 | face_shape_t = np.matmul(face_shape_t, reverse_z) + camera_pos
114 |
115 | # calculate projection of face vertex using perspective projection
116 | aug_projection = np.matmul(face_shape_t, np.transpose(p_matrix, [0, 2, 1]))
117 | face_projection = aug_projection[:, :, 0:2] / np.reshape(aug_projection[:, :, 2], [1, np.shape(aug_projection)[1], 1])
118 | z_buffer = -np.reshape(aug_projection[:, :, 2], [1, -1, 1])
119 |
120 | return face_projection, z_buffer
121 |
122 |
123 | # compute vertex color using face_texture and SH function lighting approximation
124 | # input: face_texture with shape [1,N,3]
125 | # norm with shape [1,N,3]
126 | # gamma with shape [1,27]
127 | # output: face_color with shape [1,N,3], RGB order, range from 0-255
128 | # lighting with shape [1,N,3], color under uniform texture
129 | def Illumination_layer(face_texture, norm, gamma):
130 | # gamma = np.zeros(gamma.shape, dtype=gamma.dtype)
131 | num_vertex = np.shape(face_texture)[1]
132 |
133 | init_lit = np.array([0.8, 0, 0, 0, 0, 0, 0, 0, 0])
134 | gamma = np.reshape(gamma, [-1, 3, 9])
135 | gamma = gamma + np.reshape(init_lit, [1, 1, 9])
136 |
137 | # parameter of 9 SH function
138 | a0 = np.pi
139 | a1 = 2 * np.pi / np.sqrt(3.0)
140 | a2 = 2 * np.pi / np.sqrt(8.0)
141 | c0 = 1 / np.sqrt(4 * np.pi)
142 | c1 = np.sqrt(3.0) / np.sqrt(4 * np.pi)
143 | c2 = 3 * np.sqrt(5.0) / np.sqrt(12 * np.pi)
144 |
145 | Y0 = np.tile(np.reshape(a0 * c0, [1, 1, 1]), [1, num_vertex, 1])
146 | Y1 = np.reshape(-a1 * c1 * norm[:, :, 1], [1, num_vertex, 1])
147 | Y2 = np.reshape(a1 * c1 * norm[:, :, 2], [1, num_vertex, 1])
148 | Y3 = np.reshape(-a1 * c1 * norm[:, :, 0], [1, num_vertex, 1])
149 | Y4 = np.reshape(a2 * c2 * norm[:, :, 0] * norm[:, :, 1], [1, num_vertex, 1])
150 | Y5 = np.reshape(-a2 * c2 * norm[:, :, 1] * norm[:, :, 2], [1, num_vertex, 1])
151 | Y6 = np.reshape(a2 * c2 * 0.5 / np.sqrt(3.0) * (3 * np.square(norm[:, :, 2]) - 1), [1, num_vertex, 1])
152 | Y7 = np.reshape(-a2 * c2 * norm[:, :, 0] * norm[:, :, 2], [1, num_vertex, 1])
153 | Y8 = np.reshape(a2 * c2 * 0.5 * (np.square(norm[:, :, 0]) - np.square(norm[:, :, 1])), [1, num_vertex, 1])
154 |
155 | Y = np.concatenate([Y0, Y1, Y2, Y3, Y4, Y5, Y6, Y7, Y8], axis=2)
156 |
157 | # Y shape:[batch,N,9].
158 |
159 | lit_r = np.squeeze(np.matmul(Y, np.expand_dims(gamma[:, 0, :], 2)), 2) # [batch,N,9] * [batch,9,1] = [batch,N]
160 | lit_g = np.squeeze(np.matmul(Y, np.expand_dims(gamma[:, 1, :], 2)), 2)
161 | lit_b = np.squeeze(np.matmul(Y, np.expand_dims(gamma[:, 2, :], 2)), 2)
162 |
163 | # shape:[batch,N,3]
164 | face_color = np.stack([lit_r * face_texture[:, :, 0], lit_g * face_texture[:, :, 1], lit_b * face_texture[:, :, 2]],
165 | axis=2)
166 | lighting = np.stack([lit_r, lit_g, lit_b], axis=2) * 128
167 |
168 | return face_color, lighting
169 |
170 |
171 | # face reconstruction with coeff and BFM model
172 | def Reconstruction(coeff, facemodel):
173 | id_coeff, ex_coeff, tex_coeff, angles, gamma, translation = Split_coeff(coeff)
174 | # compute face shape
175 | face_shape = Shape_formation(id_coeff, ex_coeff, facemodel)
176 | # compute vertex texture(albedo)
177 | face_texture = Texture_formation(tex_coeff, facemodel)
178 | # vertex normal
179 | face_norm = Compute_norm(face_shape, facemodel)
180 | # rotation matrix
181 | rotation = Compute_rotation_matrix(angles)
182 | face_norm_r = np.matmul(face_norm, rotation)
183 |
184 | # compute vertex projection on image plane (with image sized 224*224)
185 | face_projection, z_buffer = Projection_layer(face_shape, rotation, translation)
186 | face_projection = np.stack([face_projection[:, :, 0], 224 - face_projection[:, :, 1]], axis=2)
187 |
188 | # compute 68 landmark on image plane
189 | landmarks_2d = face_projection[:, facemodel.keypoints, :]
190 |
191 | # compute vertex color using SH function lighting approximation
192 | face_color, lighting = Illumination_layer(face_texture, face_norm_r, gamma)
193 |
194 | return face_shape, face_texture, face_color, face_projection, z_buffer, landmarks_2d, translation
195 |
196 |
197 | # face reconstruction with coeff and BFM model
198 | def Reconstruction_rotation(coeff, facemodel, angles):
199 | id_coeff, ex_coeff, tex_coeff, _, gamma, translation = Split_coeff(coeff)
200 | # compute face shape
201 | face_shape = Shape_formation(id_coeff, ex_coeff, facemodel)
202 | # compute vertex texture(albedo)
203 | face_texture = Texture_formation(tex_coeff, facemodel)
204 | # vertex normal
205 | face_norm = Compute_norm(face_shape, facemodel)
206 | # rotation matrix
207 | rotation = Compute_rotation_matrix(angles)
208 | face_norm_r = np.matmul(face_norm, rotation)
209 |
210 | # rotation matrix
211 | face_shape = np.matmul(face_shape, rotation)
212 |
213 | # compute vertex projection on image plane (with image sized 224*224)
214 | face_projection, z_buffer = Projection_layer(face_shape, rotation, translation)
215 | face_projection = np.stack([face_projection[:, :, 0], 224 - face_projection[:, :, 1]], axis=2)
216 |
217 | # compute 68 landmark on image plane
218 | landmarks_2d = face_projection[:, facemodel.keypoints, :]
219 |
220 | # compute vertex color using SH function lighting approximation
221 | face_color, lighting = Illumination_layer(face_texture, face_norm_r, gamma)
222 |
223 | return face_shape, face_texture, face_color, face_projection, z_buffer, landmarks_2d
224 |
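A minimal sketch of driving Reconstruction with a hand-built coefficient vector, assuming utils/ is on the Python path and BFM/BFM_model_front.mat exists; only the 257-dim layout documented in Split_coeff is relied on:

import numpy as np
from bfm_load_data import BFM
from reconstruct_mesh import Reconstruction

facemodel = BFM('BFM')

# 257 = 80 identity + 64 expression + 80 texture + 3 Euler angles + 27 SH lighting + 3 translation
coeff = np.zeros([1, 257], dtype=np.float32)
coeff[:, 80:144] = 0.1 * np.random.randn(1, 64)    # perturb only the expression block

face_shape, face_texture, face_color, face_projection, z_buffer, landmarks_2d, translation = \
    Reconstruction(coeff, facemodel)
print(face_projection.shape, landmarks_2d.shape)   # (1, N, 2) and (1, 68, 2)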
--------------------------------------------------------------------------------
/utils/utils.py:
--------------------------------------------------------------------------------
1 | import mxnet as mx
2 | import cv2
3 | import numpy as np
4 | import os
5 | import math
6 |
7 | alignment_handler = None
8 | dlib_detector = None
9 |
10 | def mkdir(dirname):
11 | if(not os.path.isdir(dirname)):
12 | os.makedirs(dirname)
13 |
14 | class MXDetectorHandler:
15 | '''
16 | face 2D landmark alignment by mxnet, refer to https://github.com/deepinx/deep-face-alignment
17 | '''
18 | def __init__(self, prefix, epoch, mx, name='model'):
19 | ctx_id = int(os.environ["CUDA_VISIBLE_DEVICES"])
20 | if (ctx_id >= 0):
21 | ctx = mx.gpu(ctx_id)
22 | else:
23 | ctx = mx.cpu()
24 |
25 | sym, arg_params, aux_params = mx.model.load_checkpoint(os.path.join(prefix, name), epoch)
26 | all_layers = sym.get_internals()
27 | sym = all_layers['heatmap_output']
28 | image_size = (128, 128)
29 | self.image_size = image_size
30 | model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
31 | model.bind(for_training=False, data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
32 | model.set_params(arg_params, aux_params)
33 | self.model = model
34 |
35 |
36 | def get_mxnet_sat_alignment(model_dir, image):
37 | '''
38 | Arguments:
39 | model_dir: The folder containing the mxnet pretrained model.
40 | image: The image containing at least 1 face; only the first detected face is used.
41 | Returns:
42 | image: The image input.
43 | img_landmarks: The 68 landmarks' coordinates in image.
44 | img: The face area expanded by sat alignment, resized to out_img_size=224.
45 | lmk_cropped: The 68 landmarks' coordinates in img.
46 | center_x: the x position of the face center in image.
47 | center_y: the y position of the face center in image.
48 | ratio: The returned image size / original face area size (before resize).
49 | '''
50 | global alignment_handler, dlib_detector
51 |
52 | if (alignment_handler is None):
53 | alignment_handler = MXDetectorHandler(prefix=model_dir, epoch=0, mx=mx, name='model-sat')
54 |
55 | import dlib
56 | if (dlib_detector is None):
57 | dlib_detector = dlib.get_frontal_face_detector()
58 |
59 | def crop_expand_dlib(image, rect, ratio=1.5):
60 | ## rect: [left, right, top, bottom]
61 | mean = [(rect[2] + rect[3]) / 2.0, (rect[0] + rect[1]) / 2.0]
62 | ## mean: [y, x]
63 | width = rect[1] - rect[0]
64 | height = rect[3] - rect[2]
65 |
66 | max_ratio = min([(image.shape[0] - mean[0])/(height/2), (image.shape[1] - mean[1])/(width/2), mean[0]/(height/2), mean[1]/(width/2)])
--------------------------------------------------------------------------------
/voicepuppet/atvgnet/infer.py:
--------------------------------------------------------------------------------
46 | if ctx_id>=0:
47 | ctx = mx.gpu(ctx_id)
48 | else:
49 | ctx = mx.cpu()
50 | sym, arg_params, aux_params = mx.model.load_checkpoint(os.path.join(prefix, "model"), epoch)
51 | all_layers = sym.get_internals()
52 | sym = all_layers['heatmap_output']
53 | image_size = (128, 128)
54 | self.image_size = image_size
55 | model = mx.mod.Module(symbol=sym, context=ctx, label_names = None)
56 | model.bind(for_training=False, data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
57 | model.set_params(arg_params, aux_params)
58 | self.model = model
59 |
60 |
61 | def face_alignment(image):
62 | import mxnet as mx
63 | global alignment_handler
64 | global MXDetectorHandler_prefix
65 | if(alignment_handler is None):
66 | alignment_handler = MXDetectorHandler(prefix=MXDetectorHandler_prefix, epoch=0, ctx_id=-1, mx=mx)
67 |
68 | import dlib
69 | dlib_detector = dlib.get_frontal_face_detector()
70 |
71 | def crop_expand_dlib(image, rect, ratio=1.5):
72 | ## rect: [left, right, top, bottom]
73 | mean = [(rect[2] + rect[3]) // 2, (rect[0] + rect[1]) // 2]
74 | ## mean: [y, x]
75 | half_crop_size = int((rect[1] + rect[3] - rect[0] - rect[2]) * ratio // 4)
76 |
77 | # padding if the crop area outside of image.
78 | if (mean[0] - half_crop_size < 0):
79 | image = cv2.copyMakeBorder(image, 0, 0, half_crop_size - mean[0], 0, cv2.BORDER_CONSTANT, 0)
80 | if (mean[0] + half_crop_size > image.shape[1]):
81 | image = cv2.copyMakeBorder(image, 0, 0, 0, mean[0] + half_crop_size - image.shape[1], cv2.BORDER_CONSTANT, 0)
82 | if (mean[1] - half_crop_size < 0):
83 | image = cv2.copyMakeBorder(image, half_crop_size - mean[1], 0, 0, 0, cv2.BORDER_CONSTANT, 0)
84 | if (mean[1] + half_crop_size > image.shape[0]):
85 | image = cv2.copyMakeBorder(image, 0, mean[1] + half_crop_size - image.shape[0], 0, 0, cv2.BORDER_CONSTANT, 0)
86 |
87 | left = mean[1] - half_crop_size
88 | right = mean[1] + half_crop_size
89 | top = mean[0] - half_crop_size
90 | buttom = mean[0] + half_crop_size
91 |
92 | if (left < 0):
93 | left = 0
94 | if (top < 0):
95 | top = 0
96 |
97 | return image, [left, right, top, buttom]
98 |
99 | def crop_expand_alignment(img, xys, out_img_size=224, ratio=1.3):
100 | xys = np.array(map(lambda x: int(x), xys))
101 | max_x = max(xys[::2])
102 | max_y = max(xys[1::2])
103 | min_x = min(xys[::2])
104 | min_y = min(xys[1::2])
105 | width = int((max_x - min_x) * ratio)
106 | height = int((max_y - min_y) * ratio)
107 | height = width
108 |
109 | center_x = (max_x + min_x) // 2
110 | center_y = (max_y + min_y) // 2
111 |
112 | left = center_x - width / 2
113 | top = center_y - height / 2
114 | img = img[top:top + height, left:left + width]
115 |
116 | xys[::2] -= left
117 | xys[1::2] -= top
118 | xys[::2] = xys[::2] * out_img_size / width
119 | xys[1::2] = xys[1::2] * out_img_size / height
120 |
121 | img = cv2.resize(img, (out_img_size, out_img_size))
122 | xys = np.array(list(map(lambda x: float(x)/out_img_size, xys)))
123 |
124 | return img, xys
125 |
126 | img_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
127 | rects = dlib_detector(img_gray, 0)
128 | if (len(rects) != 1):
129 | return None
130 |
131 | rect = [rects[0].left(), rects[0].right(), rects[0].top(), rects[0].bottom()]
132 | image, rect = crop_expand_dlib(image, rect) # dlib region is too small
133 | ## rect: [left, right, top, bottom]
134 |
135 | img = cv2.cvtColor(image[rect[2]:rect[3], rect[0]:rect[1]], cv2.COLOR_BGR2RGB)
136 | crop_width = img.shape[1]
137 | crop_height = img.shape[0]
138 |
139 | img = cv2.resize(img, (128, 128))
140 | img = np.transpose(img, (2, 0, 1)) # 3*128*128, RGB
141 | input_blob = np.zeros((1, 3, 128, 128), dtype=np.uint8)
142 | input_blob[0] = img
143 | data = mx.nd.array(input_blob)
144 | db = mx.io.DataBatch(data=(data,))
145 | alignment_handler.model.forward(db, is_train=False)
146 | alabel = alignment_handler.model.get_outputs()[-1].asnumpy()[0]
147 |
148 | img_landmarks = []
149 | for j in xrange(alabel.shape[0]):
150 | a = cv2.resize(alabel[j], (128, 128))
151 | ind = np.unravel_index(np.argmax(a, axis=None), a.shape)
152 | ## ind: [y, x]
153 |
154 | origin_x = rect[0] + ind[1] * crop_width / 128
155 | origin_y = rect[2] + ind[0] * crop_height / 128
156 |
157 | img_landmarks.append(str(origin_x))
158 | img_landmarks.append(str(origin_y))
159 |
160 | image, img_landmarks = crop_expand_alignment(image, img_landmarks)
161 | return image, img_landmarks
162 |
163 | def test_atnet(config_path):
164 | global wav_file
165 | global img_path
166 | img = cv2.imread(img_path)
167 | example_img, example_lmk = face_alignment(img)
168 |
169 | params = YParams(config_path, 'default')
170 | sample_rate = params.mel['sample_rate']
171 | hop_step = params.mel['hop_step']
172 | win_length = params.mel['win_length']
173 | frame_rate = params.frame_rate
174 | mean = np.load(params.mean_file)
175 | component = np.load(params.components_file)
176 |
177 | example_lmk = np.dot((example_lmk - mean), component[:,:20])
178 | example_lmk *= np.array([1.5, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0,2.0,1.0,1.0, 1,1,1,1,1, 1,1,1,1,1])
179 | example_lmk = np.dot(example_lmk, component[:,:20].T)
180 |
181 | wav_loader = WavLoader(sr=sample_rate)
182 |
183 | pose = np.ones([1000,3], dtype=np.float32)*0.0
184 | ear = np.ones([1000,1], dtype=np.float32)*0.6
185 | ear[40:75,:] = np.ones([35,1], dtype=np.float32)*0.2
186 |
187 | pcm = wav_loader.get_data(wav_file)
188 |
189 | frame_wav_scale = sample_rate / frame_rate
190 | frame_mfcc_scale = frame_wav_scale / hop_step
191 |
192 | assert (frame_mfcc_scale - int(frame_mfcc_scale) == 0), "sample_rate/hop_step must be divisible by frame_rate."
193 |
194 | frame_mfcc_scale = int(frame_mfcc_scale)
195 | min_len = min(ear.shape[0], pose.shape[0], pcm.shape[0]//frame_wav_scale)
196 |
197 | g1 = tf.Graph()
198 | with g1.as_default():
199 |
200 | ear = tf.convert_to_tensor(ear[np.newaxis, :min_len, :], dtype=tf.float32)
201 | pose = tf.convert_to_tensor(pose[np.newaxis, :min_len, :], dtype=tf.float32)
202 | seq_len = tf.convert_to_tensor(np.array([min_len]), dtype=tf.int32)
203 | example_landmark = tf.convert_to_tensor(example_lmk[np.newaxis, :], dtype=tf.float32)
204 |
205 | pcm_length = hop_step * (min_len * frame_mfcc_scale - 1) + win_length
206 | if (pcm.shape[0] < pcm_length):
207 | pcm = np.pad(pcm, (0, pcm_length - pcm.shape[0]), 'constant', constant_values=(0))
208 | elif(pcm.shape[0] > pcm_length):
209 | pcm = pcm[:pcm_length]
210 | mfcc = extract_mfcc(pcm[np.newaxis, :], params)
211 |
212 | atnet = ATNet(config_path)
213 | params = atnet.params
214 | params.batch_size = 1
215 | atnet.set_params(params)
216 |
217 | infer_nodes = atnet.build_inference_op(ear, pose, mfcc, example_landmark, seq_len)
218 |
219 | sess = tf.Session()
220 | sess.run(tf.global_variables_initializer())
221 | tf.train.Saver().restore(sess, 'ckpt_atnet/atnet-80000')
222 | lmk_seq = sess.run(infer_nodes['LandmarkDecoder'])
223 | save_lmkseq_video(lmk_seq, mean, "atnet.avi", wav_file)
224 |
225 | return example_img, example_lmk, lmk_seq
226 |
227 | def test_vgnet(config_path, example_img, example_landmark, lmk_seq):
228 | example_img = cv2.resize(example_img, (128, 128)).astype(np.float32)[np.newaxis, ...]
229 | example_img /= 256.0
230 | example_img = (example_img - 0.5) / 0.5
231 |
232 | params = YParams(config_path, 'default')
233 |
234 | g2 = tf.Graph()
235 | with g2.as_default():
236 | example_landmark = tf.convert_to_tensor(example_landmark[np.newaxis, :], dtype=tf.float32)
237 | example_img = tf.convert_to_tensor(example_img, dtype=tf.float32)
238 | seq_len = tf.convert_to_tensor(np.array([lmk_seq.shape[1]]), dtype=tf.int32)
239 | lmk_seq = tf.convert_to_tensor((lmk_seq), dtype=tf.float32)
240 |
241 | vgnet = VGNet(config_path)
242 | params = vgnet.params
243 | params.batch_size = 1
244 | vgnet.set_params(params)
245 |
246 | infer_nodes = vgnet.build_inference_op(lmk_seq, example_landmark, example_img, seq_len)
247 |
248 | sess = tf.Session(graph=g2)
249 | sess.run(tf.global_variables_initializer())
250 | tf.train.Saver().restore(sess, 'ckpt_vgnet/vgnet-70000')
251 | img_seq = sess.run(infer_nodes['Fake_img_seq'])
252 |
253 | save_imgseq_video(img_seq, "vgnet.mp4", wav_file)
254 |
255 |
256 | if (__name__ == '__main__'):
257 |
258 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>")
259 | cmd_parser.add_option('--config_path', type="string", dest="config_path",
260 | help='the config yaml file')
261 |
262 | opts, argv = cmd_parser.parse_args()
263 |
264 | if (opts.config_path is None):
265 | logger.error('Please check your parameters.')
266 | exit(0)
267 |
268 | config_path = opts.config_path
269 |
270 | if (not os.path.exists(config_path)):
271 | logger.error('config_path not exists')
272 | exit(0)
273 |
274 | example_img, example_landmark, lmk_seq = test_atnet(config_path)
275 | test_vgnet(config_path, example_img, example_landmark, lmk_seq)
276 |
277 |
278 | # lmk_seq = []
279 | # example_image = None
280 | # example_landmark = None
281 | # params = YParams(config_path, 'default')
282 | # mean = np.load(params.mean_file)
283 | # component = np.load(params.components_file)
284 |
285 | # wav_file = '/Users/donglu/Downloads/cctv_cut.wav'
286 | # cap = cv2.VideoCapture('/Users/donglu/Downloads/cctv_cut.mp4')
287 | # if (cap.isOpened()):
288 | # success, image = cap.read()
289 | # idx = 0
290 | # while (success):
291 | # idx += 1
292 | # if(idx==100):
293 | # break
294 | # [h, w, c] = image.shape
295 | # if c > 3:
296 | # image = image[:, :, :3]
297 | # example_img, example_lmk = face_alignment(image)
298 | # example_lmk = np.dot((example_lmk - mean), component[:,:20])
299 | # example_lmk *= np.array([1.5, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0,2.0,1.0,1.0, 1,1,1,1,1, 1,1,1,1,1])
300 | # example_lmk = np.dot(example_lmk, component[:,:20].T)
301 | # if(example_image is None):
302 | # example_image = example_img
303 | # if(example_landmark is None):
304 | # example_landmark = example_lmk
305 | # lmk_seq.append(example_lmk)
306 |
307 | # success, image = cap.read()
308 | # cap.release()
309 | # lmk_seq = np.array(lmk_seq)[np.newaxis,...]
310 | # save_lmkseq_video(lmk_seq, mean, "atnet.avi", wav_file)
311 |
312 | # test_vgnet(config_path, example_image, example_landmark, lmk_seq)
313 |
314 |
315 |
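The landmark preprocessing in test_atnet above projects the example landmarks onto the first 20 PCA components, rescales a few of them, and projects back without re-adding the mean. A self-contained numpy sketch of that round trip, using random stand-ins for the real mean/components stored at params.mean_file and params.components_file:

import numpy as np

landmark = np.random.uniform(-1, 1, size=136)            # flattened 68 x 2 landmarks
mean = np.zeros(136)                                      # stand-in for np.load(params.mean_file)
component, _ = np.linalg.qr(np.random.randn(136, 20))     # stand-in orthonormal basis (136 x 20)

coeff = np.dot(landmark - mean, component[:, :20])        # project onto the 20 components
coeff *= np.array([1.5, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0,
                   1, 1, 1, 1, 1, 1, 1, 1, 1, 1])         # amplify selected modes
landmark_adj = np.dot(coeff, component[:, :20].T)         # back to landmark space (mean not re-added)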
--------------------------------------------------------------------------------
/voicepuppet/atvgnet/plot.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | import numpy as np
4 | import os
5 | import cv2
6 | import subprocess
7 |
8 |
9 | def strokeline_lookup():
10 | '''
11 | the strokeline indices of the 68 landmark points.
12 | '''
13 | Mouth = [[48, 49], [49, 50], [50, 51], [51, 52], [52, 53], [53, 54], [54, 55], [55, 56], [56, 57], \
14 | [57, 58], [58, 59], [59, 48], [60, 61], [61, 62], [62, 63], [63, 64], [64, 65], [65, 66], \
15 | [66, 67], [67, 60]]
16 |
17 | Nose = [[27, 28], [28, 29], [29, 30], [30, 31], [30, 35], [31, 32], [32, 33], \
18 | [33, 34], [34, 35], [27, 31], [27, 35]]
19 |
20 | leftBrow = [[17, 18], [18, 19], [19, 20], [20, 21]]
21 | rightBrow = [[22, 23], [23, 24], [24, 25], [25, 26]]
22 |
23 | leftEye = [[36, 37], [37, 38], [38, 39], [39, 40], [40, 41], [36, 41]]
24 | rightEye = [[42, 43], [43, 44], [44, 45], [45, 46], [46, 47], [42, 47]]
25 |
26 | other = [[0, 1], [1, 2], [2, 3], [3, 4], [4, 5], [5, 6], \
27 | [6, 7], [7, 8], [8, 9], [9, 10], [10, 11], [11, 12], \
28 | [12, 13], [13, 14], [14, 15], [15, 16]]
29 |
30 | faceLmarkLookups = []
31 | faceLmarkLookups.append(Mouth)
32 | faceLmarkLookups.append(Nose)
33 | faceLmarkLookups.append(leftBrow)
34 | faceLmarkLookups.append(rightBrow)
35 | faceLmarkLookups.append(leftEye)
36 | faceLmarkLookups.append(rightEye)
37 | faceLmarkLookups.append(other)
38 | return faceLmarkLookups
39 |
40 |
41 | def plot_lmk_seq(save_dir, step, mean, seq_len, real_lmk_seq, lmk_seq):
42 | '''
43 | merge 128x128 images into a large 9*10 grid picture.
44 | '''
45 |
46 | ## 9*10 block
47 | block_x = 10
48 | block_y = 9
49 | img_size = 128
50 |
51 | faceLmarkLookups = strokeline_lookup()
52 |
53 | def merge_seq(lmk_seq, big_img, time, h_index):
54 |
55 | for i in range(time):
56 | back_img = np.ones((img_size, img_size), dtype=np.uint8) * 255
57 | lmk = (((lmk_seq[0, i, ...] + mean)/2+0.5) * img_size).astype(np.int32)
58 | for k in range(68):
59 | cv2.circle(back_img, (int(lmk[k * 2]), int(lmk[k * 2 + 1])), 1, [0], -1)
60 |
61 | for part in faceLmarkLookups:
62 | for idx in part:
63 | cv2.line(back_img, (int(lmk[idx[0] * 2]), int(lmk[idx[0] * 2 + 1])),
64 | (int(lmk[idx[1] * 2]), int(lmk[idx[1] * 2 + 1])), (0), 1)
65 |
66 | big_img[(i // block_x + h_index) * img_size: (i // block_x + h_index + 1) * img_size,
67 | (i % block_x) * img_size: (i % block_x + 1) * img_size] = back_img
68 |
69 | return big_img
70 |
71 | ### We only pick the first sequence of the batch, trim length of 30.
72 | if (seq_len[0] > 30):
73 | time = 30
74 | else:
75 | time = seq_len[0]
76 |
77 | big_img = np.zeros((img_size * block_y, img_size * block_x), dtype=np.uint8)
78 | big_img = merge_seq(real_lmk_seq, big_img, time, 0)
79 | big_img = merge_seq(lmk_seq, big_img, time, 3)
80 |
81 | cv2.imwrite('{}/atnet_{}.jpg'.format(save_dir, step), big_img)
82 |
83 |
84 | def plot_image_seq(save_dir, step, mean, seq_len, real_lmk_seq, real_mask_seq, real_img_seq, fake_img_seq,
85 | attention_seq):
86 | '''
87 | merge 2 sequences of images and their attention maps into a large image (9*10 grid picture).
88 | '''
89 |
90 | ## 9*10 block
91 | block_x = 10
92 | block_y = 9
93 | img_size = real_img_seq.shape[2]
94 |
95 | ### We only pick the first sequence of the batch, trim length of 30.
96 | if (seq_len[0] > 30):
97 | time = 30
98 | else:
99 | time = seq_len[0]
100 |
101 | big_img = 255 * np.ones((img_size * block_y, img_size * block_x, 4), dtype=np.uint8)
102 |
103 | for i in range(time):
104 | real_img = (((real_img_seq[0, i, ...] * 0.5) + 0.5) * 256).astype(np.uint8)
105 | fake_img = (((fake_img_seq[0, i, ...] * 0.5) + 0.5) * 256).astype(np.uint8)
106 | real_mask = (((real_mask_seq[0, i, ...] + 1) / 2) * 255).astype(np.uint8)
107 | attention_img = (attention_seq[0, i, ...] * 256).astype(np.uint8)
108 |
109 | lmk = (((real_lmk_seq[0, i, ...] + mean)/2+0.5) * img_size).astype(np.int32)
110 | for k in range(68):
111 | cv2.circle(real_img, (int(lmk[k * 2]), int(lmk[k * 2 + 1])), 1, [255, 255, 0], 1)
112 |
113 | real_img = np.concatenate([real_img, real_mask], axis=-1)
114 |
115 | big_img[i // block_x * img_size: (i // block_x + 1) * img_size,
116 | (i % block_x) * img_size: (i % block_x + 1) * img_size,
117 | :] = real_img
118 |
119 | big_img[(i // block_x + 3) * img_size: (i // block_x + 1 + 3) * img_size,
120 | (i % block_x) * img_size: (i % block_x + 1) * img_size,
121 | :-1] = fake_img
122 |
123 | big_img[(i // block_x + 6) * img_size: (i // block_x + 1 + 6) * img_size,
124 | (i % block_x) * img_size: (i % block_x + 1) * img_size,
125 | :] = cv2.merge((attention_img, attention_img, attention_img, attention_img))
126 |
127 | cv2.imwrite('{}/vgnet_{}.png'.format(save_dir, step), big_img)
128 |
129 |
130 | def save_lmkseq_video(lmk_seq, mean, output_file, wav_file=None):
131 | img_size = 480
132 | seq_len = lmk_seq.shape[1]
133 | fourcc = cv2.VideoWriter_fourcc(*'MJPG')
134 | output_movie = cv2.VideoWriter('temp.avi', fourcc, 25, (img_size, img_size), isColor=False)
135 | faceLmarkLookups = strokeline_lookup()
136 |
137 | for i in range(seq_len):
138 | back_img = np.ones((img_size, img_size), dtype=np.uint8) * 255
139 | lmk = (((lmk_seq[0, i, ...] + mean)/2+0.5) * img_size).astype(np.int32)
140 | for k in range(68):
141 | cv2.circle(back_img, (int(lmk[k * 2]), int(lmk[k * 2 + 1])), 1, [0], -1)
142 |
143 | for part in faceLmarkLookups:
144 | for idx in part:
145 | cv2.line(back_img, (int(lmk[idx[0] * 2]), int(lmk[idx[0] * 2 + 1])),
146 | (int(lmk[idx[1] * 2]), int(lmk[idx[1] * 2 + 1])), (0), 1)
147 |
148 | output_movie.write(back_img)
149 |
150 | if (wav_file is not None):
151 | cmd = 'ffmpeg -y -i temp.avi -i ' + wav_file + ' -c:v copy -c:a aac -strict experimental ' + output_file
152 | subprocess.call(cmd, shell=True)
153 | os.remove('temp.avi')
154 |
155 |
156 | def save_imgseq_video(img_seq, output_file, wav_file=None):
157 | def mkdir(dirname):
158 | if not os.path.isdir(dirname):
159 | os.makedirs(dirname)
160 |
161 | img_size = 128
162 | seq_len = img_seq.shape[1]
163 | mkdir('temp')
164 |
165 | for i in range(seq_len):
166 | real_img = (((img_seq[0, i, ...] * 0.5) + 0.5) * 256).astype(np.uint8)
167 | cv2.imwrite('temp/{}.jpg'.format(i), real_img, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
168 |
169 | if (wav_file is not None):
170 | cmd = 'ffmpeg -i temp/%d.jpg -i ' + wav_file + ' -c:v libx264 -c:a aac -strict experimental -y -vf format=yuv420p ' + output_file
171 | subprocess.call(cmd, shell=True)
172 | cmd = 'rm -rf temp temp.avi'
173 | subprocess.call(cmd, shell=True)
174 |
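A minimal sketch that exercises plot_lmk_seq with random data, useful for checking the 9*10 grid layout in isolation; the real mean vector comes from params.mean_file:

import numpy as np
from plot import plot_lmk_seq

batch, time, lmk_dim = 1, 30, 136
mean = np.zeros(lmk_dim)                                   # stand-in for np.load(params.mean_file)
real_lmk_seq = np.random.uniform(-1, 1, (batch, time, lmk_dim))
fake_lmk_seq = np.random.uniform(-1, 1, (batch, time, lmk_dim))
seq_len = np.array([time])

plot_lmk_seq('.', 0, mean, seq_len, real_lmk_seq, fake_lmk_seq)   # writes ./atnet_0.jpg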
--------------------------------------------------------------------------------
/voicepuppet/atvgnet/test_atnet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | """Test for ATNet architectures."""
4 | from __future__ import absolute_import
5 | from __future__ import division
6 | from __future__ import print_function
7 |
8 | from optparse import OptionParser
9 | import tensorflow as tf
10 | import numpy as np
11 | import os
12 | from atnet import ATNet
13 | from tinynet import MfccNet
14 |
15 |
16 | class ArchitectureTest(tf.test.TestCase):
17 |
18 | def testATNet(self):
19 | config_path = 'config/params.yml'
20 | with tf.Graph().as_default():
21 | time = 100
22 |
23 | ### ATNet setting
24 | atnet = ATNet(config_path)
25 | params = atnet.params
26 | params.batch_size = 2
27 | atnet.set_params(params)
28 |
29 | seq_len = np.random.uniform(1, 100, params.batch_size).astype(np.int32)
30 | time = max(seq_len)
31 |
32 | ## landmark: [batch_size, time, 68*2]
33 | landmark = tf.random.uniform([params.batch_size, time, params.landmark_size], minval=-1, maxval=1,
34 | dtype=tf.float32)
35 | ## ears: [batch_size, time, 1]
36 | ears = tf.random.uniform([params.batch_size, time, 1], minval=0, maxval=1, dtype=tf.float32)
37 | ## poses: [batch_size, time, 3]
38 | poses = tf.random.uniform([params.batch_size, time, 3], minval=-1, maxval=1, dtype=tf.float32)
39 | ## mfccs: [batch_size, time*frame_mfcc_scale, num_mel_bins]
40 | mfccs = tf.random.uniform([params.batch_size, time * 5, 80], dtype=tf.float32)
41 | ## example_landmark: [batch_size, 68*2]
42 | example_landmark = tf.random.uniform([params.batch_size, params.landmark_size], minval=-1, maxval=1,
43 | dtype=tf.float32)
44 | ## seq_len: [batch_size], in rational size
45 | seq_len = tf.convert_to_tensor(seq_len, dtype=tf.int32)
46 |
47 | def check_nodes(nodes):
48 | ## Test input tensor
49 | self.assertAllEqual(nodes['Landmark'].shape, landmark.shape.as_list())
50 | self.assertAllEqual(nodes['Ears'].shape, ears.shape.as_list())
51 | self.assertAllEqual(nodes['Poses'].shape, poses.shape.as_list())
52 | self.assertAllEqual(nodes['Mfccs'].shape, mfccs.shape.as_list())
53 | self.assertAllEqual(nodes['Example_landmark'].shape, example_landmark.shape.as_list())
54 | self.assertAllEqual(nodes['Seq_len'].shape, seq_len.shape.as_list())
55 |
56 | ## Test MfccEncoder output tensor
57 | self.assertAllEqual(nodes['MfccEncoder'].shape, [params.batch_size, time, params.encode_embedding_size])
58 | ## Test LandmarkEncoder output tensor
59 | self.assertAllEqual(nodes['LandmarkEncoder'].shape, [params.batch_size, time, params.encode_embedding_size])
60 | ## Test PoseEncoder output tensor
61 | self.assertAllEqual(nodes['PoseEncoder'].shape, [params.batch_size, time, params.encode_embedding_size])
62 | ## Test RNNModule output tensor
63 | self.assertAllEqual(nodes['RNNModule'].shape, [params.batch_size, time, params.rnn_hidden_size])
64 | ## Test LandmarkDecoder output tensor
65 | self.assertAllEqual(nodes['LandmarkDecoder'].shape, [params.batch_size, time, params.landmark_size])
66 |
67 | ## Test LandmarkDecoder output value range
68 | self.assertAllGreaterEqual(nodes['LandmarkDecoder'], -2)
69 | self.assertAllLessEqual(nodes['LandmarkDecoder'], 2)
70 |
71 | ################## 1. Test train stage ##################
72 | nodes = atnet.build_train_op(landmark, ears, poses, mfccs, example_landmark, seq_len)
73 | with self.session() as sess:
74 | sess.run(tf.global_variables_initializer())
75 | result = sess.run([nodes['Landmark'], nodes['Ears'], nodes['Poses'], nodes['Mfccs'], nodes['Example_landmark'],
76 | nodes['Seq_len'], nodes['MfccEncoder'], nodes['LandmarkEncoder'], nodes['PoseEncoder'],
77 | nodes['RNNModule'], nodes['LandmarkDecoder']])
78 |
79 | nodes = {}
80 | nodes.update({'Landmark': result[0]})
81 | nodes.update({'Ears': result[1]})
82 | nodes.update({'Poses': result[2]})
83 | nodes.update({'Mfccs': result[3]})
84 | nodes.update({'Example_landmark': result[4]})
85 | nodes.update({'Seq_len': result[5]})
86 | nodes.update({'MfccEncoder': result[6]})
87 | nodes.update({'LandmarkEncoder': result[7]})
88 | nodes.update({'PoseEncoder': result[8]})
89 | nodes.update({'RNNModule': result[9]})
90 | nodes.update({'LandmarkDecoder': result[10]})
91 | check_nodes(nodes)
92 |
93 | ################## 2. Test evaluate stage ##################
94 | nodes = atnet.build_eval_op(landmark, ears, poses, mfccs, example_landmark, seq_len)
95 | with self.session() as sess:
96 | sess.run(tf.global_variables_initializer())
97 | result = sess.run([nodes['Landmark'], nodes['Ears'], nodes['Poses'], nodes['Mfccs'], nodes['Example_landmark'],
98 | nodes['Seq_len'], nodes['MfccEncoder'], nodes['LandmarkEncoder'], nodes['PoseEncoder'],
99 | nodes['RNNModule'], nodes['LandmarkDecoder']])
100 |
101 | nodes = {}
102 | nodes.update({'Landmark': result[0]})
103 | nodes.update({'Ears': result[1]})
104 | nodes.update({'Poses': result[2]})
105 | nodes.update({'Mfccs': result[3]})
106 | nodes.update({'Example_landmark': result[4]})
107 | nodes.update({'Seq_len': result[5]})
108 | nodes.update({'MfccEncoder': result[6]})
109 | nodes.update({'LandmarkEncoder': result[7]})
110 | nodes.update({'PoseEncoder': result[8]})
111 | nodes.update({'RNNModule': result[9]})
112 | nodes.update({'LandmarkDecoder': result[10]})
113 | check_nodes(nodes)
114 |
115 |
116 | if (__name__ == '__main__'):
117 | tf.test.main()
118 |
--------------------------------------------------------------------------------
/voicepuppet/atvgnet/test_vgnet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | """Test for ATNet architectures."""
4 | from __future__ import absolute_import
5 | from __future__ import division
6 | from __future__ import print_function
7 |
8 | import tensorflow as tf
9 | import numpy as np
10 | import os
11 | import random
12 | from vgnet import VGNet
13 |
14 |
15 | class ArchitectureTest(tf.test.TestCase):
16 |
17 | def testVGNet(self):
18 | config_path = 'config/params.yml'
19 | with tf.Graph().as_default():
20 | img_size = 128
21 |
22 | ### VGNet setting
23 | vgnet = VGNet(config_path)
24 | params = vgnet.params
25 | params.batch_size = 2
26 | vgnet.set_params(params)
27 |
28 | seq_len = np.random.uniform(1, 100, params.batch_size).astype(np.int32)
29 | time = max(seq_len)
30 |
31 | ## real_landmark_seq: [batch_size, time, 68*2]
32 | real_landmark_seq = tf.random.uniform([params.batch_size, time, params.landmark_size], minval=-1, maxval=1,
33 | dtype=tf.float32)
34 | ## real_mask_seq: [batch_size, time, img_size, img_size, 1]
35 | real_mask_seq = tf.random.uniform([params.batch_size, time, img_size, img_size, 1], minval=0, maxval=1,
36 | dtype=tf.float32)
37 | ## real_img_seq: [batch_size, time, img_size, img_size, 3]
38 | real_img_seq = tf.random.uniform([params.batch_size, time, img_size, img_size, 3], minval=-1, maxval=1,
39 | dtype=tf.float32)
40 | ## example_landmark: [batch_size, 68*2]
41 | example_landmark = tf.random.uniform([params.batch_size, params.landmark_size], minval=-1, maxval=1,
42 | dtype=tf.float32)
43 | ## example_img: [batch_size, img_size, img_size, 3]
44 | example_img = tf.random.uniform([params.batch_size, img_size, img_size, 3], minval=-1, maxval=1, dtype=tf.float32)
45 |       ## seq_len: [batch_size], valid length of each sequence
46 | seq_len = tf.convert_to_tensor(seq_len, dtype=tf.int32)
47 |
48 | def check_nodes(nodes):
49 | ## Test input tensors' shape
50 | self.assertAllEqual(nodes['Real_landmark_seq'].shape, real_landmark_seq.shape.as_list())
51 | self.assertAllEqual(nodes['Real_mask_seq'].shape, real_mask_seq.shape.as_list())
52 | self.assertAllEqual(nodes['Real_img_seq'].shape, real_img_seq.shape.as_list())
53 | self.assertAllEqual(nodes['Example_landmark'].shape, example_landmark.shape.as_list())
54 | self.assertAllEqual(nodes['Example_img'].shape, example_img.shape.as_list())
55 | self.assertAllEqual(nodes['Seq_len'].shape, seq_len.shape.as_list())
56 |
57 | ## Test Discriminator tensors' shape
58 | self.assertAllEqual(nodes['Discriminator']['Real_node']['Discriminator']['Decision'].shape, [params.batch_size])
59 | self.assertAllEqual(nodes['Discriminator']['Real_node']['Discriminator']['LandmarkSeq'].shape,
60 | [params.batch_size, time, params.landmark_size])
61 | self.assertAllEqual(nodes['Discriminator']['Fake_node']['Discriminator']['Decision'].shape,
62 | [params.batch_size])
63 | self.assertAllEqual(nodes['Discriminator']['Fake_node']['Discriminator']['LandmarkSeq'].shape,
64 | [params.batch_size, time, params.landmark_size])
65 | self.assertAllEqual(nodes['Discriminator']['Generator_node']['Generator']['Color'].shape,
66 | [params.batch_size, time, img_size, img_size, 3])
67 | self.assertAllEqual(nodes['Discriminator']['Generator_node']['Generator']['Attention'].shape,
68 | [params.batch_size, time, img_size, img_size, 1])
69 | self.assertAllEqual(nodes['Discriminator']['Generator_node']['Generator']['Feature'].shape,
70 | [params.batch_size, time, img_size, img_size, 3])
71 |
72 | ## Test Generator tensors' shape
73 | self.assertAllEqual(nodes['Generator']['Discriminator_node']['Discriminator']['Decision'].shape,
74 | [params.batch_size])
75 | self.assertAllEqual(nodes['Generator']['Discriminator_node']['Discriminator']['LandmarkSeq'].shape,
76 | [params.batch_size, time, params.landmark_size])
77 | self.assertAllEqual(nodes['Generator']['Generator_node']['Generator']['Color'].shape,
78 | [params.batch_size, time, img_size, img_size, 3])
79 | self.assertAllEqual(nodes['Generator']['Generator_node']['Generator']['Attention'].shape,
80 | [params.batch_size, time, img_size, img_size, 1])
81 | self.assertAllEqual(nodes['Generator']['Generator_node']['Generator']['Feature'].shape,
82 | [params.batch_size, time, img_size, img_size, 3])
83 |
84 | ## Test input tensors' value range
85 | self.assertAllGreaterEqual(nodes['Real_landmark_seq'], -1)
86 | self.assertAllLessEqual(nodes['Real_landmark_seq'], 1)
87 | self.assertAllGreaterEqual(nodes['Real_mask_seq'], 0)
88 | self.assertAllLessEqual(nodes['Real_mask_seq'], 1)
89 | self.assertAllGreaterEqual(nodes['Real_img_seq'], -1)
90 | self.assertAllLessEqual(nodes['Real_img_seq'], 1)
91 | self.assertAllGreaterEqual(nodes['Example_landmark'], -1)
92 | self.assertAllLessEqual(nodes['Example_landmark'], 1)
93 | self.assertAllGreaterEqual(nodes['Example_img'], -1)
94 | self.assertAllLessEqual(nodes['Example_img'], 1)
95 | self.assertAllGreaterEqual(nodes['Seq_len'], 1)
96 | self.assertAllLessEqual(nodes['Seq_len'], time)
97 |
98 | ## Test Discriminator tensors' value range
99 | self.assertAllGreaterEqual(nodes['Discriminator']['Real_node']['Discriminator']['Decision'], 0)
100 | self.assertAllLessEqual(nodes['Discriminator']['Real_node']['Discriminator']['Decision'], 1)
101 | self.assertAllGreaterEqual(nodes['Discriminator']['Real_node']['Discriminator']['LandmarkSeq'], -2)
102 | self.assertAllLessEqual(nodes['Discriminator']['Real_node']['Discriminator']['LandmarkSeq'], 2)
103 | self.assertAllGreaterEqual(nodes['Discriminator']['Fake_node']['Discriminator']['Decision'], 0)
104 | self.assertAllLessEqual(nodes['Discriminator']['Fake_node']['Discriminator']['Decision'], 1)
105 | self.assertAllGreaterEqual(nodes['Discriminator']['Fake_node']['Discriminator']['LandmarkSeq'], -2)
106 | self.assertAllLessEqual(nodes['Discriminator']['Fake_node']['Discriminator']['LandmarkSeq'], 2)
107 | self.assertAllGreaterEqual(nodes['Discriminator']['Generator_node']['Generator']['Color'], -1)
108 | self.assertAllLessEqual(nodes['Discriminator']['Generator_node']['Generator']['Color'], 1)
109 | self.assertAllGreaterEqual(nodes['Discriminator']['Generator_node']['Generator']['Attention'], 0)
110 | self.assertAllLessEqual(nodes['Discriminator']['Generator_node']['Generator']['Attention'], 1)
111 | self.assertAllGreaterEqual(nodes['Discriminator']['Generator_node']['Generator']['Feature'], -1)
112 | self.assertAllLessEqual(nodes['Discriminator']['Generator_node']['Generator']['Feature'], 1)
113 |
114 | ## Test Generator tensors' value range
115 | self.assertAllGreaterEqual(nodes['Generator']['Discriminator_node']['Discriminator']['Decision'], 0)
116 | self.assertAllLessEqual(nodes['Generator']['Discriminator_node']['Discriminator']['Decision'], 1)
117 | self.assertAllGreaterEqual(nodes['Generator']['Discriminator_node']['Discriminator']['LandmarkSeq'], -2)
118 | self.assertAllLessEqual(nodes['Generator']['Discriminator_node']['Discriminator']['LandmarkSeq'], 2)
119 | self.assertAllGreaterEqual(nodes['Generator']['Generator_node']['Generator']['Color'], -1)
120 | self.assertAllLessEqual(nodes['Generator']['Generator_node']['Generator']['Color'], 1)
121 | self.assertAllGreaterEqual(nodes['Generator']['Generator_node']['Generator']['Attention'], 0)
122 | self.assertAllLessEqual(nodes['Generator']['Generator_node']['Generator']['Attention'], 1)
123 | self.assertAllGreaterEqual(nodes['Generator']['Generator_node']['Generator']['Feature'], -1)
124 | self.assertAllLessEqual(nodes['Generator']['Generator_node']['Generator']['Feature'], 1)
125 |
126 | def walkDict(aDict, key_list, value_list, path=()):
127 |         ## flatten the nodes dict into key paths and values, preserving the hierarchy
128 | for k in aDict:
129 | if type(aDict[k]) != dict:
130 | if ('_grads' in k or '_tvars' in k):
131 | continue
132 | key_list.append(path + (k,))
133 | value_list.append(aDict[k])
134 | else:
135 | walkDict(aDict[k], key_list, value_list, path + (k,))
136 |
137 | ################## 1. Test train stage ##################
138 | nodes = vgnet.build_train_op(real_landmark_seq, real_mask_seq, real_img_seq, example_landmark, example_img,
139 | seq_len)
140 |
141 | with self.session() as sess:
142 | sess.run(tf.global_variables_initializer())
143 |         ## flatten the nodes dict into key paths and values, preserving the hierarchy
144 | key_list = []
145 | value_list = []
146 | walkDict(nodes, key_list, value_list)
147 |
148 | result = sess.run(value_list)
149 |
150 |         ## write the evaluated numpy arrays back into the nodes dict after sess.run
151 |         for i, tensor in enumerate(result):
152 |           node = nodes
153 |           for key in key_list[i][:-1]:
154 |             node = node[key]
155 |           node[key_list[i][-1]] = tensor
156 |
157 | ## test the nodes' shapes and values
158 | check_nodes(nodes)
159 |
160 | ################## 2. Test evaluate stage ##################
161 | nodes = vgnet.build_eval_op(real_landmark_seq, real_mask_seq, real_img_seq, example_landmark, example_img,
162 | seq_len)
163 | with self.session() as sess:
164 | sess.run(tf.global_variables_initializer())
165 |         ## flatten the nodes dict into key paths and values, preserving the hierarchy
166 | key_list = []
167 | value_list = []
168 | walkDict(nodes, key_list, value_list)
169 |
170 | result = sess.run(value_list)
171 |
172 |         ## write the evaluated numpy arrays back into the nodes dict after sess.run
173 |         for i, tensor in enumerate(result):
174 |           node = nodes
175 |           for key in key_list[i][:-1]:
176 |             node = node[key]
177 |           node[key_list[i][-1]] = tensor
178 |
179 | ## test the nodes' shapes and values
180 | check_nodes(nodes)
181 |
182 |
183 | if (__name__ == '__main__'):
184 | tf.test.main()
185 |
--------------------------------------------------------------------------------
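The tests above flatten the nested nodes dict into key paths and values, evaluate everything in a single sess.run, and then write the results back by key path. A minimal standalone sketch of that flatten/run/write-back pattern, using plain nested dicts in place of tensors (names and values are illustrative, not from the repo):

def flatten_dict(d, keys, values, path=()):
  # collect (key path, value) pairs from a nested dict, depth first
  for k, v in d.items():
    if isinstance(v, dict):
      flatten_dict(v, keys, values, path + (k,))
    else:
      keys.append(path + (k,))
      values.append(v)

def write_back(d, keys, results):
  # assign each result to its original position in the nested dict
  for path, value in zip(keys, results):
    node = d
    for k in path[:-1]:
      node = node[k]
    node[path[-1]] = value

nodes = {'Generator': {'Color': 1, 'Attention': 2}, 'Loss': 3}
keys, values = [], []
flatten_dict(nodes, keys, values)
write_back(nodes, keys, [v * 10 for v in values])  # stands in for sess.run(values)
print(nodes)  # {'Generator': {'Color': 10, 'Attention': 20}, 'Loss': 30}
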
/voicepuppet/atvgnet/train_atnet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | import tensorflow as tf
4 | import numpy as np
5 | import os
6 | from optparse import OptionParser
7 | import logging
8 | from atnet import ATNet
9 | from generator.generator import ATNetDataGenerator
10 | from plot import *
11 |
12 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
13 | logger = logging.getLogger(__name__)
14 |
15 |
16 | def mkdir(dirname):
17 | if not os.path.isdir(dirname):
18 | os.makedirs(dirname)
19 |
20 |
21 | if (__name__ == '__main__'):
22 |
23 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>")
24 | cmd_parser.add_option('--config_path', type="string", dest="config_path",
25 | help='the config yaml file')
26 |
27 | opts, argv = cmd_parser.parse_args()
28 |
29 | if (opts.config_path is None):
30 | logger.error('Please check your parameters.')
31 | exit(0)
32 |
33 | config_path = opts.config_path
34 |
35 | if (not os.path.exists(config_path)):
36 |     logger.error('config_path does not exist')
37 | exit(0)
38 |
39 | os.environ["CUDA_VISIBLE_DEVICES"] = '1'
40 |
41 | batch_size = 16
42 | ### Generator for training setting
43 | train_generator = ATNetDataGenerator(config_path)
44 | params = train_generator.params
45 | params.dataset_path = params.train_dataset_path
46 | params.batch_size = batch_size
47 | train_generator.set_params(params)
48 | train_dataset = train_generator.get_dataset()
49 |
50 | ### Generator for evaluation setting
51 | eval_generator = ATNetDataGenerator(config_path)
52 | params = eval_generator.params
53 | params.dataset_path = params.eval_dataset_path
54 | params.batch_size = batch_size
55 | eval_generator.set_params(params)
56 | eval_dataset = eval_generator.get_dataset()
57 |
58 | sess = tf.Session()
59 | tf.train.start_queue_runners(sess=sess)
60 |
61 | train_iter = train_dataset.make_one_shot_iterator()
62 | eval_iter = eval_dataset.make_one_shot_iterator()
63 |
64 | ### ATNet setting
65 | atnet = ATNet(config_path)
66 | params = atnet.params
67 | epochs = params.training['epochs']
68 | params.add_hparam('max_to_keep', 10)
69 | params.add_hparam('save_dir', 'ckpt_atnet')
70 | params.add_hparam('save_name', 'atnet')
71 | params.add_hparam('save_step', 1000)
72 | params.add_hparam('eval_step', 1000)
73 | params.add_hparam('summary_step', 100)
74 | params.add_hparam('eval_visual_dir', 'log/eval_atnet')
75 | params.add_hparam('summary_dir', 'log/summary_atnet')
76 | params.batch_size = batch_size
77 | atnet.set_params(params)
78 | mean = np.load(params.mean_file)
79 |
80 | mkdir(params.save_dir)
81 | mkdir(params.eval_visual_dir)
82 | mkdir(params.summary_dir)
83 |
84 | train_nodes = atnet.build_train_op(*train_iter.get_next())
85 | eval_nodes = atnet.build_eval_op(*eval_iter.get_next())
86 | sess.run(tf.global_variables_initializer())
87 |
88 | # Restore from save_dir
89 | if ('checkpoint' in os.listdir(params.save_dir)):
90 | tf.train.Saver().restore(sess, tf.train.latest_checkpoint(params.save_dir))
91 |
92 | tf.summary.scalar("loss", train_nodes['Loss'])
93 | tf.summary.scalar("lr", train_nodes['Lr'])
94 | grads = train_nodes['Grads']
95 | tvars = train_nodes['Tvars']
96 | # Add histograms for gradients.
97 | for i, grad in enumerate(grads):
98 | if grad is not None:
99 | var = tvars[i]
100 | if ('BatchNorm' not in var.op.name):
101 | tf.summary.histogram(var.op.name + '/gradients', grad)
102 |
103 | merge_summary_op = tf.summary.merge_all()
104 | summary_writer = tf.summary.FileWriter(params.summary_dir, graph=sess.graph)
105 |
106 | for i in range(epochs):
107 | ### Run training
108 | result = sess.run([train_nodes['Train_op'],
109 | merge_summary_op,
110 | train_nodes['Loss'],
111 | train_nodes['Lr'],
112 | train_nodes['Global_step'],
113 | train_nodes['Mfccs'],
114 | train_nodes['Poses'],
115 | train_nodes['Ears'],
116 | train_nodes['Seq_len'],
117 | train_nodes['Landmark'],
118 | train_nodes['Example_landmark']])
119 | _, summary, loss, lr, global_step, mfccs, poses, ears, seq_len, landmark, example_landmark = result
120 | print('Step {}: Loss= {:.3f}, Lr= {:.2e}'.format(global_step, loss, lr))
121 |
122 | if (global_step % params.summary_step == 0):
123 | summary_writer.add_summary(summary, global_step)
124 |
125 | ### Run evaluation
126 | if (global_step % params.eval_step == 0):
127 | result = sess.run([eval_nodes['Loss'],
128 | eval_nodes['Seq_len'],
129 | eval_nodes['Landmark'],
130 | eval_nodes['LandmarkDecoder']])
131 | loss, seq_len, real_lmk_seq, lmk_seq = result
132 |
133 | print('\r\nEvaluation >>> Loss= {:.3f}'.format(loss))
134 | plot_lmk_seq(params.eval_visual_dir, global_step, mean, seq_len, real_lmk_seq, lmk_seq)
135 |
136 | ### Save checkpoint
137 | if (global_step % params.save_step == 0):
138 | tf.train.Saver(max_to_keep=params.max_to_keep, var_list=tf.global_variables()).save(sess,
139 | os.path.join(params.save_dir,
140 | params.save_name),
141 | global_step=global_step)
142 |
--------------------------------------------------------------------------------
/voicepuppet/atvgnet/train_vgnet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | import tensorflow as tf
4 | import numpy as np
5 | import os
6 | from vgnet import VGNet
7 | from generator.generator import VGNetDataGenerator
8 | from optparse import OptionParser
9 | import logging
10 | from plot import *
11 |
12 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
13 | logger = logging.getLogger(__name__)
14 |
15 |
16 | def mkdir(dirname):
17 | if not os.path.isdir(dirname):
18 | os.makedirs(dirname)
19 |
20 |
21 | if (__name__ == '__main__'):
22 |
23 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>")
24 | cmd_parser.add_option('--config_path', type="string", dest="config_path",
25 | help='the config yaml file')
26 |
27 | opts, argv = cmd_parser.parse_args()
28 |
29 | if (opts.config_path is None):
30 | logger.error('Please check your parameters.')
31 | exit(0)
32 |
33 | config_path = opts.config_path
34 |
35 | if (not os.path.exists(config_path)):
36 |     logger.error('config_path does not exist')
37 | exit(0)
38 |
39 | os.environ["CUDA_VISIBLE_DEVICES"] = '2'
40 |
41 | batch_size = 4
42 | ### Generator for training setting
43 | train_generator = VGNetDataGenerator(config_path)
44 | params = train_generator.params
45 | params.dataset_path = params.train_dataset_path
46 | params.batch_size = batch_size
47 | train_generator.set_params(params)
48 | train_dataset = train_generator.get_dataset()
49 |
50 | ### Generator for evaluation setting
51 | eval_generator = VGNetDataGenerator(config_path)
52 | params = eval_generator.params
53 | params.dataset_path = params.eval_dataset_path
54 | params.batch_size = batch_size
55 | eval_generator.set_params(params)
56 | eval_dataset = eval_generator.get_dataset()
57 |
58 | sess = tf.Session()
59 | tf.train.start_queue_runners(sess=sess)
60 |
61 | train_iter = train_dataset.make_one_shot_iterator()
62 | eval_iter = eval_dataset.make_one_shot_iterator()
63 |
64 | ### VGNet setting
65 | vgnet = VGNet(config_path)
66 | params = vgnet.params
67 | epochs = params.training['epochs']
68 | params.add_hparam('max_to_keep', 10)
69 | params.add_hparam('save_dir', 'ckpt_vgnet')
70 | params.add_hparam('save_name', 'vgnet')
71 | params.add_hparam('save_step', 1000)
72 | params.add_hparam('eval_step', 1000)
73 | params.add_hparam('summary_step', 100)
74 | params.add_hparam('alternative', 1000)
75 | params.add_hparam('eval_visual_dir', 'log/eval_vgnet')
76 | params.add_hparam('summary_dir', 'log/summary_vgnet')
77 | params.batch_size = batch_size
78 | vgnet.set_params(params)
79 | mean = np.load(params.mean_file)
80 |
81 | mkdir(params.save_dir)
82 | mkdir(params.eval_visual_dir)
83 | mkdir(params.summary_dir)
84 |
85 | train_nodes = vgnet.build_train_op(*train_iter.get_next())
86 | eval_nodes = vgnet.build_eval_op(*eval_iter.get_next())
87 | sess.run(tf.global_variables_initializer())
88 |
89 | # Restore from save_dir
90 | if ('checkpoint' in os.listdir(params.save_dir)):
91 | tf.train.Saver().restore(sess, tf.train.latest_checkpoint(params.save_dir, latest_filename=None))
92 |
93 | # Add summary when training
94 | discriminator_summary = []
95 | discriminator_summary.append(tf.summary.scalar("real_bce_loss", train_nodes['Discriminator']['Real_bce_loss']))
96 | discriminator_summary.append(tf.summary.scalar("real_lmk_loss", train_nodes['Discriminator']['Real_lmk_loss']))
97 | discriminator_summary.append(tf.summary.scalar("fake_bce_loss", train_nodes['Discriminator']['Fake_bce_loss']))
98 | discriminator_summary.append(tf.summary.scalar("fake_lmk_loss", train_nodes['Discriminator']['Fake_lmk_loss']))
99 | discriminator_summary.append(
100 | tf.summary.scalar("discriminator_loss", train_nodes['Discriminator']['Discriminator_loss']))
101 |
102 | generator_summary = []
103 | generator_summary.append(tf.summary.scalar("bce_loss", train_nodes['Generator']['Bce_loss']))
104 | generator_summary.append(tf.summary.scalar("lmk_loss", train_nodes['Generator']['Lmk_loss']))
105 | generator_summary.append(tf.summary.scalar("pix_loss", train_nodes['Generator']['Pix_loss']))
106 | generator_summary.append(tf.summary.scalar("generator_loss", train_nodes['Generator']['Generator_loss']))
107 |
108 | # Add gradient to summary
109 | grads = train_nodes['Discriminator_grads']
110 | tvars = train_nodes['Discriminator_tvars']
111 | for i, grad in enumerate(grads):
112 | if grad is not None:
113 | var = tvars[i]
114 |       if ('BatchNorm' not in var.name):
115 | discriminator_summary.append(tf.summary.histogram(var.op.name + '/gradients', grad))
116 |
117 | grads = train_nodes['Generator_grads']
118 | tvars = train_nodes['Generator_tvars']
119 | for i, grad in enumerate(grads):
120 | if grad is not None:
121 | var = tvars[i]
122 |       if ('BatchNorm' not in var.name):
123 | generator_summary.append(tf.summary.histogram(var.op.name + '/gradients', grad))
124 |
125 | discriminator_summary_op = tf.summary.merge(discriminator_summary)
126 | generator_summary_op = tf.summary.merge(generator_summary)
127 | lr_summary_op = tf.summary.scalar("lr", train_nodes['Lr'])
128 |
129 | summary_writer = tf.summary.FileWriter(params.summary_dir, graph=sess.graph)
130 |
131 | # Run epoch
132 | for i in range(epochs):
133 | if ((i // params.alternative) % 2 == 0):
134 | ### Run discriminator training
135 | result = sess.run([train_nodes['Train_discriminator'],
136 | discriminator_summary_op,
137 | train_nodes['Lr'],
138 | train_nodes['Global_step'],
139 | train_nodes['Discriminator']['Real_bce_loss'],
140 | train_nodes['Discriminator']['Real_lmk_loss'],
141 | train_nodes['Discriminator']['Fake_bce_loss'],
142 | train_nodes['Discriminator']['Fake_lmk_loss'],
143 | train_nodes['Discriminator']['Discriminator_loss']])
144 | _, summary, lr, global_step, real_bce_loss, real_lmk_loss, fake_bce_loss, fake_lmk_loss, discriminator_loss = result
145 | print(
146 | 'Step {}: Lr= {:.2e}, Discriminator_loss= {:.3f}, [Real_bce_loss= {:.3f}, Real_lmk_loss= {:.3f}, Fake_bce_loss= {:.3f}, Fake_lmk_loss= {:.3f}]'.format(
147 | global_step, lr, discriminator_loss, real_bce_loss, real_lmk_loss, fake_bce_loss, fake_lmk_loss))
148 |
149 | else:
150 | ### Run generator training
151 | result = sess.run([train_nodes['Train_generator'],
152 | generator_summary_op,
153 | train_nodes['Lr'],
154 | train_nodes['Global_step'],
155 | train_nodes['Generator']['Bce_loss'],
156 | train_nodes['Generator']['Lmk_loss'],
157 | train_nodes['Generator']['Pix_loss'],
158 | train_nodes['Generator']['Generator_loss']])
159 | _, summary, lr, global_step, bce_loss, lmk_loss, pix_loss, generator_loss = result
160 | print(
161 | 'Step {}: Lr= {:.2e}, Generator_loss= {:.3f}, [Bce_loss= {:.3f}, Lmk_loss= {:.3f}, Pix_loss= {:.3f}]'.format(
162 | global_step, lr,
163 | generator_loss,
164 | bce_loss, lmk_loss, pix_loss))
165 |
166 | if (global_step % params.summary_step == 0):
167 | summary_writer.add_summary(summary, global_step)
168 |
169 | ### Run evaluation
170 | if (global_step % params.eval_step == 0):
171 | result = sess.run([eval_nodes['Real_landmark_seq'],
172 | eval_nodes['Real_mask_seq'],
173 | eval_nodes['Real_img_seq'],
174 | eval_nodes['Example_landmark'],
175 | eval_nodes['Example_img'],
176 | eval_nodes['Seq_len'],
177 | eval_nodes['Generator']['Fake_img_seq'],
178 | eval_nodes['Generator']['Attention'],
179 | eval_nodes['Generator']['Generator_loss'],
180 | eval_nodes['Discriminator']['Discriminator_loss']])
181 | real_landmark_seq, real_mask_seq, real_img_seq, example_landmark, example_img, seq_len, fake_img_seq, attention, generator_loss, discriminator_loss = result
182 |
183 | print('\r\nEvaluation >>> Generator_loss= {:.3f}, Discriminator_loss= {:.3f}'.format(generator_loss,
184 | discriminator_loss))
185 | plot_image_seq(params.eval_visual_dir, global_step, mean, seq_len, real_landmark_seq, real_mask_seq, real_img_seq,
186 | fake_img_seq, attention)
187 |
188 | ### Save checkpoint
189 | if (global_step % params.save_step == 0):
190 | tf.train.Saver(max_to_keep=params.max_to_keep, var_list=tf.global_variables()).save(sess,
191 | os.path.join(params.save_dir,
192 | params.save_name),
193 | global_step=global_step)
194 |
--------------------------------------------------------------------------------
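train_vgnet.py above alternates between training the discriminator and the generator in blocks of params.alternative steps, using (step // alternative) % 2 to pick the phase. A minimal sketch of that schedule with an illustrative block size (the helper name is hypothetical):

def training_phase(step, alternative=1000):
  # block-alternating GAN schedule: discriminator for `alternative` steps, then generator, and so on
  return 'discriminator' if (step // alternative) % 2 == 0 else 'generator'

for step in (0, 999, 1000, 1999, 2000):
  print(step, training_phase(step))
# 0 and 999 -> discriminator, 1000 and 1999 -> generator, 2000 -> discriminator again
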
/voicepuppet/bfmnet/BUILD:
--------------------------------------------------------------------------------
1 | package(
2 | default_visibility = [
3 | "//visibility:public",
4 | ],
5 | features = ["-layering_check"],
6 | )
7 |
8 | py_library(
9 | name = "tinynet",
10 | srcs = ["tinynet.py"],
11 | deps = [
12 | ],
13 | )
14 |
15 | py_library(
16 | name = "bfmnet",
17 | srcs = ["bfmnet.py"],
18 | deps = [
19 | "//config:configure",
20 | ":tinynet",
21 | "//voicepuppet:builder"
22 | ],
23 | )
24 |
25 | py_binary(
26 | name = "train_bfmnet",
27 | srcs = ["train_bfmnet.py"],
28 | deps = [
29 | "//utils:bfm_load_data",
30 | "//utils:bfm_visual",
31 | "//utils:reconstruct_mesh",
32 | "//utils:utils",
33 | ":bfmnet",
34 | "//generator:generator"
35 | ],
36 | )
37 |
38 | py_binary(
39 | name = "infer_bfmnet",
40 | srcs = ["infer_bfmnet.py"],
41 | deps = [
42 | "//utils:bfm_load_data",
43 | "//utils:bfm_visual",
44 | "//utils:reconstruct_mesh",
45 | "//utils:utils",
46 | ":bfmnet",
47 | "//generator:generator",
48 | "//generator:loader"
49 | ],
50 | )
51 |
--------------------------------------------------------------------------------
/voicepuppet/bfmnet/infer_bfmnet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | import tensorflow as tf
4 | import numpy as np
5 | import os
6 | from optparse import OptionParser
7 | import logging
8 | import subprocess
9 | from generator.loader import *
10 | from bfmnet import BFMNet
11 | from generator.generator import DataGenerator
12 | from utils.bfm_load_data import *
13 | from utils.bfm_visual import *
14 | from utils.utils import *
15 | import scipy
16 |
17 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
18 | logger = logging.getLogger(__name__)
19 |
20 | # #########################################################################################################
21 | # facemodel = BFM('../allmodels')
22 | # def visual_3dface(root, name):
23 | # mkdir('output')
24 | # for file in os.listdir('output'):
25 | # os.system('rm -rf output/{}'.format(file))
26 |
27 | # bfmcoeff_loader = BFMCoeffLoader()
28 | # bfm_coeff_seq = bfmcoeff_loader.get_data(os.path.join(root, 'bfmcoeff.txt'))
29 | # audio_file = os.path.join(root, 'audio.wav')
30 | # id_coeff = np.mean(bfm_coeff_seq[:, :80], 0)
31 |
32 | # for i in range(bfm_coeff_seq.shape[0]):
33 | # bfm_coeff_seq[i, :80] = id_coeff
34 |
35 | # for i in range(bfm_coeff_seq.shape[0]):
36 | # face_shape, face_texture, face_color, face_projection, z_buffer, landmarks_2d, _ = Reconstruction(
37 | # bfm_coeff_seq[i:i + 1, ...], facemodel)
38 | # if(i>300):
39 | # break
40 | # shape = np.squeeze(face_shape, (0))
41 | # color = np.squeeze(face_color, (0))
42 | # color = np.clip(color, 0, 255).astype(np.int32)
43 | # shape[:, :2] = 112 - shape[:, :2] * 112
44 | # shape *=3
45 |
46 | # img_size = 672
47 | # new_image = np.zeros((img_size * img_size * 3), dtype=np.uint8)
48 | # face_mask = np.zeros((img_size * img_size), dtype=np.uint8)
49 |
50 | # vertices = shape.reshape(-1).astype(np.float32).copy()
51 | # triangles = (facemodel.tri - 1).reshape(-1).astype(np.int32).copy()
52 | # colors = color.reshape(-1).astype(np.float32).copy()
53 | # depth_buffer = (np.zeros((img_size * img_size)) - 99999.0).astype(np.float32)
54 | # mesh_core_cython.render_colors_core(new_image, face_mask, vertices, triangles, colors, depth_buffer,
55 | # facemodel.tri.shape[0], img_size, img_size, 3)
56 | # new_image = new_image.reshape([img_size, img_size, 3])
57 |
58 | # new_image = cv2.cvtColor(new_image, cv2.COLOR_BGR2RGB)
59 |
60 | # cv2.imwrite('output/{}.jpg'.format(i), new_image)
61 | # print(i)
62 |
63 | # cmd = 'ffmpeg -i output/%d.jpg -i ' + audio_file + ' -c:v libx264 -c:a aac -strict experimental -y {}'.format(name)
64 | # subprocess.call(cmd, shell=True)
65 |
66 | # root = '/media/dong/DiskData/gridcorpus/todir/vid1'
67 | # for folder in os.listdir(root):
68 | # name = os.path.join(root, folder+'.mp4')
69 | # visual_3dface(os.path.join(root, folder), name)
70 | # sys.exit(0)
71 | # #########################################################################################################
72 |
73 | def alignto_bfm_coeff(model_dir, img, xys):
74 | from PIL import Image
75 | import tensorflow as tf
76 |
77 | def load_graph(graph_filename):
78 | with tf.gfile.GFile(graph_filename, 'rb') as f:
79 | graph_def = tf.GraphDef()
80 | graph_def.ParseFromString(f.read())
81 |
82 | return graph_def
83 |
84 | # read standard landmarks for preprocessing images
85 | lm3D = load_lm3d(model_dir)
86 |
87 | # build reconstruction model
88 | with tf.Graph().as_default() as graph, tf.device('/cpu:0'):
89 | images = tf.placeholder(name='input_imgs', shape=[None, 224, 224, 3], dtype=tf.float32)
90 | graph_def = load_graph(os.path.join(model_dir, "FaceReconModel.pb"))
91 | tf.import_graph_def(graph_def, name='resnet', input_map={'input_imgs:0': images})
92 |
93 | # output coefficients of R-Net (dim = 257)
94 | coeff = graph.get_tensor_by_name('resnet/coeff:0')
95 |
96 | with tf.Session() as sess:
97 |       ps = list(map(int, xys))  # materialize so indexing works on Python 3 as well
98 |
99 | left_eye_x = int(round((ps[72] + ps[74] + ps[76] + ps[78] + ps[80] + ps[82]) / 6))
100 | left_eye_y = int(round((ps[73] + ps[75] + ps[77] + ps[79] + ps[81] + ps[83]) / 6))
101 | right_eye_x = int(round((ps[84] + ps[86] + ps[88] + ps[90] + ps[92] + ps[94]) / 6))
102 | right_eye_y = int(round((ps[85] + ps[87] + ps[89] + ps[91] + ps[93] + ps[95]) / 6))
103 | nose_x = int(round(ps[60]))
104 | nose_y = int(round(ps[61]))
105 | left_mouse_x = int(round(ps[96]))
106 | left_mouse_y = int(round(ps[97]))
107 | right_mouse_x = int(round(ps[108]))
108 | right_mouse_y = int(round(ps[109]))
109 |
110 | lmk5 = np.array(
111 | [[left_eye_x, left_eye_y], [right_eye_x, right_eye_y], [nose_x, nose_y], [left_mouse_x, left_mouse_y],
112 | [right_mouse_x, right_mouse_y]])
113 |
114 | image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
115 | # preprocess input image
116 | input_img, lm_new, transform_params = Preprocess(image, lmk5, lm3D)
117 | bfmcoeff = sess.run(coeff, feed_dict={images: input_img})
118 | return bfmcoeff, input_img, transform_params
119 |
120 |
121 | if (__name__ == '__main__'):
122 |
123 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>")
124 | cmd_parser.add_option('--config_path', type="string", dest="config_path",
125 | help='the config yaml file')
126 |
127 | opts, argv = cmd_parser.parse_args()
128 |
129 | if (opts.config_path is None):
130 | logger.error('Please check your parameters.')
131 | exit(0)
132 |
133 | config_path = opts.config_path
134 |
135 | if (not os.path.exists(config_path)):
136 |     logger.error('config_path does not exist')
137 | exit(0)
138 |
139 | image_file, audio_file = argv
140 |
141 | os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
142 | mkdir('output')
143 | for file in os.listdir('output'):
144 | os.system('rm -rf output/{}'.format(file))
145 |
146 | batch_size = 1
147 | ### Generator for inference setting
148 | infer_generator = DataGenerator(config_path)
149 | params = infer_generator.params
150 | params.batch_size = batch_size
151 | infer_generator.set_params(params)
152 | wav_loader = WavLoader(sr=infer_generator.sample_rate)
153 | pcm = wav_loader.get_data(audio_file)
154 |
155 | pad_len = int(1 + pcm.shape[0] / infer_generator.frame_wav_scale)
156 |   # compute the exact pcm length needed to keep the mfcc frames aligned with the landmark sequence.
157 | pcm_length = infer_generator.hop_step * (pad_len * infer_generator.frame_mfcc_scale - 1) + infer_generator.win_length
158 | if (pcm.shape[0] < pcm_length):
159 | pcm = np.pad(pcm, (0, pcm_length - pcm.shape[0]), 'constant', constant_values=(0))
160 | pcm_slice = pcm[:pcm_length][np.newaxis, :]
161 |
162 | ears = np.ones([1, pad_len, 1], dtype=np.float32)*0.9
163 | for i in range(pad_len//2):
164 | ears[0, i, 0] = 0.2
165 | ears = tf.convert_to_tensor(ears, dtype=tf.float32)
166 | mfcc = infer_generator.extract_mfcc(pcm_slice)
167 | img = cv2.imread(image_file)
168 |
169 | _, _, img_cropped, lmk_cropped, center_x, center_y, ratio = get_mxnet_sat_alignment(params.model_dir, img)
170 | bfmcoeff, input_img, transform_params = alignto_bfm_coeff(params.model_dir, img_cropped, lmk_cropped)
171 | ratio *= transform_params[2]
172 | tx = -int(round(transform_params[3] / ratio))
173 | ty = -int(round(transform_params[4] / ratio))
174 |
175 | seq_len = tf.convert_to_tensor([pad_len], dtype=tf.int32)
176 |
177 | config = tf.ConfigProto()
178 | config.gpu_options.allow_growth = True
179 | sess = tf.Session(config=config)
180 |
181 | ### BFMNet setting
182 | bfmnet = BFMNet(config_path)
183 | params = bfmnet.params
184 | params.batch_size = batch_size
185 | bfmnet.set_params(params)
186 | facemodel = BFM(params.model_dir)
187 |
188 | infer_nodes = bfmnet.build_inference_op(ears, mfcc, seq_len)
189 | sess.run(tf.global_variables_initializer())
190 |
191 | # Restore from save_dir
192 | tf.train.Saver().restore(sess, 'ckpt_bfmnet/bfmnet-65000')
193 |
194 | ### Run inference
195 | bfm_coeff_seq = sess.run(infer_nodes['BFMCoeffDecoder'])
196 | bfmcoeff = np.tile(bfmcoeff[:, np.newaxis, :], [1, bfm_coeff_seq.shape[1], 1])
197 |
198 | bfm_coeff_seq = np.concatenate([bfmcoeff[:, :, :80], bfm_coeff_seq[:, :, :], bfmcoeff[:, :, 144:]], axis=2)
199 | merge_images = []
200 |
201 |   ### step 2: render the 3D face image sequence
202 | angles = np.array([[0, 0, 0]], dtype=np.float32)
203 | shift = 0.04
204 | for i in range(bfm_coeff_seq.shape[1]):
205 | angles[0][1] += shift
206 | if (angles[0][1] > 0.8 or angles[0][1] < -0.8):
207 | shift = -shift
208 |
209 | face_shape, face_texture, face_color, face_projection, z_buffer, landmarks_2d, _ = Reconstruction(
210 | bfm_coeff_seq[0, i:i + 1, ...], facemodel)
211 |
212 | shape = np.squeeze(face_shape, (0))
213 | color = np.squeeze(face_color, (0))
214 | color = np.clip(color, 0, 255).astype(np.int32)
215 | shape[:, :2] = 112 - shape[:, :2] * 112
216 | shape *=3
217 |
218 | img_size = 672
219 | new_image = np.zeros((img_size * img_size * 3), dtype=np.uint8)
220 | face_mask = np.zeros((img_size * img_size), dtype=np.uint8)
221 |
222 | vertices = shape.reshape(-1).astype(np.float32).copy()
223 | triangles = (facemodel.tri - 1).reshape(-1).astype(np.int32).copy()
224 | colors = color.reshape(-1).astype(np.float32).copy()
225 | depth_buffer = (np.zeros((img_size * img_size)) - 99999.0).astype(np.float32)
226 | mesh_core_cython.render_colors_core(new_image, face_mask, vertices, triangles, colors, depth_buffer,
227 | facemodel.tri.shape[0], img_size, img_size, 3)
228 | new_image = new_image.reshape([img_size, img_size, 3])
229 |
230 | new_image = cv2.cvtColor(new_image, cv2.COLOR_BGR2RGB)
231 |
232 | cv2.imwrite('output/{}.jpg'.format(i), new_image)
233 |
234 | cmd = 'ffmpeg -i output/%d.jpg -i ' + audio_file + ' -c:v libx264 -c:a aac -strict experimental -y temp.mp4'
235 | subprocess.call(cmd, shell=True)
236 |
--------------------------------------------------------------------------------
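The padding arithmetic in infer_bfmnet.py above sizes the pcm buffer so that the number of mfcc frames is an exact multiple of the number of video frames (frame_mfcc_scale mfcc frames per frame). A small worked sketch of that relation; all numeric values here are illustrative stand-ins for the real settings in config/params.yml:

# illustrative values only; the actual settings come from config/params.yml
hop_step = 160            # mfcc hop length in samples
win_length = 400          # mfcc window length in samples
frame_mfcc_scale = 5      # mfcc frames per video frame
frame_wav_scale = 640     # wav samples per video frame

pcm_samples = 51200       # pretend length of the loaded wav
pad_len = int(1 + pcm_samples / frame_wav_scale)                       # video frames to generate -> 81
pcm_length = hop_step * (pad_len * frame_mfcc_scale - 1) + win_length  # samples needed -> 65040

n_mfcc_frames = 1 + (pcm_length - win_length) // hop_step              # frames a hop/window STFT yields
print(n_mfcc_frames == pad_len * frame_mfcc_scale)                     # True: 405 == 81 * 5
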
/voicepuppet/bfmnet/train_bfmnet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | import tensorflow as tf
4 | import numpy as np
5 | import os
6 | from optparse import OptionParser
7 | import logging
8 | import sys
9 |
10 | sys.path.append(os.path.join(os.getcwd(), 'generator'))
11 | sys.path.append(os.path.join(os.getcwd(), 'utils'))
12 |
13 | from bfmnet import BFMNet
14 | from generator import BFMNetDataGenerator
15 | from bfm_load_data import *
16 | from bfm_visual import *
17 | from utils import *
18 |
19 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
20 | logger = logging.getLogger(__name__)
21 |
22 |
23 | if (__name__ == '__main__'):
24 |
25 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>")
26 | cmd_parser.add_option('--config_path', type="string", dest="config_path",
27 | help='the config yaml file')
28 |
29 | opts, argv = cmd_parser.parse_args()
30 |
31 | if (opts.config_path is None):
32 | logger.error('Please check your parameters.')
33 | exit(0)
34 |
35 | config_path = opts.config_path
36 |
37 | if (not os.path.exists(config_path)):
38 |     logger.error('config_path does not exist')
39 | exit(0)
40 |
41 | os.environ["CUDA_VISIBLE_DEVICES"] = '0'
42 |
43 | batch_size = 4
44 | ### Generator for training setting
45 | train_generator = BFMNetDataGenerator(config_path)
46 | params = train_generator.params
47 | params.dataset_path = params.train_dataset_path
48 | params.batch_size = batch_size
49 | train_generator.set_params(params)
50 | train_dataset = train_generator.get_dataset()
51 |
52 | ### Generator for evaluation setting
53 | eval_generator = BFMNetDataGenerator(config_path)
54 | params = eval_generator.params
55 | params.dataset_path = params.eval_dataset_path
56 | params.batch_size = batch_size
57 | eval_generator.set_params(params)
58 | eval_dataset = eval_generator.get_dataset()
59 |
60 | config = tf.ConfigProto()
61 | config.gpu_options.allow_growth = True
62 | sess = tf.Session(config=config)
63 |
64 | tf.train.start_queue_runners(sess=sess)
65 |
66 | train_iter = train_dataset.make_one_shot_iterator()
67 | eval_iter = eval_dataset.make_one_shot_iterator()
68 |
69 | ### BFMNet setting
70 | bfmnet = BFMNet(config_path)
71 | params = bfmnet.params
72 | epochs = params.training['epochs']
73 | params.add_hparam('max_to_keep', 10)
74 | params.add_hparam('save_dir', 'ckpt_bfmnet')
75 | params.add_hparam('save_name', 'bfmnet')
76 | params.add_hparam('save_step', 5000)
77 | params.add_hparam('eval_step', 1000)
78 | # params.add_hparam('summary_step', 1000)
79 | params.add_hparam('eval_visual_dir', 'log/eval_bfmnet')
80 | # params.add_hparam('summary_dir', 'log/summary_bfmnet')
81 | params.batch_size = batch_size
82 | bfmnet.set_params(params)
83 | facemodel = BFM(params.model_dir)
84 |
85 | mkdir(params.save_dir)
86 | mkdir(params.eval_visual_dir)
87 | # mkdir(params.summary_dir)
88 |
89 | train_nodes = bfmnet.build_train_op(*train_iter.get_next())
90 | eval_nodes = bfmnet.build_eval_op(*eval_iter.get_next())
91 | sess.run(tf.global_variables_initializer())
92 |
93 | # Restore from save_dir
94 | if ('checkpoint' in os.listdir(params.save_dir)):
95 | print('Restore from {}\n'.format(params.save_dir))
96 | tf.train.Saver().restore(sess, tf.train.latest_checkpoint(params.save_dir))
97 |
98 | # tf.summary.scalar("loss", train_nodes['Loss'])
99 | # tf.summary.scalar("lr", train_nodes['Lr'])
100 | # grads = train_nodes['Grads']
101 | # tvars = train_nodes['Tvars']
102 | # # Add histograms for gradients.
103 | # for i, grad in enumerate(grads):
104 | # if grad is not None:
105 | # var = tvars[i]
106 | # if ('BatchNorm' not in var.op.name):
107 | # tf.summary.histogram(var.op.name + '/gradients', grad)
108 |
109 | # merge_summary_op = tf.summary.merge_all()
110 | # summary_writer = tf.summary.FileWriter(params.summary_dir, graph=sess.graph)
111 |
112 | for i in range(epochs):
113 | ### Run training
114 | result = sess.run([train_nodes['Train_op'],
115 | # merge_summary_op,
116 | train_nodes['Loss'],
117 | train_nodes['Lr'],
118 | train_nodes['Global_step'],
119 | train_nodes['Mfccs'],
120 | train_nodes['Seq_len'],
121 | train_nodes['BFM_coeff_seq'],
122 | train_nodes['Ears']])
123 | _, loss, lr, global_step, mfccs, seq_len, bfm_coeff_seq, ears = result
124 | print('Step {}: Loss= {:.3f}, Lr= {:.2e}'.format(global_step, loss, lr))
125 |
126 | # if (global_step % params.summary_step == 0):
127 | # summary_writer.add_summary(summary, global_step)
128 |
129 | ### Run evaluation
130 | if (global_step % params.eval_step == 0):
131 | result = sess.run([eval_nodes['Loss'],
132 | eval_nodes['Seq_len'],
133 | eval_nodes['BFM_coeff_seq'],
134 | eval_nodes['BFMCoeffDecoder']])
135 | loss, seq_len, real_bfm_coeff_seq, bfm_coeff_seq = result
136 |
137 | print('\r\nEvaluation >>> Loss= {:.3f}'.format(loss))
138 | plot_bfm_coeff_seq(params.eval_visual_dir, facemodel, global_step, seq_len, real_bfm_coeff_seq, bfm_coeff_seq)
139 |
140 | ### Save checkpoint
141 | if (global_step % params.save_step == 0):
142 | tf.train.Saver(max_to_keep=params.max_to_keep, var_list=tf.global_variables()).save(sess,
143 | os.path.join(params.save_dir,
144 | params.save_name),
145 | global_step=global_step)
146 |
--------------------------------------------------------------------------------
/voicepuppet/builder.py:
--------------------------------------------------------------------------------
1 | class ModelBuilder(object):
2 |
3 | def __init__(self):
4 |     raise NotImplementedError('__init__ not implemented.')
5 |
6 | def build_network(self):
7 |     raise NotImplementedError('build_network not implemented.')
8 |
9 | def __call__(self):
10 |     raise NotImplementedError('__call__ not implemented.')
11 |
--------------------------------------------------------------------------------
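ModelBuilder above is the abstract interface shared by the concrete networks (ATNet, VGNet, BFMNet, PixFlowNet): each is expected to supply its own __init__, build_network and __call__. A minimal, hypothetical subclass showing the contract, assuming ModelBuilder above is in scope; the class name and body are illustrative, and __init__ deliberately does not call the base initializer, since it raises.

class DummyNet(ModelBuilder):
  # hypothetical subclass illustrating the ModelBuilder contract

  def __init__(self, config_path):
    # do not call ModelBuilder.__init__, which raises NotImplementedError
    self.config_path = config_path

  def build_network(self, inputs):
    # a real subclass assembles its graph here and returns a dict of named nodes
    return {'Outputs': inputs}

  def __call__(self, inputs):
    return self.build_network(inputs)
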
/voicepuppet/pixflow/BUILD:
--------------------------------------------------------------------------------
1 | package(
2 | default_visibility = [
3 | "//visibility:public",
4 | ],
5 | features = ["-layering_check"],
6 | )
7 |
8 | py_library(
9 | name = "pixflow",
10 | srcs = ["pixflow.py"],
11 | deps = [
12 | "//config:configure",
13 | "//voicepuppet:builder"
14 | ],
15 | )
16 |
17 | py_binary(
18 | name = "train_pixflow",
19 | srcs = ["train_pixflow.py"],
20 | deps = [
21 | "//utils:utils",
22 | ":pixflow",
23 | "//generator:generator",
24 | "//generator:loader"
25 | ],
26 | )
27 |
28 | py_binary(
29 | name = "infer_pixflow",
30 | srcs = ["infer_pixflow.py"],
31 | deps = [
32 | "//utils:bfm_load_data",
33 | "//utils:reconstruct_mesh",
34 | "//utils:bfm_visual",
35 | "//utils:utils",
36 | ":pixflow",
37 | "//voicepuppet/bfmnet:bfmnet",
38 | "//generator:generator",
39 | "//generator:loader"
40 | ],
41 | )
42 |
43 | py_binary(
44 | name = "infer_bfm_pixflow",
45 | srcs = ["infer_bfm_pixflow.py"],
46 | deps = [
47 | "//utils:bfm_load_data",
48 | "//utils:reconstruct_mesh",
49 | "//utils:bfm_visual",
50 | "//utils:utils",
51 | ":pixflow",
52 | "//voicepuppet/bfmnet:bfmnet",
53 | "//generator:generator",
54 | "//generator:loader"
55 | ],
56 | )
57 |
--------------------------------------------------------------------------------
/voicepuppet/pixflow/infer_bfm_pixflow.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | import tensorflow as tf
4 | import numpy as np
5 | import os
6 | from optparse import OptionParser
7 | import logging
8 | import subprocess
9 | from pixflow import PixFlowNet
10 | from voicepuppet.bfmnet.bfmnet import BFMNet
11 | from generator.loader import *
12 | from generator.generator import DataGenerator
13 | from utils.bfm_load_data import *
14 | from utils.bfm_visual import *
15 | from utils.utils import *
16 | import scipy
17 | import random
18 |
19 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
20 | logger = logging.getLogger(__name__)
21 |
22 | def alignto_bfm_coeff(model_dir, img, xys):
23 | from PIL import Image
24 | import tensorflow as tf
25 |
26 | def load_graph(graph_filename):
27 | with tf.gfile.GFile(graph_filename, 'rb') as f:
28 | graph_def = tf.GraphDef()
29 | graph_def.ParseFromString(f.read())
30 |
31 | return graph_def
32 |
33 | # read standard landmarks for preprocessing images
34 | lm3D = load_lm3d(model_dir)
35 |
36 | # build reconstruction model
37 | with tf.Graph().as_default() as graph, tf.device('/cpu:0'):
38 | images = tf.placeholder(name='input_imgs', shape=[None, 224, 224, 3], dtype=tf.float32)
39 | graph_def = load_graph(os.path.join(model_dir, "FaceReconModel.pb"))
40 | tf.import_graph_def(graph_def, name='resnet', input_map={'input_imgs:0': images})
41 |
42 | # output coefficients of R-Net (dim = 257)
43 | coeff = graph.get_tensor_by_name('resnet/coeff:0')
44 |
45 | with tf.Session() as sess:
46 | ps = map(lambda x: int(x), xys)
47 |
48 | left_eye_x = int(round((ps[72] + ps[74] + ps[76] + ps[78] + ps[80] + ps[82]) / 6))
49 | left_eye_y = int(round((ps[73] + ps[75] + ps[77] + ps[79] + ps[81] + ps[83]) / 6))
50 | right_eye_x = int(round((ps[84] + ps[86] + ps[88] + ps[90] + ps[92] + ps[94]) / 6))
51 | right_eye_y = int(round((ps[85] + ps[87] + ps[89] + ps[91] + ps[93] + ps[95]) / 6))
52 | nose_x = int(round(ps[60]))
53 | nose_y = int(round(ps[61]))
54 | left_mouse_x = int(round(ps[96]))
55 | left_mouse_y = int(round(ps[97]))
56 | right_mouse_x = int(round(ps[108]))
57 | right_mouse_y = int(round(ps[109]))
58 |
59 | lmk5 = np.array(
60 | [[left_eye_x, left_eye_y], [right_eye_x, right_eye_y], [nose_x, nose_y], [left_mouse_x, left_mouse_y],
61 | [right_mouse_x, right_mouse_y]])
62 |
63 | image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
64 | # preprocess input image
65 | input_img, lm_new, transform_params = Preprocess(image, lmk5, lm3D)
66 | bfmcoeff = sess.run(coeff, feed_dict={images: input_img})
67 | return bfmcoeff, input_img, transform_params
68 |
69 | angles = np.array([[0, 0, 0]], dtype=np.float32)
70 | shift = 0.005
71 |
72 | def render_face(center_x, center_y, ratio, bfmcoeff, img, transform_params, facemodel):
73 | ratio *= transform_params[2]
74 | tx = -int((transform_params[3] / ratio))
75 | ty = -int((transform_params[4] / ratio))
76 | global angles, shift
77 |
78 | # angles[0][0] += shift
79 | # angles[0][1] += shift
80 | # angles[0][2] += shift
81 | # if (angles[0][1] > 0.03 or angles[0][1] < -0.03):
82 | # shift = -shift
83 |
84 | face_shape, face_texture, face_color, face_projection, z_buffer, landmarks_2d = Reconstruction_rotation(
85 | bfmcoeff, facemodel, angles)
86 | face_projection2 = np.concatenate([face_projection, z_buffer], axis=2)
87 | face_projection = np.squeeze(face_projection2, (0))
88 |
89 | shape = np.squeeze(face_projection2, (0))
90 | color = np.squeeze(face_color, (0))
91 | color = np.clip(color, 0, 255).astype(np.int32)
92 |
93 | new_image = np.zeros((224 * 224 * 3), dtype=np.uint8)
94 | face_mask = np.zeros((224 * 224), dtype=np.uint8)
95 |
96 | vertices = shape.reshape(-1).astype(np.float32).copy()
97 | triangles = (facemodel.tri - 1).reshape(-1).astype(np.int32).copy()
98 | colors = color.reshape(-1).astype(np.float32).copy()
99 | depth_buffer = (np.zeros((224 * 224)) - 99999.0).astype(np.float32)
100 | mesh_core_cython.render_colors_core(new_image, face_mask, vertices, triangles, colors, depth_buffer,
101 | facemodel.tri.shape[0], 224, 224, 3)
102 | new_image = new_image.reshape([224, 224, 3])
103 |
104 | new_image = cv2.cvtColor(new_image, cv2.COLOR_BGR2RGB)
105 | new_image = cv2.resize(new_image, (
106 | int(round(new_image.shape[0] / ratio)), int(round(new_image.shape[1] / ratio))))
107 |
108 | back_new_image = np.zeros((img.shape[0], img.shape[1], img.shape[2]), dtype=img.dtype)
109 | center_face_x = new_image.shape[1] // 2
110 | center_face_y = new_image.shape[0] // 2
111 |
112 | ry = center_y - center_face_y + new_image.shape[0] - ty
113 | rx = center_x - center_face_x + new_image.shape[1] - tx
114 | back_new_image[center_y - center_face_y - ty:ry, center_x - center_face_x - tx:rx, :] = new_image
115 | return back_new_image
116 |
117 |
118 | if (__name__ == '__main__'):
119 |
120 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>")
121 | cmd_parser.add_option('--config_path', type="string", dest="config_path",
122 | help='the config yaml file')
123 |
124 | opts, argv = cmd_parser.parse_args()
125 |
126 | if (opts.config_path is None):
127 | logger.error('Please check your parameters.')
128 | exit(0)
129 |
130 | config_path = opts.config_path
131 |
132 | if (not os.path.exists(config_path)):
133 |     logger.error('config_path does not exist')
134 | exit(0)
135 |
136 | os.environ["CUDA_VISIBLE_DEVICES"] = '0'
137 | os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
138 |
139 | image_file, audio_file = argv
140 |
141 | mkdir('output')
142 | for file in os.listdir('output'):
143 | os.system('rm -rf output/{}'.format(file))
144 |
145 | batch_size = 1
146 | ### Generator for inference setting
147 | infer_generator = DataGenerator(config_path)
148 | params = infer_generator.params
149 | params.batch_size = batch_size
150 | infer_generator.set_params(params)
151 | wav_loader = WavLoader(sr=infer_generator.sample_rate)
152 | pcm = wav_loader.get_data(audio_file)
153 | facemodel = BFM(params.pretrain_dir)
154 |
155 | pad_len = int(1 + pcm.shape[0] / infer_generator.frame_wav_scale)
156 |   # compute the exact pcm length needed to keep the mfcc frames aligned with the landmark sequence.
157 | pcm_length = infer_generator.hop_step * (pad_len * infer_generator.frame_mfcc_scale - 1) + infer_generator.win_length
158 | if (pcm.shape[0] < pcm_length):
159 | pcm = np.pad(pcm, (0, pcm_length - pcm.shape[0]), 'constant', constant_values=(0))
160 | pcm_slice = pcm[:pcm_length][np.newaxis, :]
161 |
162 | mfcc = infer_generator.extract_mfcc(pcm_slice)
163 | img_size = 512
164 | img = cv2.imread(image_file)[:, :512, :]
165 | img, img_landmarks, img_cropped, lmk_cropped, center_x, center_y, ratio = get_mxnet_sat_alignment(params.pretrain_dir, img)
166 | bfmcoeff, input_img, transform_params = alignto_bfm_coeff(params.pretrain_dir, img_cropped, lmk_cropped)
167 |
168 | img = cv2.cvtColor(cv2.imread(image_file), cv2.COLOR_BGR2RGB).astype(np.float32)/255.0
169 | face3d_refer = img[:, 512:512*2, :]
170 | fg_refer = img[:, :512, :] * img[:, 512*2:, :]
171 | img = img[:, :512, :]
172 |
173 | with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))) as sess:
174 | seq_len = tf.convert_to_tensor([pad_len], dtype=tf.int32)
175 | ear = np.random.rand(1, pad_len, 1).astype(np.float32)/100
176 | ear = tf.convert_to_tensor(ear, dtype=tf.float32)
177 |
178 | with tf.variable_scope('localization'):
179 | ### BFMNet setting
180 | bfmnet = BFMNet(config_path)
181 | params = bfmnet.params
182 | params.batch_size = 1
183 | bfmnet.set_params(params)
184 |
185 | bfmnet_nodes = bfmnet.build_inference_op(ear, mfcc, seq_len)
186 |
187 | with tf.variable_scope('recognition'):
188 | ### Vid2VidNet setting
189 | vid2vidnet = PixFlowNet(config_path)
190 | params = vid2vidnet.params
191 | params.batch_size = 1
192 | vid2vidnet.set_params(params)
193 |
194 | inputs_holder = tf.placeholder(tf.float32, shape=[None, img_size, img_size, 6])
195 | targets_holder = tf.placeholder(tf.float32, shape=[None, img_size, img_size, 6])
196 | vid2vid_nodes = vid2vidnet.build_inference_op(inputs_holder, targets_holder)
197 |
198 | variables_to_restore = tf.global_variables()
199 | loc_varlist = {v.name[13:][:-2]: v
200 | for v in variables_to_restore if v.name[:12]=='localization'}
201 | rec_varlist = {v.name[12:][:-2]: v
202 | for v in variables_to_restore if v.name[:11]=='recognition'}
203 |
204 | loc_saver = tf.train.Saver(var_list=loc_varlist)
205 | rec_saver = tf.train.Saver(var_list=rec_varlist)
206 |
207 | sess.run(tf.global_variables_initializer())
208 | loc_saver.restore(sess, 'ckpt_bfmnet_new3/bfmnet-40000')
209 | rec_saver.restore(sess, 'ckpt_pixflow3/pixflownet-50000')
210 |
211 | ### Run inference
212 | bfm_coeff_seq = sess.run(bfmnet_nodes['BFMCoeffDecoder'])
213 | bfmcoeff = np.tile(bfmcoeff[:, np.newaxis, :], [1, bfm_coeff_seq.shape[1], 1])
214 |
215 | bfm_coeff_seq = np.concatenate([bfmcoeff[:, :, :80], bfm_coeff_seq[:, :, :], bfmcoeff[:, :, 144:]], axis=2)
216 |
217 | inputs = np.zeros([1, img_size, img_size, 6], dtype=np.float32)
218 | inputs[0, ..., 0:3] = face3d_refer
219 |
220 | for i in range(bfm_coeff_seq.shape[1]):
221 | face3d = render_face(center_x+random.randint(0, 0), center_y+random.randint(0, 0), ratio, bfm_coeff_seq[0, i:i + 1, ...], img, transform_params, facemodel)
222 | # cv2.imwrite('output/{}.jpg'.format(i), face3d)
223 | face3d = cv2.cvtColor(face3d, cv2.COLOR_BGR2RGB).astype(np.float32)/255.0
224 |
225 | inputs[0, ..., 0:3] = face3d
226 |
227 | bg_img = np.zeros([1, img_size, img_size, 6], dtype=np.float32)
228 | bg_img[0, ..., :3] = cv2.resize(cv2.imread('background/{}.jpg'.format(i+1)), (img_size, img_size)).astype(np.float32)/255.0
229 | bg_img[0, ..., 3:] = bg_img[0, ..., :3]
230 |
231 | # bg_img = cv2.cvtColor(bg_img, cv2.COLOR_BGR2RGB)
232 | frames = sess.run(vid2vid_nodes['Outputs'],
233 | feed_dict={inputs_holder: inputs, targets_holder: bg_img})
234 |
235 | cv2.imwrite('output/{}.jpg'.format(i), cv2.cvtColor((frames[0,..., :3]*255).astype(np.uint8), cv2.COLOR_BGR2RGB))
236 |
237 | # image_loader = ImageLoader()
238 | # for index in range(4, 195):
239 | # img = image_loader.get_data(os.path.join('/media/dong/DiskData/gridcorpus/todir_vid2vid/vid1/05', '{}.jpg'.format(index)))
240 | # face3d = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)[:, img_size:img_size*2, :]
241 |
242 | # inputs[0, ..., 3:6] = inputs[0, ..., 6:9]
243 | # inputs[0, ..., 6:9] = face3d
244 |
245 | # frames, last = sess.run([vid2vid_nodes['Outputs'], vid2vid_nodes['Outputs_FG']],
246 | # feed_dict={inputs_holder: inputs, fg_inputs_holder: fg_inputs, targets_holder: np.tile(bg_img, (1, 1, 3))[np.newaxis, ...]})
247 | # fg_inputs[0, ..., 3:6] = last
248 |
249 | # cv2.imwrite('output/{}.jpg'.format(index), cv2.cvtColor((last[0,...]*255).astype(np.uint8), cv2.COLOR_BGR2RGB))
250 |
--------------------------------------------------------------------------------
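infer_bfm_pixflow.py above restores two independently trained checkpoints into one graph: each model is built inside its own variable_scope, and the scope prefix plus the ':0' suffix are stripped from the variable names before handing the mapping to tf.train.Saver. A minimal sketch of that mapping, assuming the TF1-style API used throughout the repo; the scope name and checkpoint path in the usage note are illustrative.

import tensorflow as tf

def scoped_var_map(scope):
  # map checkpoint variable names (without the scope) to in-graph variables (with the scope)
  prefix = scope + '/'
  return {v.name[len(prefix):-2]: v            # drop 'scope/' prefix and ':0' suffix
          for v in tf.global_variables()
          if v.name.startswith(prefix)}

# usage (hypothetical checkpoint path):
# saver = tf.train.Saver(var_list=scoped_var_map('localization'))
# saver.restore(sess, 'ckpt_bfmnet/bfmnet-65000')
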
/voicepuppet/pixflow/infer_pixflow.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | import tensorflow as tf
4 | import numpy as np
5 | import os
6 | from optparse import OptionParser
7 | import logging
8 | import subprocess
9 | from pixflow import PixFlowNet
10 | from voicepuppet.bfmnet.bfmnet import BFMNet
11 | from generator.loader import *
12 | from generator.generator import DataGenerator
13 | from utils.bfm_load_data import *
14 | from utils.bfm_visual import *
15 | from utils.utils import *
16 | import scipy
17 |
18 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
19 | logger = logging.getLogger(__name__)
20 |
21 |
22 | if (__name__ == '__main__'):
23 |
24 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>")
25 | cmd_parser.add_option('--config_path', type="string", dest="config_path",
26 | help='the config yaml file')
27 |
28 | opts, argv = cmd_parser.parse_args()
29 |
30 | if (opts.config_path is None):
31 | logger.error('Please check your parameters.')
32 | exit(0)
33 |
34 | config_path = opts.config_path
35 |
36 | if (not os.path.exists(config_path)):
37 |     logger.error('config_path does not exist')
38 | exit(0)
39 |
40 | os.environ["CUDA_VISIBLE_DEVICES"] = '0'
41 | os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
42 |
43 | mkdir('output')
44 | for file in os.listdir('output'):
45 | os.system('rm -rf output/{}'.format(file))
46 |
47 | batch_size = 1
48 | img_size = 512
49 | image_loader = ImageLoader()
50 | root = '/media/dong/DiskData/gridcorpus/todir_vid2vid/vid1/05'
51 | bg_img = np.zeros([1, img_size, img_size, 6], dtype=np.float32)
52 | bg_img[0, ..., :3] = cv2.resize(cv2.imread('/home/dong/Downloads/bg.jpg'), (img_size, img_size)).astype(np.float32)/255.0
53 | bg_img[0, ..., 3:] = bg_img[0, ..., :3]
54 |
55 | with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))) as sess:
56 | with tf.variable_scope('recognition'):
57 | ### Vid2VidNet setting
58 | vid2vidnet = PixFlowNet(config_path)
59 | params = vid2vidnet.params
60 | params.batch_size = 1
61 | vid2vidnet.set_params(params)
62 |
63 | inputs_holder = tf.placeholder(tf.float32, shape=[None, img_size, img_size, 6])
64 | targets_holder = tf.placeholder(tf.float32, shape=[None, img_size, img_size, 6])
65 | vid2vid_nodes = vid2vidnet.build_inference_op(inputs_holder, targets_holder)
66 |
67 | variables_to_restore = tf.global_variables()
68 | rec_varlist = {v.name[12:][:-2]: v
69 | for v in variables_to_restore if v.name[:11]=='recognition'}
70 |
71 | rec_saver = tf.train.Saver(var_list=rec_varlist)
72 |
73 | sess.run(tf.global_variables_initializer())
74 | rec_saver.restore(sess, 'ckpt_pixflow3/pixflownet-60000')
75 |
76 | inputs = np.zeros([1, img_size, img_size, 6], dtype=np.float32)
77 |
78 | img = image_loader.get_data(os.path.join(root, '{}.jpg'.format(10)))
79 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
80 | inputs[0, :, :, 0:3] = img[:, img_size:img_size*2, :]
81 |
82 | for index in range(195):
83 | img = image_loader.get_data(os.path.join(root, '{}.jpg'.format(index)))
84 | if (img is not None):
85 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
86 | inputs[0, ..., 3:6] = img[:, img_size:img_size*2, :]
87 |
88 | frames = sess.run(vid2vid_nodes['Outputs'],
89 | feed_dict={inputs_holder: inputs, targets_holder: bg_img})
90 |
91 | cv2.imwrite('output/_{}.jpg'.format(index), cv2.cvtColor((frames[0,...,3:]*255).astype(np.uint8), cv2.COLOR_BGR2RGB))
92 | # cv2.imshow('', last[0, ...])
93 | # cv2.waitKey(0)
94 |
95 |
96 | # cv2.imwrite('output/_{}.jpg'.format(i), cv2.cvtColor((frames[0,...]*255).astype(np.uint8), cv2.COLOR_BGR2RGB))
97 |
98 | # cmd = 'ffmpeg -i output/_%d.jpg -i ' + audio_file + ' -c:v libx264 -c:a aac -strict experimental -y temp2.mp4'
99 | # subprocess.call(cmd, shell=True)
100 |
101 | # cmd = 'ffmpeg -i output/%d.jpg -i ' + audio_file + ' -c:v libx264 -c:a aac -strict experimental -y temp.mp4'
102 | # subprocess.call(cmd, shell=True)
103 |
--------------------------------------------------------------------------------
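infer_pixflow.py builds the network inside a 'recognition' variable scope while the checkpoint was saved without that prefix, so the Saver receives a dict that maps each stripped name (v.name[12:][:-2] removes 'recognition/' and the ':0' suffix) back to the scoped variable. A small helper expressing the same idea; a sketch only, assuming TensorFlow 1.x, and the function name is ours.

import tensorflow as tf

def scoped_restore_map(scope_name):
  # Map checkpoint variable names (without the scope prefix) to the
  # variables created under tf.variable_scope(scope_name).
  prefix = scope_name + '/'
  return {v.name[len(prefix):].split(':')[0]: v
          for v in tf.global_variables()
          if v.name.startswith(prefix)}

# Usage sketch:
#   saver = tf.train.Saver(var_list=scoped_restore_map('recognition'))
#   saver.restore(sess, 'ckpt_pixflow3/pixflownet-60000')
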
/voicepuppet/pixflow/train_pixflow.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | import tensorflow as tf
4 | import numpy as np
5 | import os
6 | from optparse import OptionParser
7 | import logging
8 | from pixflow import PixFlowNet
9 | from generator.generator import PixFlowDataGenerator
10 | from utils.utils import *
11 |
12 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
13 | logger = logging.getLogger(__name__)
14 |
15 |
16 | if (__name__ == '__main__'):
17 |
18 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>")
19 | cmd_parser.add_option('--config_path', type="string", dest="config_path",
20 | help='the config yaml file')
21 |
22 | opts, argv = cmd_parser.parse_args()
23 |
24 | if (opts.config_path is None):
25 | logger.error('Please check your parameters.')
26 | exit(0)
27 |
28 | config_path = opts.config_path
29 |
30 | if (not os.path.exists(config_path)):
31 |     logger.error('config_path does not exist')
32 | exit(0)
33 |
34 | os.environ["CUDA_VISIBLE_DEVICES"] = '0'
35 |
36 | batch_size = 3
37 | ### Generator for training setting
38 | train_generator = PixFlowDataGenerator(config_path)
39 | params = train_generator.params
40 | params.dataset_path = params.train_dataset_path
41 | params.batch_size = batch_size
42 | train_generator.set_params(params)
43 | train_dataset = train_generator.get_dataset()
44 |
45 | config = tf.ConfigProto()
46 | config.gpu_options.allow_growth = True
47 | sess = tf.Session(config=config)
48 | tf.train.start_queue_runners(sess=sess)
49 |
50 | train_iter = train_dataset.make_one_shot_iterator()
51 |
52 | # inputs, fg_inputs, targets, masks = sess.run(train_iter.get_next())
53 | # inp1 = cv2.cvtColor((inputs[0,...,0:3]*255).astype(np.uint8), cv2.COLOR_BGR2RGB)
54 | # inp2 = cv2.cvtColor((inputs[0,...,3:6]*255).astype(np.uint8), cv2.COLOR_BGR2RGB)
55 | # fg_inputs1 = cv2.cvtColor((fg_inputs[0,...,0:3]*255).astype(np.uint8), cv2.COLOR_BGR2RGB)
56 | # fg_inputs2 = cv2.cvtColor((fg_inputs[0,...,3:6]*255).astype(np.uint8), cv2.COLOR_BGR2RGB)
57 | # targets1 = cv2.cvtColor((targets[0,...,0:3]*255).astype(np.uint8), cv2.COLOR_BGR2RGB)
58 | # targets2 = cv2.cvtColor((targets[0,...,3:6]*255).astype(np.uint8), cv2.COLOR_BGR2RGB)
59 | # masks1 = cv2.cvtColor((masks[0,...,0:3]*255).astype(np.uint8), cv2.COLOR_BGR2RGB)
60 | # masks2 = cv2.cvtColor((masks[0,...,3:6]*255).astype(np.uint8), cv2.COLOR_BGR2RGB)
61 |
62 | # cv2.imwrite('to/inp1.jpg', inp1)
63 | # cv2.imwrite('to/inp2.jpg', inp2)
64 | # cv2.imwrite('to/fg_inputs1.jpg', fg_inputs1)
65 | # cv2.imwrite('to/fg_inputs2.jpg', fg_inputs2)
66 | # cv2.imwrite('to/targets1.jpg', targets1)
67 | # cv2.imwrite('to/targets2.jpg', targets2)
68 | # cv2.imwrite('to/masks1.jpg', masks1)
69 | # cv2.imwrite('to/masks2.jpg', masks2)
70 | # sys.exit(0)
71 |
72 |
73 | ### Vid2VidNet setting
74 | vid2vidnet = PixFlowNet(config_path)
75 | params = vid2vidnet.params
76 | epochs = params.training['epochs']
77 | params.add_hparam('max_to_keep', 2)
78 | params.add_hparam('save_dir', 'ckpt_pixflow')
79 | params.add_hparam('save_name', 'pixflownet')
80 | params.add_hparam('save_step', 5000)
81 | params.add_hparam('summary_step', 100)
82 | params.add_hparam('summary_dir', 'log/summary_pixflow')
83 | params.batch_size = batch_size
84 | vid2vidnet.set_params(params)
85 |
86 | mkdir(params.save_dir)
87 | mkdir(params.summary_dir)
88 |
89 | train_nodes = vid2vidnet.build_train_op(*train_iter.get_next())
90 | sess.run(tf.global_variables_initializer())
91 |
92 | # Restore from save_dir
93 | if ('checkpoint' in os.listdir(params.save_dir)):
94 | tf.train.Saver().restore(sess, tf.train.latest_checkpoint(params.save_dir))
95 |
96 | tf.summary.scalar("discriminator_loss", train_nodes['Discrim_loss'])
97 | tf.summary.scalar("generator_loss_GAN", train_nodes['Gen_loss_GAN'])
98 | tf.summary.scalar("generator_loss_L1", train_nodes['Gen_loss_L1'])
99 |
100 | with tf.name_scope("inputs_summary"):
101 | tf.summary.image("inputs", tf.image.convert_image_dtype(train_nodes['Inputs'][... ,3:6], dtype=tf.uint8))
102 |
103 | with tf.name_scope("targets_summary"):
104 | tf.summary.image("targets", tf.image.convert_image_dtype(train_nodes['FG_Inputs'][... ,3:6], dtype=tf.uint8))
105 |
106 | with tf.name_scope("outputs_summary"):
107 | tf.summary.image("outputs", tf.image.convert_image_dtype(train_nodes['Outputs'], dtype=tf.uint8))
108 |
109 | with tf.name_scope("alpha_summary"):
110 | tf.summary.image("alphas", tf.image.convert_image_dtype(train_nodes['Alphas'], dtype=tf.uint8))
111 |
112 | # Add histograms for gradients.
113 | for grad, var in train_nodes['Discrim_grads_and_vars'] + train_nodes['Gen_grads_and_vars']:
114 | if(grad is not None):
115 | tf.summary.histogram(var.op.name, grad)
116 |
117 | merge_summary_op = tf.summary.merge_all()
118 | summary_writer = tf.summary.FileWriter(params.summary_dir, graph=sess.graph)
119 |
120 | for i in range(epochs):
121 | ### Run training
122 | result = sess.run([train_nodes['Train_op'],
123 | merge_summary_op,
124 | train_nodes['Gen_loss_GAN'],
125 | train_nodes['Gen_loss_L1'],
126 | train_nodes['Discrim_loss'],
127 | train_nodes['Lr'],
128 | train_nodes['Global_step']])
129 | _, summary, gen_loss_GAN, gen_loss_L1, discrim_loss, lr, global_step = result
130 | if(global_step % params.summary_step==0):
131 | print('Step {}, Lr= {:.2e}: \n\tgen_loss_GAN= {:.3f}, \n\tgen_loss_L1= {:.3f}, \n\tdiscrim_loss= {:.3f}'.format(global_step, lr, gen_loss_GAN, gen_loss_L1, discrim_loss))
132 | summary_writer.add_summary(summary, global_step)
133 |
134 | ### Save checkpoint
135 | if (global_step % params.save_step == 0):
136 | tf.train.Saver(max_to_keep=params.max_to_keep, var_list=tf.global_variables()).save(sess,
137 | os.path.join(params.save_dir,
138 | params.save_name),
139 | global_step=global_step)
140 |
--------------------------------------------------------------------------------
/voicepuppet/pixrefer/BUILD:
--------------------------------------------------------------------------------
1 | package(
2 | default_visibility = [
3 | "//visibility:public",
4 | ],
5 | features = ["-layering_check"],
6 | )
7 |
8 | py_library(
9 | name = "vgg_simple",
10 | srcs = ["vgg_simple.py"],
11 | deps = [
12 | "//config:configure",
13 | "//voicepuppet:builder"
14 | ],
15 | )
16 |
17 | py_library(
18 | name = "pixrefer",
19 | srcs = ["pixrefer.py"],
20 | deps = [
21 | "//config:configure",
22 | "//voicepuppet:builder",
23 | ":vgg_simple"
24 | ],
25 | )
26 |
27 | py_binary(
28 | name = "train_pixrefer",
29 | srcs = ["train_pixrefer.py"],
30 | deps = [
31 | "//utils:utils",
32 | ":pixrefer",
33 | "//generator:generator",
34 | "//generator:loader"
35 | ],
36 | )
37 |
38 | py_binary(
39 | name = "infer_pixrefer",
40 | srcs = ["infer_pixrefer.py"],
41 | deps = [
42 | "//utils:bfm_load_data",
43 | "//utils:reconstruct_mesh",
44 | "//utils:bfm_visual",
45 | "//utils:utils",
46 | ":pixrefer",
47 | "//voicepuppet/bfmnet:bfmnet",
48 | "//generator:generator",
49 | "//generator:loader"
50 | ],
51 | )
52 |
53 | py_binary(
54 | name = "infer_bfmvid",
55 | srcs = ["infer_bfmvid.py"],
56 | deps = [
57 | "//utils:bfm_load_data",
58 | "//utils:reconstruct_mesh",
59 | "//utils:bfm_visual",
60 | "//utils:utils",
61 | ":pixrefer",
62 | "//voicepuppet/bfmnet:bfmnet",
63 | "//generator:generator",
64 | "//generator:loader"
65 | ],
66 | )
67 |
--------------------------------------------------------------------------------
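Each target above is a Bazel py_binary, so a typical invocation would look roughly like the following; the config file is the repository's config/params.yml, and the checkpoint and dataset paths referenced inside the scripts still have to exist locally.

bazel run //voicepuppet/pixrefer:train_pixrefer -- --config_path=config/params.yml
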
/voicepuppet/pixrefer/infer_bfmvid.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | import tensorflow as tf
4 | import numpy as np
5 | import os
6 | from optparse import OptionParser
7 | import logging
8 | import subprocess
9 | import scipy
10 | import random
11 | import sys
12 |
13 | sys.path.append(os.path.join(os.getcwd(), 'utils'))
14 |
15 | from pixrefer import PixReferNet
16 | from voicepuppet.bfmnet.bfmnet import BFMNet
17 | from generator.loader import *
18 | from generator.generator import DataGenerator
19 | from bfm_load_data import *
20 | from bfm_visual import *
21 | from utils import *
22 |
23 | bfmcoeff_loader = BFMCoeffLoader()
24 | # vid_bfmcoeff = bfmcoeff_loader.get_data('/media/dong/DiskData/gridcorpus/todir/bilibili/4_16/bfmcoeff.txt')
25 |
26 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
27 | logger = logging.getLogger(__name__)
28 |
29 | def alignto_bfm_coeff(model_dir, img, xys):
30 | from PIL import Image
31 | import tensorflow as tf
32 |
33 | def load_graph(graph_filename):
34 | with tf.gfile.GFile(graph_filename, 'rb') as f:
35 | graph_def = tf.GraphDef()
36 | graph_def.ParseFromString(f.read())
37 |
38 | return graph_def
39 |
40 | # read standard landmarks for preprocessing images
41 | lm3D = load_lm3d(model_dir)
42 |
43 | # build reconstruction model
44 | with tf.Graph().as_default() as graph, tf.device('/cpu:0'):
45 | images = tf.placeholder(name='input_imgs', shape=[None, 224, 224, 3], dtype=tf.float32)
46 | graph_def = load_graph(os.path.join(model_dir, "FaceReconModel.pb"))
47 | tf.import_graph_def(graph_def, name='resnet', input_map={'input_imgs:0': images})
48 |
49 | # output coefficients of R-Net (dim = 257)
50 | coeff = graph.get_tensor_by_name('resnet/coeff:0')
51 |
52 | with tf.Session() as sess:
53 | ps = list(map(lambda x: int(x), xys))
54 |
55 | left_eye_x = int(round((ps[72] + ps[74] + ps[76] + ps[78] + ps[80] + ps[82]) / 6))
56 | left_eye_y = int(round((ps[73] + ps[75] + ps[77] + ps[79] + ps[81] + ps[83]) / 6))
57 | right_eye_x = int(round((ps[84] + ps[86] + ps[88] + ps[90] + ps[92] + ps[94]) / 6))
58 | right_eye_y = int(round((ps[85] + ps[87] + ps[89] + ps[91] + ps[93] + ps[95]) / 6))
59 | nose_x = int(round(ps[60]))
60 | nose_y = int(round(ps[61]))
61 |       left_mouth_x = int(round(ps[96]))
62 |       left_mouth_y = int(round(ps[97]))
63 |       right_mouth_x = int(round(ps[108]))
64 |       right_mouth_y = int(round(ps[109]))
65 | 
66 |       lmk5 = np.array(
67 |         [[left_eye_x, left_eye_y], [right_eye_x, right_eye_y], [nose_x, nose_y], [left_mouth_x, left_mouth_y],
68 |          [right_mouth_x, right_mouth_y]])
69 |
70 | image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
71 | # preprocess input image
72 | input_img, lm_new, transform_params = Preprocess(image, lmk5, lm3D)
73 | bfmcoeff = sess.run(coeff, feed_dict={images: input_img})
74 | return bfmcoeff, input_img, transform_params
75 |
76 | angles = np.array([[0, 0, 0]], dtype=np.float32)
77 | shift = 0.005
78 |
79 | def render_face(center_x, center_y, ratio, bfmcoeff, img, transform_params, facemodel):
80 | ratio *= transform_params[2]
81 | tx = -int((transform_params[3] / ratio))
82 | ty = -int((transform_params[4] / ratio))
83 | global angles, shift
84 |
85 | angles[0][0] += shift
86 | angles[0][1] += shift
87 | angles[0][2] += shift
88 | if (angles[0][1] > 0.03 or angles[0][1] < -0.03):
89 | shift = -shift
90 |
91 | face_shape, face_texture, face_color, face_projection, z_buffer, landmarks_2d = Reconstruction_rotation(
92 | bfmcoeff, facemodel, angles)
93 | face_projection2 = np.concatenate([face_projection, z_buffer], axis=2)
94 | face_projection = np.squeeze(face_projection2, (0))
95 |
96 | shape = np.squeeze(face_projection2, (0))
97 | color = np.squeeze(face_color, (0))
98 | color = np.clip(color, 0, 255).astype(np.int32)
99 |
100 | new_image = np.zeros((224 * 224 * 3), dtype=np.uint8)
101 | face_mask = np.zeros((224 * 224), dtype=np.uint8)
102 |
103 | vertices = shape.reshape(-1).astype(np.float32).copy()
104 | triangles = (facemodel.tri - 1).reshape(-1).astype(np.int32).copy()
105 | colors = color.reshape(-1).astype(np.float32).copy()
106 | depth_buffer = (np.zeros((224 * 224)) - 99999.0).astype(np.float32)
107 | mesh_core_cython.render_colors_core(new_image, face_mask, vertices, triangles, colors, depth_buffer,
108 | facemodel.tri.shape[0], 224, 224, 3)
109 | new_image = new_image.reshape([224, 224, 3])
110 |
111 | new_image = cv2.cvtColor(new_image, cv2.COLOR_BGR2RGB)
112 | new_image = cv2.resize(new_image, (
113 | int(round(new_image.shape[0] / ratio)), int(round(new_image.shape[1] / ratio))))
114 |
115 | back_new_image = np.zeros((img.shape[0], img.shape[1], img.shape[2]), dtype=img.dtype)
116 | center_face_x = new_image.shape[1] // 2
117 | center_face_y = new_image.shape[0] // 2
118 |
119 | ry = center_y - center_face_y + new_image.shape[0] - ty
120 | rx = center_x - center_face_x + new_image.shape[1] - tx
121 | back_new_image[center_y - center_face_y - ty:ry, center_x - center_face_x - tx:rx, :] = new_image
122 | return back_new_image
123 |
124 |
125 | if (__name__ == '__main__'):
126 |
127 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>")
128 | cmd_parser.add_option('--config_path', type="string", dest="config_path",
129 | help='the config yaml file')
130 |
131 | opts, argv = cmd_parser.parse_args()
132 |
133 | if (opts.config_path is None):
134 | logger.error('Please check your parameters.')
135 | exit(0)
136 |
137 | config_path = opts.config_path
138 |
139 | if (not os.path.exists(config_path)):
140 |     logger.error('config_path does not exist')
141 | exit(0)
142 |
143 | os.environ["CUDA_VISIBLE_DEVICES"] = '0'
144 | os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
145 |
146 | image_file, audio_file = argv
147 |
148 | mkdir('output')
149 | for file in os.listdir('output'):
150 | os.system('rm -rf output/{}'.format(file))
151 |
152 | batch_size = 1
153 | ### Generator for inference setting
154 | infer_generator = DataGenerator(config_path)
155 | params = infer_generator.params
156 | params.batch_size = batch_size
157 | infer_generator.set_params(params)
158 | wav_loader = WavLoader(sr=infer_generator.sample_rate)
159 | pcm = wav_loader.get_data(audio_file)
160 | facemodel = BFM(params.model_dir)
161 |
162 | pad_len = int(1 + pcm.shape[0] / infer_generator.frame_wav_scale)
163 | # calculate the rational length of pcm in order to keep the alignment of mfcc and landmark sequence.
164 | pcm_length = infer_generator.hop_step * (pad_len * infer_generator.frame_mfcc_scale - 1) + infer_generator.win_length
165 | if (pcm.shape[0] < pcm_length):
166 | pcm = np.pad(pcm, (0, pcm_length - pcm.shape[0]), 'constant', constant_values=(0))
167 | pcm_slice = pcm[:pcm_length][np.newaxis, :]
168 |
169 | mfcc = infer_generator.extract_mfcc(pcm_slice)
170 | img_size = 512
171 | img = cv2.imread(image_file)[:, :512, :]
172 | img, img_landmarks, img_cropped, lmk_cropped, center_x, center_y, ratio = get_mxnet_sat_alignment(params.model_dir, img)
173 | bfmcoeff, input_img, transform_params = alignto_bfm_coeff(params.model_dir, img_cropped, lmk_cropped)
174 |
175 | img = cv2.cvtColor(cv2.imread(image_file), cv2.COLOR_BGR2RGB).astype(np.float32)/255.0
176 | face3d_refer = img[:, 512:512*2, :]
177 | fg_refer = img[:, :512, :] * img[:, 512*2:, :]
178 | img = img[:, :512, :]
179 |
180 | with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))) as sess:
181 | seq_len = tf.convert_to_tensor([pad_len], dtype=tf.int32)
182 | ear = np.random.rand(1, pad_len, 1).astype(np.float32)/100
183 | ear = tf.convert_to_tensor(ear, dtype=tf.float32)
184 |
185 | with tf.variable_scope('bfm_scope'):
186 | ### BFMNet setting
187 | bfmnet = BFMNet(config_path)
188 | params = bfmnet.params
189 | params.batch_size = 1
190 | bfmnet.set_params(params)
191 |
192 | bfmnet_nodes = bfmnet.build_inference_op(ear, mfcc, seq_len)
193 |
194 | with tf.variable_scope('vid_scope'):
195 | ### Vid2VidNet setting
196 | vid2vidnet = PixReferNet(config_path)
197 | params = vid2vidnet.params
198 | params.batch_size = 1
199 | params.add_hparam('is_training', False)
200 | vid2vidnet.set_params(params)
201 |
202 | inputs_holder = tf.placeholder(tf.float32, shape=[None, img_size, img_size, 6])
203 | fg_inputs_holder = tf.placeholder(tf.float32, shape=[None, img_size, img_size, 3])
204 | targets_holder = tf.placeholder(tf.float32, shape=[None, img_size, img_size, 3])
205 | vid2vid_nodes = vid2vidnet.build_inference_op(inputs_holder, fg_inputs_holder, targets_holder)
206 |
207 | variables_to_restore = tf.global_variables()
208 | bfm_varlist = {v.name[len('bfm_scope')+1:][:-2]: v
209 | for v in variables_to_restore if v.name[:len('bfm_scope')]=='bfm_scope'}
210 | vid_varlist = {v.name[len('vid_scope')+1:][:-2]: v
211 | for v in variables_to_restore if v.name[:len('vid_scope')]=='vid_scope'}
212 |
213 | bfm_saver = tf.train.Saver(var_list=bfm_varlist)
214 | vid_saver = tf.train.Saver(var_list=vid_varlist)
215 |
216 | sess.run(tf.global_variables_initializer())
217 | bfm_saver.restore(sess, 'ckpt_bfmnet/bfmnet-65000')
218 | vid_saver.restore(sess, 'ckpt_pixrefer/pixrefernet-20000')
219 |
220 | # ### Run inference
221 | bfm_coeff_seq = sess.run(bfmnet_nodes['BFMCoeffDecoder'])
222 | # bfm_coeff_seq = vid_bfmcoeff[np.newaxis, :, 80:144]
223 | bfmcoeff = np.tile(bfmcoeff[:, np.newaxis, :], [1, bfm_coeff_seq.shape[1], 1])
224 | bfm_coeff_seq = np.concatenate([bfmcoeff[:, :, :80], bfm_coeff_seq, bfmcoeff[:, :, 144:]], axis=2)
225 |
226 | inputs = np.zeros([1, img_size, img_size, 6], dtype=np.float32)
227 | fg_inputs = np.zeros([1, img_size, img_size, 3], dtype=np.float32)
228 | inputs[0, ..., 0:3] = face3d_refer
229 | fg_inputs[0, ..., 0:3] = fg_refer
230 |
231 |     for i in range(bfm_coeff_seq.shape[1]):
232 |       face3d = render_face(center_x, center_y, ratio, bfm_coeff_seq[0, i:i + 1, ...], img, transform_params, facemodel)
233 | # cv2.imwrite('output/{}.jpg'.format(i), face3d)
234 | face3d = cv2.cvtColor(face3d, cv2.COLOR_BGR2RGB).astype(np.float32)/255.0
235 |
236 | inputs[0, ..., 3:6] = face3d
237 |
238 | bg_img = cv2.resize(cv2.imread('background/{}.jpg'.format(i%100+1)), (img_size, img_size)).astype(np.float32)/255.0
239 | bg_img = cv2.cvtColor(bg_img, cv2.COLOR_BGR2RGB)
240 | frames, last = sess.run([vid2vid_nodes['Outputs'], vid2vid_nodes['Outputs_FG']],
241 | feed_dict={inputs_holder: inputs, fg_inputs_holder: fg_inputs, targets_holder: bg_img[np.newaxis, ...]})
242 |
243 | cv2.imwrite('output/{}.jpg'.format(i), cv2.cvtColor((frames[0,...]*255).astype(np.uint8), cv2.COLOR_BGR2RGB))
244 |
245 | cmd = 'ffmpeg -i output/%d.jpg -i ' + audio_file + ' -c:v libx264 -c:a aac -strict experimental -y output.mp4'
246 | subprocess.call(cmd, shell=True)
247 |
248 | # image_loader = ImageLoader()
249 | # for index in range(4, 195):
250 | # img = image_loader.get_data(os.path.join('/media/dong/DiskData/gridcorpus/todir_vid2vid/vid1/05', '{}.jpg'.format(index)))
251 | # face3d = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)[:, img_size:img_size*2, :]
252 |
253 | # inputs[0, ..., 3:6] = inputs[0, ..., 6:9]
254 | # inputs[0, ..., 6:9] = face3d
255 |
256 | # frames, last = sess.run([vid2vid_nodes['Outputs'], vid2vid_nodes['Outputs_FG']],
257 | # feed_dict={inputs_holder: inputs, fg_inputs_holder: fg_inputs, targets_holder: np.tile(bg_img, (1, 1, 3))[np.newaxis, ...]})
258 | # fg_inputs[0, ..., 3:6] = last
259 |
260 | # cv2.imwrite('output/{}.jpg'.format(index), cv2.cvtColor((last[0,...]*255).astype(np.uint8), cv2.COLOR_BGR2RGB))
261 |
--------------------------------------------------------------------------------
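infer_bfmvid.py pads the input waveform so that the MFCC frame count lines up with the number of video frames to generate: pad_len frames need hop_step * (pad_len * frame_mfcc_scale - 1) + win_length samples. A worked NumPy sketch of that arithmetic; the parameter values below are made up, the real ones come from the config read by DataGenerator.

import numpy as np

# Hypothetical audio parameters (the real values live in the yaml config).
hop_step = 160          # samples between successive MFCC frames
win_length = 400        # samples covered by one MFCC window
frame_wav_scale = 640   # audio samples per video frame
frame_mfcc_scale = 4    # MFCC frames per video frame

pcm = np.zeros(30000, dtype=np.float32)  # stand-in waveform

# Number of video frames this clip should produce.
pad_len = int(1 + pcm.shape[0] / frame_wav_scale)

# Samples needed so that exactly pad_len * frame_mfcc_scale MFCC frames fit.
pcm_length = hop_step * (pad_len * frame_mfcc_scale - 1) + win_length
if pcm.shape[0] < pcm_length:
  pcm = np.pad(pcm, (0, pcm_length - pcm.shape[0]), 'constant')
pcm_slice = pcm[:pcm_length][np.newaxis, :]

assert pcm_slice.shape == (1, pcm_length)
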
/voicepuppet/pixrefer/infer_pixrefer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | import tensorflow as tf
4 | import numpy as np
5 | import os
6 | from optparse import OptionParser
7 | import logging
8 | import subprocess
9 | from pixrefer import PixReferNet
10 | from voicepuppet.bfmnet.bfmnet import BFMNet
11 | from generator.loader import *
12 | from generator.generator import DataGenerator
13 | from utils.bfm_load_data import *
14 | from utils.bfm_visual import *
15 | from utils.utils import *
16 | import scipy
17 |
18 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
19 | logger = logging.getLogger(__name__)
20 |
21 |
22 | if (__name__ == '__main__'):
23 |
24 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>")
25 | cmd_parser.add_option('--config_path', type="string", dest="config_path",
26 | help='the config yaml file')
27 |
28 | opts, argv = cmd_parser.parse_args()
29 |
30 | if (opts.config_path is None):
31 | logger.error('Please check your parameters.')
32 | exit(0)
33 |
34 | config_path = opts.config_path
35 |
36 | if (not os.path.exists(config_path)):
37 |     logger.error('config_path does not exist')
38 | exit(0)
39 |
40 | os.environ["CUDA_VISIBLE_DEVICES"] = '0'
41 | os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
42 |
43 | mkdir('output')
44 | for file in os.listdir('output'):
45 | os.system('rm -rf output/{}'.format(file))
46 |
47 | batch_size = 1
48 | img_size = 512
49 | image_loader = ImageLoader()
50 | root = '/media/dong/DiskData/gridcorpus/todir_vid2vid/vid1/05'
51 | bg_img = cv2.resize(cv2.imread('/home/dong/Downloads/bg.jpg'), (img_size, img_size)).astype(np.float32)/255.0
52 |
53 | with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))) as sess:
54 | with tf.variable_scope('recognition'):
55 | ### Vid2VidNet setting
56 | vid2vidnet = PixReferNet(config_path)
57 | params = vid2vidnet.params
58 | params.batch_size = 1
59 | params.add_hparam('is_training', False)
60 | vid2vidnet.set_params(params)
61 |
62 | inputs_holder = tf.placeholder(tf.float32, shape=[None, img_size, img_size, 6])
63 | fg_inputs_holder = tf.placeholder(tf.float32, shape=[None, img_size, img_size, 3])
64 | targets_holder = tf.placeholder(tf.float32, shape=[None, img_size, img_size, 3])
65 | vid2vid_nodes = vid2vidnet.build_inference_op(inputs_holder, fg_inputs_holder, targets_holder)
66 |
67 | variables_to_restore = tf.global_variables()
68 | rec_varlist = {v.name[12:][:-2]: v
69 | for v in variables_to_restore if v.name[:11]=='recognition'}
70 |
71 | rec_saver = tf.train.Saver(var_list=rec_varlist)
72 |
73 | sess.run(tf.global_variables_initializer())
74 | rec_saver.restore(sess, 'ckpt_pixrefer/pixrefernet-20000')
75 |
76 | inputs = np.zeros([1, img_size, img_size, 6], dtype=np.float32)
77 | fg_inputs = np.zeros([1, img_size, img_size, 3], dtype=np.float32)
78 |
79 | img = image_loader.get_data(os.path.join(root, '{}.jpg'.format(0)))
80 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
81 | inputs[0, :, :, 0:3] = img[:, img_size:img_size*2, :]
82 | fg_inputs[0, :, :, 0:3] = img[:, :img_size, :] * img[:, img_size*2:, :]
83 |
84 | for index in range(4, 195):
85 | img = image_loader.get_data(os.path.join(root, '{}.jpg'.format(index)))
86 | if (img is not None):
87 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
88 | inputs[0, ..., 3:6] = img[:, img_size:img_size*2, :]
89 |
90 | frames, last = sess.run([vid2vid_nodes['Outputs'], vid2vid_nodes['Outputs_FG']],
91 | feed_dict={inputs_holder: inputs, fg_inputs_holder: fg_inputs, targets_holder: bg_img[np.newaxis, ...]})
92 |
93 | cv2.imwrite('output/_{}.jpg'.format(index), cv2.cvtColor((frames[0,...]*255).astype(np.uint8), cv2.COLOR_BGR2RGB))
94 | # cv2.imshow('', last[0, ...])
95 | # cv2.waitKey(0)
96 |
97 |
98 | # cv2.imwrite('output/_{}.jpg'.format(i), cv2.cvtColor((frames[0,...]*255).astype(np.uint8), cv2.COLOR_BGR2RGB))
99 |
100 | # cmd = 'ffmpeg -i output/_%d.jpg -i ' + audio_file + ' -c:v libx264 -c:a aac -strict experimental -y temp2.mp4'
101 | # subprocess.call(cmd, shell=True)
102 |
103 | # cmd = 'ffmpeg -i output/%d.jpg -i ' + audio_file + ' -c:v libx264 -c:a aac -strict experimental -y temp.mp4'
104 | # subprocess.call(cmd, shell=True)
105 |
--------------------------------------------------------------------------------
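Both infer_bfmvid.py and infer_pixrefer.py read 512x1536 strips and treat them as three 512x512 panels laid side by side: the real frame, the rendered 3D face, and a mask. The foreground reference fed to PixReferNet is the real frame multiplied by the mask. A minimal NumPy sketch under that assumed layout.

import numpy as np

img_size = 512
# Assumed panel order in each strip: [real frame | rendered face | mask],
# values already scaled to [0, 1].
strip = np.random.rand(img_size, img_size * 3, 3).astype(np.float32)

frame  = strip[:, :img_size, :]
face3d = strip[:, img_size:img_size * 2, :]
mask   = strip[:, img_size * 2:, :]

# Foreground reference: the real frame with the background masked out.
fg_refer = frame * mask
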
/voicepuppet/pixrefer/train_pixrefer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | import tensorflow as tf
4 | import numpy as np
5 | import os
6 | from optparse import OptionParser
7 | import logging
8 | from pixrefer import PixReferNet
9 | from generator.generator import PixReferDataGenerator
10 | from utils.utils import *
11 |
12 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
13 | logger = logging.getLogger(__name__)
14 |
15 |
16 | if (__name__ == '__main__'):
17 |
18 | cmd_parser = OptionParser(usage="usage: %prog [options] --config_path <>")
19 | cmd_parser.add_option('--config_path', type="string", dest="config_path",
20 | help='the config yaml file')
21 |
22 | opts, argv = cmd_parser.parse_args()
23 |
24 | if (opts.config_path is None):
25 | logger.error('Please check your parameters.')
26 | exit(0)
27 |
28 | config_path = opts.config_path
29 |
30 | if (not os.path.exists(config_path)):
31 |     logger.error('config_path does not exist')
32 | exit(0)
33 |
34 | os.environ["CUDA_VISIBLE_DEVICES"] = '0'
35 |
36 | batch_size = 2
37 | ### Generator for training setting
38 | train_generator = PixReferDataGenerator(config_path)
39 | params = train_generator.params
40 | params.dataset_path = params.train_dataset_path
41 | params.batch_size = batch_size
42 | train_generator.set_params(params)
43 | train_dataset = train_generator.get_dataset()
44 |
45 | config = tf.ConfigProto()
46 | config.gpu_options.allow_growth = True
47 | sess = tf.Session(config=config)
48 | tf.train.start_queue_runners(sess=sess)
49 |
50 | train_iter = train_dataset.make_one_shot_iterator()
51 |
52 | # inputs, fg_inputs, targets, masks = sess.run(train_iter.get_next())
53 | # inp1 = cv2.cvtColor((inputs[0,...,0:3]*255).astype(np.uint8), cv2.COLOR_BGR2RGB)
54 | # inp2 = cv2.cvtColor((inputs[0,...,3:6]*255).astype(np.uint8), cv2.COLOR_BGR2RGB)
55 | # fg1 = cv2.cvtColor((fg_inputs[0, ...]*255).astype(np.uint8), cv2.COLOR_BGR2RGB)
56 | # targets1 = cv2.cvtColor((targets[0, ...]*255).astype(np.uint8), cv2.COLOR_BGR2RGB)
57 | # masks1 = cv2.cvtColor((masks[0, ...]*255).astype(np.uint8), cv2.COLOR_BGR2RGB)
58 |
59 | # cv2.imwrite('to/inp1.jpg', inp1)
60 | # cv2.imwrite('to/inp2.jpg', inp2)
61 | # cv2.imwrite('to/fg1.jpg', fg1)
62 | # cv2.imwrite('to/targets1.jpg', targets1)
63 | # cv2.imwrite('to/masks1.jpg', masks1)
64 | # sys.exit(0)
65 |
66 |
67 | ### Vid2VidNet setting
68 | vid2vidnet = PixReferNet(config_path)
69 | params = vid2vidnet.params
70 | epochs = params.training['epochs']
71 | params.add_hparam('max_to_keep', 2)
72 | params.add_hparam('save_dir', 'ckpt_pixrefer')
73 | params.add_hparam('save_name', 'pixrefernet')
74 | params.add_hparam('save_step', 5000)
75 | params.add_hparam('summary_step', 100)
76 | params.add_hparam('summary_dir', 'log/summary_pixrefer')
77 | params.batch_size = batch_size
78 | params.add_hparam('is_training', True)
79 | params.sess = sess
80 | params.vgg_model_path = os.path.join(params.model_dir, 'vgg_16.ckpt')
81 | vid2vidnet.set_params(params)
82 |
83 | mkdir(params.save_dir)
84 | mkdir(params.summary_dir)
85 |
86 | train_nodes = vid2vidnet.build_train_op(*train_iter.get_next())
87 | # sess.run(tf.global_variables_initializer())
88 |
89 | all_var = tf.global_variables()
90 | init_var = [v for v in all_var if 'vgg_16' not in v.name]
91 | init = tf.variables_initializer(var_list=init_var)
92 | sess.run(init)
93 |
94 | # # Restore from save_dir
95 | # if ('checkpoint' in os.listdir(params.save_dir)):
96 | # variables_to_restore = tf.trainable_variables()
97 | # varlist = {v.name[:-2]: v for v in variables_to_restore if v.name[:6]!='vgg_16'}
98 | # print(varlist)
99 | # tf.train.Saver(varlist).restore(sess, tf.train.latest_checkpoint(params.save_dir))
100 |
101 | tf.summary.scalar("discriminator_loss", train_nodes['Discrim_loss'])
102 | tf.summary.scalar("generator_loss_GAN", train_nodes['Gen_loss_GAN'])
103 | tf.summary.scalar("generator_loss_L1", train_nodes['Gen_loss_L1'])
104 |
105 | with tf.name_scope("inputs1_summary"):
106 | tf.summary.image("inputs1", tf.image.convert_image_dtype(train_nodes['Inputs'][... ,3:6], dtype=tf.uint8))
107 |
108 | with tf.name_scope("targets_summary"):
109 | tf.summary.image("targets", tf.image.convert_image_dtype(train_nodes['Targets'], dtype=tf.uint8))
110 |
111 | with tf.name_scope("outputs_summary"):
112 | tf.summary.image("outputs", tf.image.convert_image_dtype(train_nodes['Outputs'], dtype=tf.uint8))
113 |
114 | with tf.name_scope("alpha_summary"):
115 | tf.summary.image("alphas", tf.image.convert_image_dtype(train_nodes['Alphas'], dtype=tf.uint8))
116 |
117 | with tf.name_scope("inputs0_summary"):
118 | tf.summary.image("inputs0", tf.image.convert_image_dtype(train_nodes['Inputs'][:,:,:,:3], dtype=tf.uint8))
119 |
120 | # with tf.name_scope("fg_inputs0_summary"):
121 | # tf.summary.image("fg_inputs0", tf.image.convert_image_dtype(train_nodes['FGInputs'], dtype=tf.uint8))
122 |
123 | # with tf.name_scope("inputs_fg_summary"):
124 | # tf.summary.image("inputs_fg", tf.image.convert_image_dtype(train_nodes['Inputs'][:,:,:,:3], dtype=tf.uint8))
125 |
126 | # # Add histograms for gradients.
127 | # for grad, var in train_nodes['Discrim_grads_and_vars'] + train_nodes['Gen_grads_and_vars']:
128 | # if(grad is not None):
129 | # tf.summary.histogram(var.op.name + "/gradients", grad)
130 |
131 | merge_summary_op = tf.summary.merge_all()
132 | summary_writer = tf.summary.FileWriter(params.summary_dir, graph=sess.graph)
133 |
134 | for i in range(epochs):
135 | ### Run training
136 | result = sess.run([train_nodes['Train_op'],
137 | merge_summary_op,
138 | train_nodes['Gen_loss_GAN'],
139 | train_nodes['Gen_loss_L1'],
140 | train_nodes['Discrim_loss'],
141 | train_nodes['Lr'],
142 | train_nodes['Global_step']])
143 | _, summary, gen_loss_GAN, gen_loss_L1, discrim_loss, lr, global_step = result
144 | if(global_step % params.summary_step==0):
145 | print('Step {}, Lr= {:.2e}: \n\tgen_loss_GAN= {:.3f}, \n\tgen_loss_L1= {:.3f}, \n\tdiscrim_loss= {:.3f}'.format(global_step, lr, gen_loss_GAN, gen_loss_L1, discrim_loss))
146 | summary_writer.add_summary(summary, global_step)
147 |
148 | ### Save checkpoint
149 | if (global_step % params.save_step == 0):
150 | tf.train.Saver(max_to_keep=params.max_to_keep, var_list=tf.global_variables()).save(sess,
151 | os.path.join(params.save_dir,
152 | params.save_name),
153 | global_step=global_step)
154 |
--------------------------------------------------------------------------------
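train_pixrefer.py initializes every variable except the vgg_16 ones, which are expected to come from the pretrained vgg_16.ckpt pointed to by params.vgg_model_path (the actual restore happens inside PixReferNet and is not shown here). A sketch of that selective-initialization pattern, assuming TensorFlow 1.x; the helper name is ours.

import tensorflow as tf

def init_except_vgg(sess, vgg_model_path):
  # Initialize everything except the pretrained VGG-16 variables ...
  all_vars = tf.global_variables()
  sess.run(tf.variables_initializer(
      [v for v in all_vars if 'vgg_16' not in v.name]))
  # ... then restore the VGG-16 weights from the released checkpoint.
  vgg_vars = [v for v in all_vars if 'vgg_16' in v.name]
  if vgg_vars:
    tf.train.Saver(var_list=vgg_vars).restore(sess, vgg_model_path)
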
/voicepuppet/pixrefer/vgg_simple.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | slim = tf.contrib.slim
4 |
5 |
6 | def vgg_arg_scope(weight_decay=0.0005):
7 | """Defines the VGG arg scope.
8 | Args:
9 | weight_decay: The l2 regularization coefficient.
10 | Returns:
11 | An arg_scope.
12 | """
13 | with slim.arg_scope([slim.conv2d, slim.fully_connected],
14 | activation_fn=tf.nn.relu,
15 | weights_regularizer=slim.l2_regularizer(weight_decay),
16 | biases_initializer=tf.zeros_initializer()):
17 | with slim.arg_scope([slim.conv2d], padding='SAME') as arg_sc:
18 | return arg_sc
19 |
20 |
21 | def vgg_a(inputs,
22 | num_classes=1000,
23 | is_training=True,
24 | dropout_keep_prob=0.5,
25 | spatial_squeeze=True,
26 | scope='vgg_a',
27 | fc_conv_padding='VALID',
28 | global_pool=False):
29 | """Oxford Net VGG 11-Layers version A Example.
30 | Note: All the fully_connected layers have been transformed to conv2d layers.
31 | To use in classification mode, resize input to 224x224.
32 | Args:
33 | inputs: a tensor of size [batch_size, height, width, channels].
34 | num_classes: number of predicted classes. If 0 or None, the logits layer is
35 | omitted and the input features to the logits layer are returned instead.
36 | is_training: whether or not the model is being trained.
37 | dropout_keep_prob: the probability that activations are kept in the dropout
38 | layers during training.
39 | spatial_squeeze: whether or not should squeeze the spatial dimensions of the
40 | outputs. Useful to remove unnecessary dimensions for classification.
41 | scope: Optional scope for the variables.
42 | fc_conv_padding: the type of padding to use for the fully connected layer
43 | that is implemented as a convolutional layer. Use 'SAME' padding if you
44 | are applying the network in a fully convolutional manner and want to
45 | get a prediction map downsampled by a factor of 32 as an output.
46 | Otherwise, the output prediction map will be (input / 32) - 6 in case of
47 | 'VALID' padding.
48 | global_pool: Optional boolean flag. If True, the input to the classification
49 | layer is avgpooled to size 1x1, for any input size. (This is not part
50 | of the original VGG architecture.)
51 | Returns:
52 | net: the output of the logits layer (if num_classes is a non-zero integer),
53 | or the input to the logits layer (if num_classes is 0 or None).
54 | end_points: a dict of tensors with intermediate activations.
55 | """
56 | with tf.variable_scope(scope, 'vgg_a', [inputs]) as sc:
57 | end_points_collection = sc.original_name_scope + '_end_points'
58 | # Collect outputs for conv2d, fully_connected and max_pool2d.
59 | with slim.arg_scope([slim.conv2d, slim.max_pool2d],
60 | outputs_collections=end_points_collection):
61 | net = slim.repeat(inputs, 1, slim.conv2d, 64, [3, 3], scope='conv1')
62 | net = slim.max_pool2d(net, [2, 2], scope='pool1')
63 | net = slim.repeat(net, 1, slim.conv2d, 128, [3, 3], scope='conv2')
64 | net = slim.max_pool2d(net, [2, 2], scope='pool2')
65 | net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3], scope='conv3')
66 | net = slim.max_pool2d(net, [2, 2], scope='pool3')
67 | net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv4')
68 | net = slim.max_pool2d(net, [2, 2], scope='pool4')
69 | net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv5')
70 | net = slim.max_pool2d(net, [2, 2], scope='pool5')
71 |
72 | # Use conv2d instead of fully_connected layers.
73 | net = slim.conv2d(net, 4096, [7, 7], padding=fc_conv_padding, scope='fc6')
74 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
75 | scope='dropout6')
76 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
77 | # Convert end_points_collection into a end_point dict.
78 | end_points = slim.utils.convert_collection_to_dict(end_points_collection)
79 | if global_pool:
80 | net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='global_pool')
81 | end_points['global_pool'] = net
82 | if num_classes:
83 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
84 | scope='dropout7')
85 | net = slim.conv2d(net, num_classes, [1, 1],
86 | activation_fn=None,
87 | normalizer_fn=None,
88 | scope='fc8')
89 | if spatial_squeeze:
90 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
91 | end_points[sc.name + '/fc8'] = net
92 | return net, end_points
93 | vgg_a.default_image_size = 224
94 |
95 |
96 | def vgg_16(inputs,
97 | num_classes=1000,
98 | is_training=False,
99 | dropout_keep_prob=0.5,
100 | spatial_squeeze=True,
101 | scope='vgg_16',
102 | fc_conv_padding='VALID',
103 | global_pool=False,
104 | reuse=False):
105 | """Oxford Net VGG 16-Layers version D Example.
106 | Note: All the fully_connected layers have been transformed to conv2d layers.
107 | To use in classification mode, resize input to 224x224.
108 | Args:
109 | inputs: a tensor of size [batch_size, height, width, channels].
110 | num_classes: number of predicted classes. If 0 or None, the logits layer is
111 | omitted and the input features to the logits layer are returned instead.
112 | is_training: whether or not the model is being trained.
113 | dropout_keep_prob: the probability that activations are kept in the dropout
114 | layers during training.
115 | spatial_squeeze: whether or not should squeeze the spatial dimensions of the
116 | outputs. Useful to remove unnecessary dimensions for classification.
117 | scope: Optional scope for the variables.
118 | fc_conv_padding: the type of padding to use for the fully connected layer
119 | that is implemented as a convolutional layer. Use 'SAME' padding if you
120 | are applying the network in a fully convolutional manner and want to
121 | get a prediction map downsampled by a factor of 32 as an output.
122 | Otherwise, the output prediction map will be (input / 32) - 6 in case of
123 | 'VALID' padding.
124 | global_pool: Optional boolean flag. If True, the input to the classification
125 | layer is avgpooled to size 1x1, for any input size. (This is not part
126 | of the original VGG architecture.)
127 | Returns:
128 |     out1, out2, out3, out4: intermediate activations of conv1_2, conv2_2,
129 |       conv3_3 and conv4_3, returned as multi-scale feature maps.
130 |     exclude: variable scopes of the omitted layers, to skip when restoring vgg_16.ckpt.
131 | """
132 | with tf.variable_scope(scope, 'vgg_16', [inputs], reuse=reuse) as sc:
133 | out = []
134 | end_points_collection = sc.original_name_scope + '_end_points'
135 | # Collect outputs for conv2d, fully_connected and max_pool2d.
136 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
137 | outputs_collections=end_points_collection):
138 | net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
139 |
140 | # with tf.variable_scope('relu1'):
141 | out1 = net
142 |
143 | net = slim.max_pool2d(net, [2, 2], scope='pool1')
144 | net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
145 |
146 | # with tf.variable_scope('relu2'):
147 | # out = tf.add(net, tf.zeros_like(net), name='conv2_2')
148 | out2 = net
149 |
150 | net = slim.max_pool2d(net, [2, 2], scope='pool2')
151 | net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
152 |
153 | # with tf.variable_scope('relu3'):
154 | out3 = net
155 |
156 | net = slim.max_pool2d(net, [2, 2], scope='pool3')
157 | net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
158 |
159 | out4 = net
160 | exclude = ['vgg_16/fc6', 'vgg_16/pool4','vgg_16/conv5','vgg_16/pool5','vgg_16/fc7','vgg_16/global_pool','vgg_16/fc8/squeezed','vgg_16/fc8']
161 |
162 | return out1, out2, out3, out4, exclude
163 | vgg_16.default_image_size = 224
164 |
--------------------------------------------------------------------------------
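vgg_simple.py's vgg_16 returns the conv1_2, conv2_2, conv3_3 and conv4_3 activations instead of logits, which is the form needed for a multi-scale VGG feature (perceptual) loss. A sketch of how such a loss could be assembled from those outputs; the actual loss used by PixReferNet lives in pixrefer.py and may differ, and the local import assumes the module is on the path.

import tensorflow as tf
from vgg_simple import vgg_16, vgg_arg_scope

slim = tf.contrib.slim

def vgg_feature_loss(generated, target):
  # Mean absolute difference of multi-scale VGG-16 features (sketch only).
  with slim.arg_scope(vgg_arg_scope()):
    g1, g2, g3, g4, _ = vgg_16(generated)
    t1, t2, t3, t4, _ = vgg_16(target, reuse=True)
  return tf.add_n([tf.reduce_mean(tf.abs(g - t))
                   for g, t in zip([g1, g2, g3, g4], [t1, t2, t3, t4])])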