├── .gitignore ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── cog.yaml ├── docs ├── index.html └── static │ ├── css │ ├── bulma-carousel.min.css │ ├── bulma-slider.min.css │ ├── bulma.css.map.txt │ ├── bulma.min.css │ ├── fontawesome.all.min.css │ └── index.css │ ├── images │ ├── pipeline.png │ └── teaser.png │ ├── js │ ├── bulma-carousel.js │ ├── bulma-carousel.min.js │ ├── bulma-slider.js │ ├── bulma-slider.min.js │ ├── fontawesome.all.min.js │ └── index.js │ ├── pdfs │ └── sample.pdf │ └── videos │ ├── Ablation.mp4 │ ├── Comparison.mp4 │ └── Results_in_the_wild.mp4 ├── examples ├── audio │ ├── 1.wav │ └── 2.wav └── face │ ├── 1.mp4 │ ├── 2.mp4 │ ├── 3.mp4 │ ├── 4.mp4 │ └── 5.mp4 ├── inference.py ├── inference_videoretalking.sh ├── models ├── DNet.py ├── ENet.py ├── LNet.py ├── __init__.py ├── base_blocks.py ├── ffc.py └── transformer.py ├── predict.py ├── quick_demo.ipynb ├── requirements.txt ├── third_part ├── GFPGAN │ ├── LICENSE │ ├── gfpgan │ │ ├── __init__.py │ │ ├── archs │ │ │ ├── __init__.py │ │ │ ├── arcface_arch.py │ │ │ ├── gfpgan_bilinear_arch.py │ │ │ ├── gfpganv1_arch.py │ │ │ ├── gfpganv1_clean_arch.py │ │ │ ├── stylegan2_bilinear_arch.py │ │ │ └── stylegan2_clean_arch.py │ │ ├── data │ │ │ ├── __init__.py │ │ │ └── ffhq_degradation_dataset.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ └── gfpgan_model.py │ │ ├── train.py │ │ ├── utils.py │ │ ├── version.py │ │ └── weights │ │ │ └── README.md │ └── options │ │ ├── train_gfpgan_v1.yml │ │ └── train_gfpgan_v1_simple.yml ├── GPEN │ ├── align_faces.py │ ├── face_detect │ │ ├── .DS_Store │ │ ├── data │ │ │ ├── FDDB │ │ │ │ └── img_list.txt │ │ │ ├── __init__.py │ │ │ ├── config.py │ │ │ ├── data_augment.py │ │ │ └── wider_face.py │ │ ├── facemodels │ │ │ ├── __init__.py │ │ │ ├── net.py │ │ │ └── retinaface.py │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── functions │ │ │ │ └── prior_box.py │ │ │ └── modules │ │ │ │ ├── __init__.py │ │ │ │ └── multibox_loss.py │ │ ├── retinaface_detection.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── box_utils.py │ │ │ ├── nms │ │ │ ├── __init__.py │ │ │ └── py_cpu_nms.py │ │ │ └── timer.py │ ├── face_model │ │ ├── face_gan.py │ │ ├── gpen_model.py │ │ └── op │ │ │ ├── __init__.py │ │ │ ├── fused_act.py │ │ │ ├── fused_bias_act.cpp │ │ │ ├── fused_bias_act_kernel.cu │ │ │ ├── upfirdn2d.cpp │ │ │ ├── upfirdn2d.py │ │ │ └── upfirdn2d_kernel.cu │ ├── face_morpher │ │ ├── .gitignore │ │ ├── README.rst │ │ ├── facemorpher │ │ │ ├── __init__.py │ │ │ ├── aligner.py │ │ │ ├── averager.py │ │ │ ├── blender.py │ │ │ ├── locator.py │ │ │ ├── morpher.py │ │ │ ├── plotter.py │ │ │ ├── videoer.py │ │ │ └── warper.py │ │ ├── requirements.txt │ │ ├── scripts │ │ │ ├── make_docs.sh │ │ │ └── publish_ghpages.sh │ │ ├── setup.cfg │ │ └── setup.py │ ├── face_parse │ │ ├── blocks.py │ │ ├── face_parsing.py │ │ ├── mask.png │ │ ├── model.py │ │ ├── parse_model.py │ │ ├── resnet.py │ │ └── test.png │ └── gpen_face_enhancer.py ├── face3d │ ├── checkpoints │ │ └── model_name │ │ │ └── test_opt.txt │ ├── coeff_detector.py │ ├── data │ │ ├── __init__.py │ │ ├── base_dataset.py │ │ ├── flist_dataset.py │ │ ├── image_folder.py │ │ └── template_dataset.py │ ├── data_preparation.py │ ├── extract_kp_videos.py │ ├── face_recon_videos.py │ ├── models │ │ ├── __init__.py │ │ ├── arcface_torch │ │ │ ├── README.md │ │ │ ├── backbones │ │ │ │ ├── __init__.py │ │ │ │ ├── iresnet.py │ │ │ │ ├── iresnet2060.py │ │ │ │ └── mobilefacenet.py │ │ │ ├── configs │ │ │ │ ├── 3millions.py │ │ │ │ ├── 3millions_pfc.py │ │ │ │ ├── 
__init__.py │ │ │ │ ├── base.py │ │ │ │ ├── glint360k_mbf.py │ │ │ │ ├── glint360k_r100.py │ │ │ │ ├── glint360k_r18.py │ │ │ │ ├── glint360k_r34.py │ │ │ │ ├── glint360k_r50.py │ │ │ │ ├── ms1mv3_mbf.py │ │ │ │ ├── ms1mv3_r18.py │ │ │ │ ├── ms1mv3_r2060.py │ │ │ │ ├── ms1mv3_r34.py │ │ │ │ ├── ms1mv3_r50.py │ │ │ │ └── speed.py │ │ │ ├── dataset.py │ │ │ ├── docs │ │ │ │ ├── eval.md │ │ │ │ ├── install.md │ │ │ │ ├── modelzoo.md │ │ │ │ └── speed_benchmark.md │ │ │ ├── eval │ │ │ │ ├── __init__.py │ │ │ │ └── verification.py │ │ │ ├── eval_ijbc.py │ │ │ ├── inference.py │ │ │ ├── losses.py │ │ │ ├── onnx_helper.py │ │ │ ├── onnx_ijbc.py │ │ │ ├── partial_fc.py │ │ │ ├── requirement.txt │ │ │ ├── run.sh │ │ │ ├── torch2onnx.py │ │ │ ├── train.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── plot.py │ │ │ │ ├── utils_amp.py │ │ │ │ ├── utils_callbacks.py │ │ │ │ ├── utils_config.py │ │ │ │ ├── utils_logging.py │ │ │ │ └── utils_os.py │ │ ├── base_model.py │ │ ├── bfm.py │ │ ├── facerecon_model.py │ │ ├── losses.py │ │ ├── networks.py │ │ └── template_model.py │ ├── options │ │ ├── __init__.py │ │ ├── base_options.py │ │ ├── inference_options.py │ │ ├── test_options.py │ │ └── train_options.py │ └── util │ │ ├── BBRegressorParam_r.mat │ │ ├── __init__.py │ │ ├── detect_lm68.py │ │ ├── generate_list.py │ │ ├── html.py │ │ ├── load_mats.py │ │ ├── nvdiffrast.py │ │ ├── preprocess.py │ │ ├── skin_mask.py │ │ ├── test_mean_face.txt │ │ ├── util.py │ │ └── visualizer.py ├── face_detection │ ├── README.md │ ├── __init__.py │ ├── api.py │ ├── detection │ │ ├── __init__.py │ │ ├── core.py │ │ └── sfd │ │ │ ├── __init__.py │ │ │ ├── bbox.py │ │ │ ├── detect.py │ │ │ ├── net_s3fd.py │ │ │ └── sfd_detector.py │ ├── models.py │ └── utils.py └── ganimation_replicate │ ├── LICENSE │ ├── checkpoints │ ├── opt.txt │ └── run_script.sh │ ├── ckpts │ ├── ganimation │ │ ├── 220419_183211 │ │ │ ├── opt.txt │ │ │ └── run_script.sh │ │ └── 220419_183229 │ │ │ ├── opt.txt │ │ │ └── run_script.sh │ ├── opt.txt │ └── run_script.sh │ ├── data │ ├── __init__.py │ ├── base_dataset.py │ ├── celeba.py │ └── data_loader.py │ ├── main.py │ ├── model │ ├── __init__.py │ ├── base_model.py │ ├── ganimation.py │ ├── model_utils.py │ └── stargan.py │ ├── options.py │ ├── solvers.py │ └── visualizer.py ├── utils ├── alignment_stit.py ├── audio.py ├── ffhq_preprocess.py ├── flow_util.py ├── hparams.py └── inference_utils.py └── webUI.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pkl 2 | *.jpg 3 | *.pth 4 | *.pyc 5 | __pycache__ 6 | *.h5 7 | *.pyc 8 | *.mkv 9 | *.gif 10 | *.webm 11 | checkpoints/* 12 | results/* 13 | temp/* 14 | segments.txt 15 | .DS_Store 16 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. 
6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | - Using welcoming and inclusive language 12 | - Being respectful of differing viewpoints and experiences 13 | - Gracefully accepting constructive criticism 14 | - Focusing on what is best for the community 15 | - Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | - The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | - Trolling, insulting/derogatory comments, and personal or political attacks 21 | - Public or private harassment 22 | - Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | - Other conduct that could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned with this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project email address, posting via an official social media account, or acting as an appointed representative at an online or offline event. 34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the Contributor Covenant, version 2.0, available at [https://www.contributor-covenant.org/version/2/0/code_of_conduct.html](https://www.contributor-covenant.org/version/2/0/code_of_conduct.html). 
44 | -------------------------------------------------------------------------------- /cog.yaml: -------------------------------------------------------------------------------- 1 | # Configuration for Cog ⚙️ 2 | # Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md 3 | 4 | build: 5 | gpu: true 6 | system_packages: 7 | - "libgl1-mesa-glx" 8 | - "libglib2.0-0" 9 | - "ffmpeg" 10 | python_version: "3.11" 11 | python_packages: 12 | - "torch==2.0.1" 13 | - "torchvision==0.15.2" 14 | - "basicsr==1.4.2" 15 | - "kornia==0.5.1" 16 | - "face-alignment==1.3.4" 17 | - "ninja==1.10.2.3" 18 | - "einops==0.4.1" 19 | - "facexlib==0.2.5" 20 | - "librosa==0.9.2" 21 | - "cmake==3.27.7" 22 | - "numpy==1.23.4" 23 | run: 24 | - pip install dlib 25 | - mkdir -p /root/.pyenv/versions/3.11.6/lib/python3.11/site-packages/facexlib/weights/ && wget --output-document "/root/.pyenv/versions/3.11.6/lib/python3.11/site-packages/facexlib/weights/detection_Resnet50_Final.pth" "https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_Resnet50_Final.pth" 26 | - mkdir -p /root/.pyenv/versions/3.11.6/lib/python3.11/site-packages/facexlib/weights/ && wget --output-document "/root/.pyenv/versions/3.11.6/lib/python3.11/site-packages/facexlib/weights/parsing_parsenet.pth" "https://github.com/xinntao/facexlib/releases/download/v0.2.2/parsing_parsenet.pth" 27 | - mkdir -p /root/.cache/torch/hub/checkpoints/ && wget --output-document "/root/.cache/torch/hub/checkpoints/s3fd-619a316812.pth" "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth" 28 | - mkdir -p /root/.cache/torch/hub/checkpoints/ && wget --output-document "/root/.cache/torch/hub/checkpoints/2DFAN4-cd938726ad.zip" "https://www.adrianbulat.com/downloads/python-fan/2DFAN4-cd938726ad.zip" 29 | predict: "predict.py:Predictor" 30 | -------------------------------------------------------------------------------- /docs/static/css/bulma-carousel.min.css: -------------------------------------------------------------------------------- 1 | @-webkit-keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}.slider{position:relative;width:100%}.slider-container{display:flex;flex-wrap:nowrap;flex-direction:row;overflow:hidden;-webkit-transform:translate3d(0,0,0);transform:translate3d(0,0,0);min-height:100%}.slider-container.is-vertical{flex-direction:column}.slider-container .slider-item{flex:none}.slider-container .slider-item .image.is-covered img{-o-object-fit:cover;object-fit:cover;-o-object-position:center center;object-position:center center;height:100%;width:100%}.slider-container .slider-item .video-container{height:0;padding-bottom:0;padding-top:56.25%;margin:0;position:relative}.slider-container .slider-item .video-container.is-1by1,.slider-container .slider-item .video-container.is-square{padding-top:100%}.slider-container .slider-item .video-container.is-4by3{padding-top:75%}.slider-container .slider-item .video-container.is-21by9{padding-top:42.857143%}.slider-container .slider-item .video-container embed,.slider-container .slider-item .video-container iframe,.slider-container .slider-item .video-container 
object{position:absolute;top:0;left:0;width:100%!important;height:100%!important}.slider-navigation-next,.slider-navigation-previous{display:flex;justify-content:center;align-items:center;position:absolute;width:42px;height:42px;background:#fff center center no-repeat;background-size:20px 20px;border:1px solid #fff;border-radius:25091983px;box-shadow:0 2px 5px #3232321a;top:50%;margin-top:-20px;left:0;cursor:pointer;transition:opacity .3s,-webkit-transform .3s;transition:transform .3s,opacity .3s;transition:transform .3s,opacity .3s,-webkit-transform .3s}.slider-navigation-next:hover,.slider-navigation-previous:hover{-webkit-transform:scale(1.2);transform:scale(1.2)}.slider-navigation-next.is-hidden,.slider-navigation-previous.is-hidden{display:none;opacity:0}.slider-navigation-next svg,.slider-navigation-previous svg{width:25%}.slider-navigation-next{left:auto;right:0;background:#fff center center no-repeat;background-size:20px 20px}.slider-pagination{display:none;justify-content:center;align-items:center;position:absolute;bottom:0;left:0;right:0;padding:.5rem 1rem;text-align:center}.slider-pagination .slider-page{background:#fff;width:10px;height:10px;border-radius:25091983px;display:inline-block;margin:0 3px;box-shadow:0 2px 5px #3232321a;transition:-webkit-transform .3s;transition:transform .3s;transition:transform .3s,-webkit-transform .3s;cursor:pointer}.slider-pagination .slider-page.is-active,.slider-pagination .slider-page:hover{-webkit-transform:scale(1.4);transform:scale(1.4)}@media screen and (min-width:800px){.slider-pagination{display:flex}}.hero.has-carousel{position:relative}.hero.has-carousel+.hero-body,.hero.has-carousel+.hero-footer,.hero.has-carousel+.hero-head{z-index:10;overflow:hidden}.hero.has-carousel .hero-carousel{position:absolute;top:0;left:0;bottom:0;right:0;height:auto;border:none;margin:auto;padding:0;z-index:0}.hero.has-carousel .hero-carousel .slider{width:100%;max-width:100%;overflow:hidden;height:100%!important;max-height:100%;z-index:0}.hero.has-carousel .hero-carousel .slider .has-background{max-height:100%}.hero.has-carousel .hero-carousel .slider .has-background .is-background{-o-object-fit:cover;object-fit:cover;-o-object-position:center center;object-position:center center;height:100%;width:100%}.hero.has-carousel .hero-body{margin:0 3rem;z-index:10} -------------------------------------------------------------------------------- /docs/static/css/index.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: 'Noto Sans', sans-serif; 3 | } 4 | 5 | .hero-body-img{ 6 | text-align: center; 7 | } 8 | 9 | .footer .icon-link { 10 | font-size: 25px; 11 | color: #000; 12 | } 13 | 14 | .link-block a { 15 | margin-top: 5px; 16 | margin-bottom: 5px; 17 | } 18 | 19 | .dnerf { 20 | font-variant: small-caps; 21 | } 22 | 23 | 24 | .teaser .hero-body { 25 | padding-top: 0; 26 | padding-bottom: 3rem; 27 | } 28 | 29 | .teaser { 30 | font-family: 'Google Sans', sans-serif; 31 | } 32 | 33 | 34 | .publication-title { 35 | } 36 | 37 | .publication-banner { 38 | max-height: parent; 39 | 40 | } 41 | 42 | .publication-banner video { 43 | position: relative; 44 | left: auto; 45 | top: auto; 46 | transform: none; 47 | object-fit: fit; 48 | } 49 | 50 | .publication-header .hero-body { 51 | } 52 | 53 | .publication-title { 54 | font-family: 'Google Sans', sans-serif; 55 | } 56 | 57 | .publication-authors { 58 | font-family: 'Google Sans', sans-serif; 59 | } 60 | 61 | .publication-venue { 62 | color: #555; 63 | width: 
fit-content; 64 | font-weight: bold; 65 | } 66 | 67 | .publication-awards { 68 | color: #ff3860; 69 | width: fit-content; 70 | font-weight: bolder; 71 | } 72 | 73 | .publication-authors { 74 | } 75 | 76 | .author-block { 77 | font-size: 16px; 78 | padding: 0 5px; 79 | display: inline-block; 80 | } 81 | 82 | .publication-banner img { 83 | } 84 | 85 | .publication-authors { 86 | /*color: #4286f4;*/ 87 | } 88 | 89 | .publication-video { 90 | position: relative; 91 | width: 100%; 92 | height: 0; 93 | padding-bottom: 56.25%; 94 | 95 | overflow: hidden; 96 | border-radius: 10px !important; 97 | } 98 | 99 | .publication-video iframe { 100 | position: absolute; 101 | top: 0; 102 | left: 0; 103 | width: 100%; 104 | height: 100%; 105 | } 106 | 107 | .publication-body img { 108 | } 109 | 110 | .results-carousel { 111 | overflow: hidden; 112 | } 113 | 114 | .results-carousel .item { 115 | margin: 5px; 116 | overflow: hidden; 117 | padding: 20px; 118 | font-size: 0; 119 | } 120 | 121 | .results-carousel video { 122 | margin: 0; 123 | } 124 | 125 | .slider-pagination .slider-page { 126 | background: #000000; 127 | } 128 | 129 | .eql-cntrb { 130 | font-size: smaller; 131 | } 132 | 133 | 134 | 135 | 136 | body{ 137 | font-weight: 200; 138 | font-size: 16px; 139 | /*background-color: rgb(43, 60, 197);*/ 140 | /*color: rgb(0, 79, 241);*/ 141 | /*color: white;*/ 142 | border-top:5px solid rgb(255, 180, 240); 143 | /*border-bottom:5px solid orange;*/ 144 | } 145 | b{ 146 | color:rgb(0, 79, 241); 147 | } 148 | 149 | .title{ 150 | text-align: center; 151 | } 152 | 153 | .posts{ 154 | /*font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;*/ 155 | font-size: 14px; 156 | 157 | } 158 | .news{ 159 | line-height: 1.5em; 160 | } 161 | .post{ 162 | border-left: 5px solid rgb(255, 180, 240); 163 | } 164 | .xtitle{ 165 | font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; 166 | font-size: 30px; 167 | text-align: center; 168 | /* margin: 10px 0; */ 169 | /* font-weight: 400; */ 170 | /*color: rgb(0, 79, 241);*/ 171 | } 172 | 173 | .posts .teaser{ 174 | width: 160px; 175 | height: 120px; 176 | float: left; 177 | margin: 0 0 10px 10px; 178 | } 179 | 180 | .link-block{ 181 | margin: 0 10px; 182 | } 183 | 184 | a{ 185 | color: #111; 186 | position: relative; 187 | } 188 | a:after{ 189 | content: ''; 190 | position: absolute; 191 | top: 60%; 192 | left: -0.1em; 193 | right: -0.1em; 194 | bottom: 0; 195 | transition:top 200ms cubic-bezier(0, 0.8, 0.13, 1); 196 | /*background-color: rgba(225, 166, 121, 0.5);*/ 197 | background-color: rgba(255,211,30, 0.4); 198 | } 199 | .emojilink:after{ 200 | background-color: rgba(255,211,30, 0.0) 201 | /*rgba(225, 166, 121, 0.0);*/ 202 | } 203 | a:hover{ 204 | color: black; 205 | text-decoration: none; 206 | } 207 | a:hover:after{ 208 | color:black; 209 | /*color: #111;*/ 210 | text-decoration: none; 211 | top:0%; 212 | } 213 | .entry{ 214 | position: relative; 215 | top:0; 216 | left: 20px; 217 | margin-top: 5px; 218 | } 219 | 220 | .posts > .post{ 221 | border-bottom: 0; 222 | padding-bottom:0em; 223 | padding-top: 10px; 224 | margin-bottom: 5px; 225 | } 226 | .papertitle{ 227 | margin-top: 0px; 228 | font-weight: 600; 229 | font-size:16px; 230 | font-style: italic; 231 | /*font-style: italic;*/ 232 | /*height: 2.6em*/ 233 | } 234 | -------------------------------------------------------------------------------- /docs/static/images/pipeline.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chenxwh/video-retalking/d2257f1290bd517ca18a0d15ecaeb11bbcc103a2/docs/static/images/pipeline.png -------------------------------------------------------------------------------- /docs/static/images/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxwh/video-retalking/d2257f1290bd517ca18a0d15ecaeb11bbcc103a2/docs/static/images/teaser.png -------------------------------------------------------------------------------- /docs/static/js/index.js: -------------------------------------------------------------------------------- 1 | window.HELP_IMPROVE_VIDEOJS = false; 2 | 3 | 4 | $(document).ready(function() { 5 | // Check for click events on the navbar burger icon 6 | 7 | var options = { 8 | slidesToScroll: 1, 9 | slidesToShow: 1, 10 | loop: true, 11 | infinite: true, 12 | autoplay: true, 13 | autoplaySpeed: 5000, 14 | } 15 | 16 | // Initialize all div with carousel class 17 | var carousels = bulmaCarousel.attach('.carousel', options); 18 | 19 | bulmaSlider.attach(); 20 | 21 | }) 22 | -------------------------------------------------------------------------------- /docs/static/pdfs/sample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxwh/video-retalking/d2257f1290bd517ca18a0d15ecaeb11bbcc103a2/docs/static/pdfs/sample.pdf -------------------------------------------------------------------------------- /docs/static/videos/Ablation.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxwh/video-retalking/d2257f1290bd517ca18a0d15ecaeb11bbcc103a2/docs/static/videos/Ablation.mp4 -------------------------------------------------------------------------------- /docs/static/videos/Comparison.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxwh/video-retalking/d2257f1290bd517ca18a0d15ecaeb11bbcc103a2/docs/static/videos/Comparison.mp4 -------------------------------------------------------------------------------- /docs/static/videos/Results_in_the_wild.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxwh/video-retalking/d2257f1290bd517ca18a0d15ecaeb11bbcc103a2/docs/static/videos/Results_in_the_wild.mp4 -------------------------------------------------------------------------------- /examples/audio/1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxwh/video-retalking/d2257f1290bd517ca18a0d15ecaeb11bbcc103a2/examples/audio/1.wav -------------------------------------------------------------------------------- /examples/audio/2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxwh/video-retalking/d2257f1290bd517ca18a0d15ecaeb11bbcc103a2/examples/audio/2.wav -------------------------------------------------------------------------------- /examples/face/1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxwh/video-retalking/d2257f1290bd517ca18a0d15ecaeb11bbcc103a2/examples/face/1.mp4 -------------------------------------------------------------------------------- /examples/face/2.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chenxwh/video-retalking/d2257f1290bd517ca18a0d15ecaeb11bbcc103a2/examples/face/2.mp4 -------------------------------------------------------------------------------- /examples/face/3.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxwh/video-retalking/d2257f1290bd517ca18a0d15ecaeb11bbcc103a2/examples/face/3.mp4 -------------------------------------------------------------------------------- /examples/face/4.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxwh/video-retalking/d2257f1290bd517ca18a0d15ecaeb11bbcc103a2/examples/face/4.mp4 -------------------------------------------------------------------------------- /examples/face/5.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxwh/video-retalking/d2257f1290bd517ca18a0d15ecaeb11bbcc103a2/examples/face/5.mp4 -------------------------------------------------------------------------------- /inference_videoretalking.sh: -------------------------------------------------------------------------------- 1 | python3 inference.py \ 2 | --face ./examples/face/1.mp4 \ 3 | --audio ./examples/audio/1.wav \ 4 | --outfile results/1_1.mp4 -------------------------------------------------------------------------------- /models/DNet.py: -------------------------------------------------------------------------------- 1 | # TODO 2 | import functools 3 | import numpy as np 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | from utils import flow_util 10 | from models.base_blocks import LayerNorm2d, ADAINHourglass, FineEncoder, FineDecoder 11 | 12 | # DNet 13 | class DNet(nn.Module): 14 | def __init__(self): 15 | super(DNet, self).__init__() 16 | self.mapping_net = MappingNet() 17 | self.warpping_net = WarpingNet() 18 | self.editing_net = EditingNet() 19 | 20 | def forward(self, input_image, driving_source, stage=None): 21 | if stage == 'warp': 22 | descriptor = self.mapping_net(driving_source) 23 | output = self.warpping_net(input_image, descriptor) 24 | else: 25 | descriptor = self.mapping_net(driving_source) 26 | output = self.warpping_net(input_image, descriptor) 27 | output['fake_image'] = self.editing_net(input_image, output['warp_image'], descriptor) 28 | return output 29 | 30 | class MappingNet(nn.Module): 31 | def __init__(self, coeff_nc=73, descriptor_nc=256, layer=3): 32 | super( MappingNet, self).__init__() 33 | 34 | self.layer = layer 35 | nonlinearity = nn.LeakyReLU(0.1) 36 | 37 | self.first = nn.Sequential( 38 | torch.nn.Conv1d(coeff_nc, descriptor_nc, kernel_size=7, padding=0, bias=True)) 39 | 40 | for i in range(layer): 41 | net = nn.Sequential(nonlinearity, 42 | torch.nn.Conv1d(descriptor_nc, descriptor_nc, kernel_size=3, padding=0, dilation=3)) 43 | setattr(self, 'encoder' + str(i), net) 44 | 45 | self.pooling = nn.AdaptiveAvgPool1d(1) 46 | self.output_nc = descriptor_nc 47 | 48 | def forward(self, input_3dmm): 49 | out = self.first(input_3dmm) 50 | for i in range(self.layer): 51 | model = getattr(self, 'encoder' + str(i)) 52 | out = model(out) + out[:,:,3:-3] 53 | out = self.pooling(out) 54 | return out 55 | 56 | class WarpingNet(nn.Module): 57 | def __init__( 58 | self, 59 | image_nc=3, 60 | descriptor_nc=256, 61 | base_nc=32, 62 | max_nc=256, 63 | encoder_layer=5, 64 | decoder_layer=3, 65 | use_spect=False 66 | ): 67 | super( WarpingNet, 
self).__init__() 68 | 69 | nonlinearity = nn.LeakyReLU(0.1) 70 | norm_layer = functools.partial(LayerNorm2d, affine=True) 71 | kwargs = {'nonlinearity':nonlinearity, 'use_spect':use_spect} 72 | 73 | self.descriptor_nc = descriptor_nc 74 | self.hourglass = ADAINHourglass(image_nc, self.descriptor_nc, base_nc, 75 | max_nc, encoder_layer, decoder_layer, **kwargs) 76 | 77 | self.flow_out = nn.Sequential(norm_layer(self.hourglass.output_nc), 78 | nonlinearity, 79 | nn.Conv2d(self.hourglass.output_nc, 2, kernel_size=7, stride=1, padding=3)) 80 | 81 | self.pool = nn.AdaptiveAvgPool2d(1) 82 | 83 | def forward(self, input_image, descriptor): 84 | final_output={} 85 | output = self.hourglass(input_image, descriptor) 86 | final_output['flow_field'] = self.flow_out(output) 87 | 88 | deformation = flow_util.convert_flow_to_deformation(final_output['flow_field']) 89 | final_output['warp_image'] = flow_util.warp_image(input_image, deformation) 90 | return final_output 91 | 92 | 93 | class EditingNet(nn.Module): 94 | def __init__( 95 | self, 96 | image_nc=3, 97 | descriptor_nc=256, 98 | layer=3, 99 | base_nc=64, 100 | max_nc=256, 101 | num_res_blocks=2, 102 | use_spect=False): 103 | super(EditingNet, self).__init__() 104 | 105 | nonlinearity = nn.LeakyReLU(0.1) 106 | norm_layer = functools.partial(LayerNorm2d, affine=True) 107 | kwargs = {'norm_layer':norm_layer, 'nonlinearity':nonlinearity, 'use_spect':use_spect} 108 | self.descriptor_nc = descriptor_nc 109 | 110 | # encoder part 111 | self.encoder = FineEncoder(image_nc*2, base_nc, max_nc, layer, **kwargs) 112 | self.decoder = FineDecoder(image_nc, self.descriptor_nc, base_nc, max_nc, layer, num_res_blocks, **kwargs) 113 | 114 | def forward(self, input_image, warp_image, descriptor): 115 | x = torch.cat([input_image, warp_image], 1) 116 | x = self.encoder(x) 117 | gen_image = self.decoder(x, descriptor) 118 | return gen_image 119 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from models.DNet import DNet 3 | from models.LNet import LNet 4 | from models.ENet import ENet 5 | 6 | 7 | def _load(checkpoint_path): 8 | map_location=None if torch.cuda.is_available() else torch.device('cpu') 9 | checkpoint = torch.load(checkpoint_path, map_location=map_location) 10 | return checkpoint 11 | 12 | def load_checkpoint(path, model): 13 | print("Load checkpoint from: {}".format(path)) 14 | checkpoint = _load(path) 15 | s = checkpoint["state_dict"] if 'arcface' not in path else checkpoint 16 | new_s = {} 17 | for k, v in s.items(): 18 | if 'low_res' in k: 19 | continue 20 | else: 21 | new_s[k.replace('module.', '')] = v 22 | model.load_state_dict(new_s, strict=False) 23 | return model 24 | 25 | def load_network(args): 26 | L_net = LNet() 27 | L_net = load_checkpoint(args.LNet_path, L_net) 28 | E_net = ENet(lnet=L_net) 29 | model = load_checkpoint(args.ENet_path, E_net) 30 | return model.eval() 31 | 32 | def load_DNet(args): 33 | D_Net = DNet() 34 | print("Load checkpoint from: {}".format(args.DNet_path)) 35 | checkpoint = torch.load(args.DNet_path, map_location=lambda storage, loc: storage) 36 | D_Net.load_state_dict(checkpoint['net_G_ema'], strict=False) 37 | return D_Net.eval() -------------------------------------------------------------------------------- /models/transformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 
from einops import rearrange 5 | 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | import numpy as np 9 | 10 | 11 | class GELU(nn.Module): 12 | def __init__(self): 13 | super(GELU, self).__init__() 14 | def forward(self, x): 15 | return 0.5*x*(1+F.tanh(np.sqrt(2/np.pi)*(x+0.044715*torch.pow(x,3)))) 16 | 17 | # helpers 18 | 19 | def pair(t): 20 | return t if isinstance(t, tuple) else (t, t) 21 | 22 | # classes 23 | 24 | class PreNorm(nn.Module): 25 | def __init__(self, dim, fn): 26 | super().__init__() 27 | self.norm = nn.LayerNorm(dim) 28 | self.fn = fn 29 | def forward(self, x, **kwargs): 30 | return self.fn(self.norm(x), **kwargs) 31 | 32 | class DualPreNorm(nn.Module): 33 | def __init__(self, dim, fn): 34 | super().__init__() 35 | self.normx = nn.LayerNorm(dim) 36 | self.normy = nn.LayerNorm(dim) 37 | self.fn = fn 38 | def forward(self, x, y, **kwargs): 39 | return self.fn(self.normx(x), self.normy(y), **kwargs) 40 | 41 | class FeedForward(nn.Module): 42 | def __init__(self, dim, hidden_dim, dropout = 0.): 43 | super().__init__() 44 | self.net = nn.Sequential( 45 | nn.Linear(dim, hidden_dim), 46 | GELU(), 47 | nn.Dropout(dropout), 48 | nn.Linear(hidden_dim, dim), 49 | nn.Dropout(dropout) 50 | ) 51 | def forward(self, x): 52 | return self.net(x) 53 | 54 | class Attention(nn.Module): 55 | def __init__(self, dim, heads = 8, dim_head = 64, dropout = 0.): 56 | super().__init__() 57 | inner_dim = dim_head * heads 58 | project_out = not (heads == 1 and dim_head == dim) 59 | 60 | self.heads = heads 61 | self.scale = dim_head ** -0.5 62 | 63 | self.attend = nn.Softmax(dim = -1) 64 | 65 | self.to_q = nn.Linear(dim, inner_dim, bias = False) 66 | self.to_k = nn.Linear(dim, inner_dim, bias = False) 67 | self.to_v = nn.Linear(dim, inner_dim, bias = False) 68 | 69 | 70 | self.to_out = nn.Sequential( 71 | nn.Linear(inner_dim, dim), 72 | nn.Dropout(dropout) 73 | ) if project_out else nn.Identity() 74 | 75 | def forward(self, x, y): 76 | # qk = self.to_qk(x).chunk(2, dim = -1) # 77 | q = rearrange(self.to_q(x), 'b n (h d) -> b h n d', h = self.heads) # q,k from the zero feature 78 | k = rearrange(self.to_k(x), 'b n (h d) -> b h n d', h = self.heads) # v from the reference features 79 | v = rearrange(self.to_v(y), 'b n (h d) -> b h n d', h = self.heads) 80 | 81 | dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale 82 | 83 | attn = self.attend(dots) 84 | 85 | out = torch.matmul(attn, v) 86 | out = rearrange(out, 'b h n d -> b n (h d)') 87 | return self.to_out(out) 88 | 89 | class Transformer(nn.Module): 90 | def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout = 0.): 91 | super().__init__() 92 | self.layers = nn.ModuleList([]) 93 | for _ in range(depth): 94 | self.layers.append(nn.ModuleList([ 95 | DualPreNorm(dim, Attention(dim, heads = heads, dim_head = dim_head, dropout = dropout)), 96 | PreNorm(dim, FeedForward(dim, mlp_dim, dropout = dropout)) 97 | ])) 98 | 99 | 100 | def forward(self, x, y): # x is the cropped, y is the foreign reference 101 | bs,c,h,w = x.size() 102 | 103 | # img to embedding 104 | x = x.view(bs,c,-1).permute(0,2,1) 105 | y = y.view(bs,c,-1).permute(0,2,1) 106 | 107 | for attn, ff in self.layers: 108 | x = attn(x, y) + x 109 | x = ff(x) + x 110 | 111 | x = x.view(bs,h,w,c).permute(0,3,1,2) 112 | return x 113 | 114 | class RETURNX(nn.Module): 115 | def __init__(self,): 116 | super().__init__() 117 | 118 | def forward(self, x, y): # x is the cropped, y is the foreign reference 119 | return x 
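Editor's note — a minimal usage sketch of the Transformer defined above (not a file from the repository): both inputs are feature maps of identical shape whose channel count equals dim; the first is flattened into tokens that attend with queries/keys from itself and values from the second, and the result is reshaped back to the input layout. The constructor values below are illustrative only (not necessarily those used by LNet), and the import assumes the repository root is on sys.path with torch and einops installed.

import torch
from models.transformer import Transformer

dim = 256                              # must equal the channel count of both inputs
net = Transformer(dim=dim, depth=2, heads=8, dim_head=64, mlp_dim=512)

x = torch.randn(1, dim, 16, 16)        # "cropped" features that get updated
y = torch.randn(1, dim, 16, 16)        # reference features supplying the attention values
out = net(x, y)                        # -> torch.Size([1, 256, 16, 16]), same shape as x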
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | basicsr==1.4.2 2 | kornia==0.5.1 3 | face-alignment==1.3.4 4 | ninja==1.10.2.3 5 | einops==0.4.1 6 | facexlib==0.2.5 7 | librosa==0.9.2 8 | dlib==19.24.0 9 | gradio>=3.7.0 10 | numpy==1.23.4 11 | -------------------------------------------------------------------------------- /third_part/GFPGAN/gfpgan/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | 3 | from .archs import * 4 | from .data import * 5 | from .models import * 6 | from .utils import * 7 | 8 | # from .version import * 9 | -------------------------------------------------------------------------------- /third_part/GFPGAN/gfpgan/archs/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | from basicsr.utils import scandir 3 | from os import path as osp 4 | 5 | # automatically scan and import arch modules for registry 6 | # scan all the files that end with '_arch.py' under the archs folder 7 | arch_folder = osp.dirname(osp.abspath(__file__)) 8 | arch_filenames = [osp.splitext(osp.basename(v))[0] for v in scandir(arch_folder) if v.endswith('_arch.py')] 9 | # import all the arch modules 10 | _arch_modules = [importlib.import_module(f'gfpgan.archs.{file_name}') for file_name in arch_filenames] 11 | -------------------------------------------------------------------------------- /third_part/GFPGAN/gfpgan/data/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | from basicsr.utils import scandir 3 | from os import path as osp 4 | 5 | # automatically scan and import dataset modules for registry 6 | # scan all the files that end with '_dataset.py' under the data folder 7 | data_folder = osp.dirname(osp.abspath(__file__)) 8 | dataset_filenames = [osp.splitext(osp.basename(v))[0] for v in scandir(data_folder) if v.endswith('_dataset.py')] 9 | # import all the dataset modules 10 | _dataset_modules = [importlib.import_module(f'gfpgan.data.{file_name}') for file_name in dataset_filenames] 11 | -------------------------------------------------------------------------------- /third_part/GFPGAN/gfpgan/models/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | from basicsr.utils import scandir 3 | from os import path as osp 4 | 5 | # automatically scan and import model modules for registry 6 | # scan all the files that end with '_model.py' under the model folder 7 | model_folder = osp.dirname(osp.abspath(__file__)) 8 | model_filenames = [osp.splitext(osp.basename(v))[0] for v in scandir(model_folder) if v.endswith('_model.py')] 9 | # import all the model modules 10 | _model_modules = [importlib.import_module(f'gfpgan.models.{file_name}') for file_name in model_filenames] 11 | -------------------------------------------------------------------------------- /third_part/GFPGAN/gfpgan/train.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | import os.path as osp 3 | from basicsr.train import train_pipeline 4 | 5 | import gfpgan.archs 6 | import gfpgan.data 7 | import gfpgan.models 8 | 9 | if __name__ == '__main__': 10 | root_path = osp.abspath(osp.join(__file__, osp.pardir, osp.pardir)) 11 | train_pipeline(root_path) 12 | 
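Editor's note — the three __init__.py files above populate basicsr's registries purely through import side effects, which is why train.py only needs to import gfpgan.archs, gfpgan.data and gfpgan.models before calling train_pipeline. A minimal sketch of how a new architecture would be picked up (the file and class names here are hypothetical; ARCH_REGISTRY is the real basicsr registry):

# Hypothetical file gfpgan/archs/my_toy_arch.py -- any "*_arch.py" file in that folder
# is imported automatically by the scan in gfpgan/archs/__init__.py, so the
# register() call below runs as a side effect of "import gfpgan.archs".
import torch.nn as nn
from basicsr.utils.registry import ARCH_REGISTRY


@ARCH_REGISTRY.register()
class MyToyArch(nn.Module):
    def __init__(self, num_feat=64):
        super().__init__()
        self.conv = nn.Conv2d(3, num_feat, 3, padding=1)

    def forward(self, x):
        return self.conv(x)

An option file could then select it with network_g -> type: MyToyArch, and basicsr would build it via ARCH_REGISTRY.get('MyToyArch'); the "*_dataset.py" and "*_model.py" scans work the same way for DATASET_REGISTRY and MODEL_REGISTRY.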
-------------------------------------------------------------------------------- /third_part/GFPGAN/gfpgan/version.py: -------------------------------------------------------------------------------- 1 | # GENERATED VERSION FILE 2 | # TIME: Wed Apr 20 14:43:06 2022 3 | __version__ = '1.3.2' 4 | __gitsha__ = '924ce47' 5 | version_info = (1, 3, 2) 6 | -------------------------------------------------------------------------------- /third_part/GFPGAN/gfpgan/weights/README.md: -------------------------------------------------------------------------------- 1 | # Weights 2 | 3 | Put the downloaded weights to this folder. 4 | -------------------------------------------------------------------------------- /third_part/GFPGAN/options/train_gfpgan_v1_simple.yml: -------------------------------------------------------------------------------- 1 | # general settings 2 | name: train_GFPGANv1_512_simple 3 | model_type: GFPGANModel 4 | num_gpu: auto # officially, we use 4 GPUs 5 | manual_seed: 0 6 | 7 | # dataset and data loader settings 8 | datasets: 9 | train: 10 | name: FFHQ 11 | type: FFHQDegradationDataset 12 | # dataroot_gt: datasets/ffhq/ffhq_512.lmdb 13 | dataroot_gt: datasets/ffhq/ffhq_512 14 | io_backend: 15 | # type: lmdb 16 | type: disk 17 | 18 | use_hflip: true 19 | mean: [0.5, 0.5, 0.5] 20 | std: [0.5, 0.5, 0.5] 21 | out_size: 512 22 | 23 | blur_kernel_size: 41 24 | kernel_list: ['iso', 'aniso'] 25 | kernel_prob: [0.5, 0.5] 26 | blur_sigma: [0.1, 10] 27 | downsample_range: [0.8, 8] 28 | noise_range: [0, 20] 29 | jpeg_range: [60, 100] 30 | 31 | # color jitter and gray 32 | color_jitter_prob: 0.3 33 | color_jitter_shift: 20 34 | color_jitter_pt_prob: 0.3 35 | gray_prob: 0.01 36 | 37 | # If you do not want colorization, please set 38 | # color_jitter_prob: ~ 39 | # color_jitter_pt_prob: ~ 40 | # gray_prob: 0.01 41 | # gt_gray: True 42 | 43 | # data loader 44 | use_shuffle: true 45 | num_worker_per_gpu: 6 46 | batch_size_per_gpu: 3 47 | dataset_enlarge_ratio: 1 48 | prefetch_mode: ~ 49 | 50 | val: 51 | # Please modify accordingly to use your own validation 52 | # Or comment the val block if do not need validation during training 53 | name: validation 54 | type: PairedImageDataset 55 | dataroot_lq: datasets/faces/validation/input 56 | dataroot_gt: datasets/faces/validation/reference 57 | io_backend: 58 | type: disk 59 | mean: [0.5, 0.5, 0.5] 60 | std: [0.5, 0.5, 0.5] 61 | scale: 1 62 | 63 | # network structures 64 | network_g: 65 | type: GFPGANv1 66 | out_size: 512 67 | num_style_feat: 512 68 | channel_multiplier: 1 69 | resample_kernel: [1, 3, 3, 1] 70 | decoder_load_path: experiments/pretrained_models/StyleGAN2_512_Cmul1_FFHQ_B12G4_scratch_800k.pth 71 | fix_decoder: true 72 | num_mlp: 8 73 | lr_mlp: 0.01 74 | input_is_latent: true 75 | different_w: true 76 | narrow: 1 77 | sft_half: true 78 | 79 | network_d: 80 | type: StyleGAN2Discriminator 81 | out_size: 512 82 | channel_multiplier: 1 83 | resample_kernel: [1, 3, 3, 1] 84 | 85 | 86 | # path 87 | path: 88 | pretrain_network_g: ~ 89 | param_key_g: params_ema 90 | strict_load_g: ~ 91 | pretrain_network_d: ~ 92 | resume_state: ~ 93 | 94 | # training settings 95 | train: 96 | optim_g: 97 | type: Adam 98 | lr: !!float 2e-3 99 | optim_d: 100 | type: Adam 101 | lr: !!float 2e-3 102 | optim_component: 103 | type: Adam 104 | lr: !!float 2e-3 105 | 106 | scheduler: 107 | type: MultiStepLR 108 | milestones: [600000, 700000] 109 | gamma: 0.5 110 | 111 | total_iter: 800000 112 | warmup_iter: -1 # no warm up 113 | 114 | # losses 115 | # pixel loss 116 | 
pixel_opt: 117 | type: L1Loss 118 | loss_weight: !!float 1e-1 119 | reduction: mean 120 | # L1 loss used in pyramid loss, component style loss and identity loss 121 | L1_opt: 122 | type: L1Loss 123 | loss_weight: 1 124 | reduction: mean 125 | 126 | # image pyramid loss 127 | pyramid_loss_weight: 1 128 | remove_pyramid_loss: 50000 129 | # perceptual loss (content and style losses) 130 | perceptual_opt: 131 | type: PerceptualLoss 132 | layer_weights: 133 | # before relu 134 | 'conv1_2': 0.1 135 | 'conv2_2': 0.1 136 | 'conv3_4': 1 137 | 'conv4_4': 1 138 | 'conv5_4': 1 139 | vgg_type: vgg19 140 | use_input_norm: true 141 | perceptual_weight: !!float 1 142 | style_weight: 50 143 | range_norm: true 144 | criterion: l1 145 | # gan loss 146 | gan_opt: 147 | type: GANLoss 148 | gan_type: wgan_softplus 149 | loss_weight: !!float 1e-1 150 | # r1 regularization for discriminator 151 | r1_reg_weight: 10 152 | 153 | net_d_iters: 1 154 | net_d_init_iters: 0 155 | net_d_reg_every: 16 156 | 157 | # validation settings 158 | val: 159 | val_freq: !!float 5e3 160 | save_img: true 161 | 162 | metrics: 163 | psnr: # metric name 164 | type: calculate_psnr 165 | crop_border: 0 166 | test_y_channel: false 167 | 168 | # logging settings 169 | logger: 170 | print_freq: 100 171 | save_checkpoint_freq: !!float 5e3 172 | use_tb_logger: true 173 | wandb: 174 | project: ~ 175 | resume_id: ~ 176 | 177 | # dist training settings 178 | dist_params: 179 | backend: nccl 180 | port: 29500 181 | 182 | find_unused_parameters: true 183 | -------------------------------------------------------------------------------- /third_part/GPEN/face_detect/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxwh/video-retalking/d2257f1290bd517ca18a0d15ecaeb11bbcc103a2/third_part/GPEN/face_detect/.DS_Store -------------------------------------------------------------------------------- /third_part/GPEN/face_detect/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .wider_face import WiderFaceDetection, detection_collate 2 | from .data_augment import * 3 | from .config import * 4 | -------------------------------------------------------------------------------- /third_part/GPEN/face_detect/data/config.py: -------------------------------------------------------------------------------- 1 | # config.py 2 | 3 | cfg_mnet = { 4 | 'name': 'mobilenet0.25', 5 | 'min_sizes': [[16, 32], [64, 128], [256, 512]], 6 | 'steps': [8, 16, 32], 7 | 'variance': [0.1, 0.2], 8 | 'clip': False, 9 | 'loc_weight': 2.0, 10 | 'gpu_train': True, 11 | 'batch_size': 32, 12 | 'ngpu': 1, 13 | 'epoch': 250, 14 | 'decay1': 190, 15 | 'decay2': 220, 16 | 'image_size': 640, 17 | 'pretrain': False, 18 | 'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3}, 19 | 'in_channel': 32, 20 | 'out_channel': 64 21 | } 22 | 23 | cfg_re50 = { 24 | 'name': 'Resnet50', 25 | 'min_sizes': [[16, 32], [64, 128], [256, 512]], 26 | 'steps': [8, 16, 32], 27 | 'variance': [0.1, 0.2], 28 | 'clip': False, 29 | 'loc_weight': 2.0, 30 | 'gpu_train': True, 31 | 'batch_size': 24, 32 | 'ngpu': 4, 33 | 'epoch': 100, 34 | 'decay1': 70, 35 | 'decay2': 90, 36 | 'image_size': 840, 37 | 'pretrain': False, 38 | 'return_layers': {'layer2': 1, 'layer3': 2, 'layer4': 3}, 39 | 'in_channel': 256, 40 | 'out_channel': 256 41 | } 42 | 43 | -------------------------------------------------------------------------------- /third_part/GPEN/face_detect/data/wider_face.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import sys 4 | import torch 5 | import torch.utils.data as data 6 | import cv2 7 | import numpy as np 8 | 9 | class WiderFaceDetection(data.Dataset): 10 | def __init__(self, txt_path, preproc=None): 11 | self.preproc = preproc 12 | self.imgs_path = [] 13 | self.words = [] 14 | f = open(txt_path,'r') 15 | lines = f.readlines() 16 | isFirst = True 17 | labels = [] 18 | for line in lines: 19 | line = line.rstrip() 20 | if line.startswith('#'): 21 | if isFirst is True: 22 | isFirst = False 23 | else: 24 | labels_copy = labels.copy() 25 | self.words.append(labels_copy) 26 | labels.clear() 27 | path = line[2:] 28 | path = txt_path.replace('label.txt','images/') + path 29 | self.imgs_path.append(path) 30 | else: 31 | line = line.split(' ') 32 | label = [float(x) for x in line] 33 | labels.append(label) 34 | 35 | self.words.append(labels) 36 | 37 | def __len__(self): 38 | return len(self.imgs_path) 39 | 40 | def __getitem__(self, index): 41 | img = cv2.imread(self.imgs_path[index]) 42 | height, width, _ = img.shape 43 | 44 | labels = self.words[index] 45 | annotations = np.zeros((0, 15)) 46 | if len(labels) == 0: 47 | return annotations 48 | for idx, label in enumerate(labels): 49 | annotation = np.zeros((1, 15)) 50 | # bbox 51 | annotation[0, 0] = label[0] # x1 52 | annotation[0, 1] = label[1] # y1 53 | annotation[0, 2] = label[0] + label[2] # x2 54 | annotation[0, 3] = label[1] + label[3] # y2 55 | 56 | # landmarks 57 | annotation[0, 4] = label[4] # l0_x 58 | annotation[0, 5] = label[5] # l0_y 59 | annotation[0, 6] = label[7] # l1_x 60 | annotation[0, 7] = label[8] # l1_y 61 | annotation[0, 8] = label[10] # l2_x 62 | annotation[0, 9] = label[11] # l2_y 63 | annotation[0, 10] = label[13] # l3_x 64 | annotation[0, 11] = label[14] # l3_y 65 | annotation[0, 12] = label[16] # l4_x 66 | annotation[0, 13] = label[17] # l4_y 67 | if (annotation[0, 4]<0): 68 | annotation[0, 14] = -1 69 | else: 70 | annotation[0, 14] = 1 71 | 72 | annotations = np.append(annotations, annotation, axis=0) 73 | target = np.array(annotations) 74 | if self.preproc is not None: 75 | img, target = self.preproc(img, target) 76 | 77 | return torch.from_numpy(img), target 78 | 79 | def detection_collate(batch): 80 | """Custom collate fn for dealing with batches of images that have a different 81 | number of associated object annotations (bounding boxes). 
82 | 83 | Arguments: 84 | batch: (tuple) A tuple of tensor images and lists of annotations 85 | 86 | Return: 87 | A tuple containing: 88 | 1) (tensor) batch of images stacked on their 0 dim 89 | 2) (list of tensors) annotations for a given image are stacked on 0 dim 90 | """ 91 | targets = [] 92 | imgs = [] 93 | for _, sample in enumerate(batch): 94 | for _, tup in enumerate(sample): 95 | if torch.is_tensor(tup): 96 | imgs.append(tup) 97 | elif isinstance(tup, type(np.empty(0))): 98 | annos = torch.from_numpy(tup).float() 99 | targets.append(annos) 100 | 101 | return (torch.stack(imgs, 0), targets) 102 | -------------------------------------------------------------------------------- /third_part/GPEN/face_detect/facemodels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxwh/video-retalking/d2257f1290bd517ca18a0d15ecaeb11bbcc103a2/third_part/GPEN/face_detect/facemodels/__init__.py -------------------------------------------------------------------------------- /third_part/GPEN/face_detect/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions import * 2 | from .modules import * 3 | -------------------------------------------------------------------------------- /third_part/GPEN/face_detect/layers/functions/prior_box.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from itertools import product as product 3 | import numpy as np 4 | from math import ceil 5 | 6 | 7 | class PriorBox(object): 8 | def __init__(self, cfg, image_size=None, phase='train'): 9 | super(PriorBox, self).__init__() 10 | self.min_sizes = cfg['min_sizes'] 11 | self.steps = cfg['steps'] 12 | self.clip = cfg['clip'] 13 | self.image_size = image_size 14 | self.feature_maps = [[ceil(self.image_size[0]/step), ceil(self.image_size[1]/step)] for step in self.steps] 15 | self.name = "s" 16 | 17 | def forward(self): 18 | anchors = [] 19 | for k, f in enumerate(self.feature_maps): 20 | min_sizes = self.min_sizes[k] 21 | for i, j in product(range(f[0]), range(f[1])): 22 | for min_size in min_sizes: 23 | s_kx = min_size / self.image_size[1] 24 | s_ky = min_size / self.image_size[0] 25 | dense_cx = [x * self.steps[k] / self.image_size[1] for x in [j + 0.5]] 26 | dense_cy = [y * self.steps[k] / self.image_size[0] for y in [i + 0.5]] 27 | for cy, cx in product(dense_cy, dense_cx): 28 | anchors += [cx, cy, s_kx, s_ky] 29 | 30 | # back to torch land 31 | output = torch.Tensor(anchors).view(-1, 4) 32 | if self.clip: 33 | output.clamp_(max=1, min=0) 34 | return output 35 | -------------------------------------------------------------------------------- /third_part/GPEN/face_detect/layers/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .multibox_loss import MultiBoxLoss 2 | 3 | __all__ = ['MultiBoxLoss'] 4 | -------------------------------------------------------------------------------- /third_part/GPEN/face_detect/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxwh/video-retalking/d2257f1290bd517ca18a0d15ecaeb11bbcc103a2/third_part/GPEN/face_detect/utils/__init__.py -------------------------------------------------------------------------------- /third_part/GPEN/face_detect/utils/nms/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chenxwh/video-retalking/d2257f1290bd517ca18a0d15ecaeb11bbcc103a2/third_part/GPEN/face_detect/utils/nms/__init__.py -------------------------------------------------------------------------------- /third_part/GPEN/face_detect/utils/nms/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def py_cpu_nms(dets, thresh): 11 | """Pure Python NMS baseline.""" 12 | x1 = dets[:, 0] 13 | y1 = dets[:, 1] 14 | x2 = dets[:, 2] 15 | y2 = dets[:, 3] 16 | scores = dets[:, 4] 17 | 18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 19 | order = scores.argsort()[::-1] 20 | 21 | keep = [] 22 | while order.size > 0: 23 | i = order[0] 24 | keep.append(i) 25 | xx1 = np.maximum(x1[i], x1[order[1:]]) 26 | yy1 = np.maximum(y1[i], y1[order[1:]]) 27 | xx2 = np.minimum(x2[i], x2[order[1:]]) 28 | yy2 = np.minimum(y2[i], y2[order[1:]]) 29 | 30 | w = np.maximum(0.0, xx2 - xx1 + 1) 31 | h = np.maximum(0.0, yy2 - yy1 + 1) 32 | inter = w * h 33 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 34 | 35 | inds = np.where(ovr <= thresh)[0] 36 | order = order[inds + 1] 37 | 38 | return keep 39 | -------------------------------------------------------------------------------- /third_part/GPEN/face_detect/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 19 | 20 | def tic(self): 21 | # using time.time instead of time.clock because time time.clock 22 | # does not normalize for multithreading 23 | self.start_time = time.time() 24 | 25 | def toc(self, average=True): 26 | self.diff = time.time() - self.start_time 27 | self.total_time += self.diff 28 | self.calls += 1 29 | self.average_time = self.total_time / self.calls 30 | if average: 31 | return self.average_time 32 | else: 33 | return self.diff 34 | 35 | def clear(self): 36 | self.total_time = 0. 37 | self.calls = 0 38 | self.start_time = 0. 39 | self.diff = 0. 40 | self.average_time = 0. 
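# Editor's sketch (not part of the repository): minimal usage of the two helpers
# above, assuming this utils folder is on sys.path so the imports below resolve.
# py_cpu_nms takes an (N, 5) float array of [x1, y1, x2, y2, score] rows and returns
# the indices kept after greedy IoU suppression; Timer averages repeated tic()/toc() calls.
import numpy as np
from timer import Timer
from nms.py_cpu_nms import py_cpu_nms

dets = np.array([[10., 10., 50., 50., 0.9],
                 [12., 12., 52., 52., 0.8],      # IoU with box 0 is ~0.83 -> suppressed
                 [100., 100., 150., 150., 0.7]])
timer = Timer()
timer.tic()
keep = py_cpu_nms(dets, thresh=0.5)              # -> [0, 2]
print(keep, timer.toc())                         # toc() returns the running average time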
41 | -------------------------------------------------------------------------------- /third_part/GPEN/face_model/face_gan.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @paper: GAN Prior Embedded Network for Blind Face Restoration in the Wild (CVPR2021) 3 | @author: yangxy (yangtao9009@gmail.com) 4 | ''' 5 | import torch 6 | import os 7 | import cv2 8 | import glob 9 | import numpy as np 10 | from torch import nn 11 | import torch.nn.functional as F 12 | from torchvision import transforms, utils 13 | from face_model.gpen_model import FullGenerator 14 | 15 | class FaceGAN(object): 16 | def __init__(self, base_dir='./', size=512, model=None, channel_multiplier=2, narrow=1, is_norm=True, device='cuda'): 17 | self.mfile = os.path.join(base_dir, model+'.pth') 18 | self.n_mlp = 8 19 | self.device = device 20 | self.is_norm = is_norm 21 | self.resolution = size 22 | self.load_model(channel_multiplier, narrow) 23 | 24 | def load_model(self, channel_multiplier=2, narrow=1): 25 | self.model = FullGenerator(self.resolution, 512, self.n_mlp, channel_multiplier, narrow=narrow, device=self.device) 26 | pretrained_dict = torch.load(self.mfile, map_location=torch.device('cpu')) 27 | self.model.load_state_dict(pretrained_dict) 28 | self.model.to(self.device) 29 | self.model.eval() 30 | 31 | def process(self, img): 32 | img = cv2.resize(img, (self.resolution, self.resolution)) 33 | img_t = self.img2tensor(img) 34 | 35 | with torch.no_grad(): 36 | out, __ = self.model(img_t) 37 | 38 | out = self.tensor2img(out) 39 | 40 | return out 41 | 42 | def img2tensor(self, img): 43 | img_t = torch.from_numpy(img).to(self.device)/255. 44 | if self.is_norm: 45 | img_t = (img_t - 0.5) / 0.5 46 | img_t = img_t.permute(2, 0, 1).unsqueeze(0).flip(1) # BGR->RGB 47 | return img_t 48 | 49 | def tensor2img(self, img_t, pmax=255.0, imtype=np.uint8): 50 | if self.is_norm: 51 | img_t = img_t * 0.5 + 0.5 52 | img_t = img_t.squeeze(0).permute(1, 2, 0).flip(2) # RGB->BGR 53 | img_np = np.clip(img_t.float().cpu().numpy(), 0, 1) * pmax 54 | 55 | return img_np.astype(imtype) 56 | -------------------------------------------------------------------------------- /third_part/GPEN/face_model/op/__init__.py: -------------------------------------------------------------------------------- 1 | from .fused_act import FusedLeakyReLU, fused_leaky_relu 2 | from .upfirdn2d import upfirdn2d 3 | -------------------------------------------------------------------------------- /third_part/GPEN/face_model/op/fused_act.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | 4 | import torch 5 | from torch import nn 6 | import torch.nn.functional as F 7 | from torch.autograd import Function 8 | from torch.utils.cpp_extension import load, _import_module_from_library 9 | 10 | # if running GPEN without cuda, please comment line 11-19 11 | if platform.system() == 'Linux' and torch.cuda.is_available(): 12 | module_path = os.path.dirname(__file__) 13 | fused = load( 14 | 'fused', 15 | sources=[ 16 | os.path.join(module_path, 'fused_bias_act.cpp'), 17 | os.path.join(module_path, 'fused_bias_act_kernel.cu'), 18 | ], 19 | ) 20 | 21 | 22 | #fused = _import_module_from_library('fused', '/tmp/torch_extensions/fused', True) 23 | 24 | 25 | class FusedLeakyReLUFunctionBackward(Function): 26 | @staticmethod 27 | def forward(ctx, grad_output, out, negative_slope, scale): 28 | ctx.save_for_backward(out) 29 | ctx.negative_slope = negative_slope 30 | ctx.scale 
= scale 31 | 32 | empty = grad_output.new_empty(0) 33 | 34 | grad_input = fused.fused_bias_act( 35 | grad_output, empty, out, 3, 1, negative_slope, scale 36 | ) 37 | 38 | dim = [0] 39 | 40 | if grad_input.ndim > 2: 41 | dim += list(range(2, grad_input.ndim)) 42 | 43 | grad_bias = grad_input.sum(dim).detach() 44 | 45 | return grad_input, grad_bias 46 | 47 | @staticmethod 48 | def backward(ctx, gradgrad_input, gradgrad_bias): 49 | out, = ctx.saved_tensors 50 | gradgrad_out = fused.fused_bias_act( 51 | gradgrad_input, gradgrad_bias, out, 3, 1, ctx.negative_slope, ctx.scale 52 | ) 53 | 54 | return gradgrad_out, None, None, None 55 | 56 | 57 | class FusedLeakyReLUFunction(Function): 58 | @staticmethod 59 | def forward(ctx, input, bias, negative_slope, scale): 60 | empty = input.new_empty(0) 61 | out = fused.fused_bias_act(input, bias, empty, 3, 0, negative_slope, scale) 62 | ctx.save_for_backward(out) 63 | ctx.negative_slope = negative_slope 64 | ctx.scale = scale 65 | 66 | return out 67 | 68 | @staticmethod 69 | def backward(ctx, grad_output): 70 | out, = ctx.saved_tensors 71 | 72 | grad_input, grad_bias = FusedLeakyReLUFunctionBackward.apply( 73 | grad_output, out, ctx.negative_slope, ctx.scale 74 | ) 75 | 76 | return grad_input, grad_bias, None, None 77 | 78 | 79 | class FusedLeakyReLU(nn.Module): 80 | def __init__(self, channel, negative_slope=0.2, scale=2 ** 0.5, device='cpu'): 81 | super().__init__() 82 | 83 | self.bias = nn.Parameter(torch.zeros(channel)) 84 | self.negative_slope = negative_slope 85 | self.scale = scale 86 | self.device = device 87 | 88 | def forward(self, input): 89 | return fused_leaky_relu(input, self.bias, self.negative_slope, self.scale, self.device) 90 | 91 | 92 | def fused_leaky_relu(input, bias, negative_slope=0.2, scale=2 ** 0.5, device='cpu'): 93 | if platform.system() == 'Linux' and torch.cuda.is_available() and device != 'cpu': 94 | return FusedLeakyReLUFunction.apply(input, bias, negative_slope, scale) 95 | else: 96 | return scale * F.leaky_relu(input + bias.view((1, -1)+(1,)*(len(input.shape)-2)), negative_slope=negative_slope) 97 | -------------------------------------------------------------------------------- /third_part/GPEN/face_model/op/fused_bias_act.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | 4 | torch::Tensor fused_bias_act_op(const torch::Tensor& input, const torch::Tensor& bias, const torch::Tensor& refer, 5 | int act, int grad, float alpha, float scale); 6 | 7 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor") 8 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") 9 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 10 | 11 | torch::Tensor fused_bias_act(const torch::Tensor& input, const torch::Tensor& bias, const torch::Tensor& refer, 12 | int act, int grad, float alpha, float scale) { 13 | CHECK_CUDA(input); 14 | CHECK_CUDA(bias); 15 | 16 | return fused_bias_act_op(input, bias, refer, act, grad, alpha, scale); 17 | } 18 | 19 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 20 | m.def("fused_bias_act", &fused_bias_act, "fused bias act (CUDA)"); 21 | } -------------------------------------------------------------------------------- /third_part/GPEN/face_model/op/fused_bias_act_kernel.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019, NVIDIA Corporation. All rights reserved. 
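// (The kernel in this file fuses bias-add, activation, and scaling in a single pass.
//  The switch on act * 10 + grad selects the variant: 30 = forward leaky ReLU,
//  y = x if x > 0 else alpha * x; 31 = its backward pass, gated by the saved forward
//  output `ref`; 32 = the second-order term, which is zero; 10/11/12 are the linear cases.
//  The bias is broadcast along the channel dimension and the result is multiplied by
//  `scale`. The CPU fallback in fused_act.py computes the equivalent
//  scale * F.leaky_relu(input + bias.view((1, -1) + (1,) * (len(input.shape) - 2)), negative_slope).)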
2 | // 3 | // This work is made available under the Nvidia Source Code License-NC. 4 | // To view a copy of this license, visit 5 | // https://nvlabs.github.io/stylegan2/license.html 6 | 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | 17 | 18 | template 19 | static __global__ void fused_bias_act_kernel(scalar_t* out, const scalar_t* p_x, const scalar_t* p_b, const scalar_t* p_ref, 20 | int act, int grad, scalar_t alpha, scalar_t scale, int loop_x, int size_x, int step_b, int size_b, int use_bias, int use_ref) { 21 | int xi = blockIdx.x * loop_x * blockDim.x + threadIdx.x; 22 | 23 | scalar_t zero = 0.0; 24 | 25 | for (int loop_idx = 0; loop_idx < loop_x && xi < size_x; loop_idx++, xi += blockDim.x) { 26 | scalar_t x = p_x[xi]; 27 | 28 | if (use_bias) { 29 | x += p_b[(xi / step_b) % size_b]; 30 | } 31 | 32 | scalar_t ref = use_ref ? p_ref[xi] : zero; 33 | 34 | scalar_t y; 35 | 36 | switch (act * 10 + grad) { 37 | default: 38 | case 10: y = x; break; 39 | case 11: y = x; break; 40 | case 12: y = 0.0; break; 41 | 42 | case 30: y = (x > 0.0) ? x : x * alpha; break; 43 | case 31: y = (ref > 0.0) ? x : x * alpha; break; 44 | case 32: y = 0.0; break; 45 | } 46 | 47 | out[xi] = y * scale; 48 | } 49 | } 50 | 51 | 52 | torch::Tensor fused_bias_act_op(const torch::Tensor& input, const torch::Tensor& bias, const torch::Tensor& refer, 53 | int act, int grad, float alpha, float scale) { 54 | int curDevice = -1; 55 | cudaGetDevice(&curDevice); 56 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(curDevice); 57 | 58 | auto x = input.contiguous(); 59 | auto b = bias.contiguous(); 60 | auto ref = refer.contiguous(); 61 | 62 | int use_bias = b.numel() ? 1 : 0; 63 | int use_ref = ref.numel() ? 1 : 0; 64 | 65 | int size_x = x.numel(); 66 | int size_b = b.numel(); 67 | int step_b = 1; 68 | 69 | for (int i = 1 + 1; i < x.dim(); i++) { 70 | step_b *= x.size(i); 71 | } 72 | 73 | int loop_x = 4; 74 | int block_size = 4 * 32; 75 | int grid_size = (size_x - 1) / (loop_x * block_size) + 1; 76 | 77 | auto y = torch::empty_like(x); 78 | 79 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(x.scalar_type(), "fused_bias_act_kernel", [&] { 80 | fused_bias_act_kernel<<>>( 81 | y.data_ptr(), 82 | x.data_ptr(), 83 | b.data_ptr(), 84 | ref.data_ptr(), 85 | act, 86 | grad, 87 | alpha, 88 | scale, 89 | loop_x, 90 | size_x, 91 | step_b, 92 | size_b, 93 | use_bias, 94 | use_ref 95 | ); 96 | }); 97 | 98 | return y; 99 | } -------------------------------------------------------------------------------- /third_part/GPEN/face_model/op/upfirdn2d.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | 4 | torch::Tensor upfirdn2d_op(const torch::Tensor& input, const torch::Tensor& kernel, 5 | int up_x, int up_y, int down_x, int down_y, 6 | int pad_x0, int pad_x1, int pad_y0, int pad_y1); 7 | 8 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor") 9 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") 10 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 11 | 12 | torch::Tensor upfirdn2d(const torch::Tensor& input, const torch::Tensor& kernel, 13 | int up_x, int up_y, int down_x, int down_y, 14 | int pad_x0, int pad_x1, int pad_y0, int pad_y1) { 15 | CHECK_CUDA(input); 16 | CHECK_CUDA(kernel); 17 | 18 | return upfirdn2d_op(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1); 19 | } 20 | 21 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 22 | 
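  // upfirdn2d = UPsample, FIR filter, DowNsample: the input is zero-upsampled by
  // (up_x, up_y), padded by (pad_x0, pad_x1, pad_y0, pad_y1), convolved with `kernel`,
  // and finally decimated by (down_x, down_y); StyleGAN2-style blurred up/downsampling
  // is built from this single primitive.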
m.def("upfirdn2d", &upfirdn2d, "upfirdn2d (CUDA)"); 23 | } -------------------------------------------------------------------------------- /third_part/GPEN/face_morpher/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.swp 3 | MANIFEST 4 | -------------------------------------------------------------------------------- /third_part/GPEN/face_morpher/facemorpher/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Face Morpher module init code 3 | """ 4 | from .morpher import morpher, list_imgpaths 5 | from .averager import averager 6 | 7 | __all__ = ['list_imgpaths', 8 | 'morpher', 9 | 'averager'] 10 | -------------------------------------------------------------------------------- /third_part/GPEN/face_morpher/facemorpher/aligner.py: -------------------------------------------------------------------------------- 1 | """ 2 | Align face and image sizes 3 | """ 4 | import cv2 5 | import numpy as np 6 | 7 | def positive_cap(num): 8 | """ Cap a number to ensure positivity 9 | 10 | :param num: positive or negative number 11 | :returns: (overflow, capped_number) 12 | """ 13 | if num < 0: 14 | return 0, abs(num) 15 | else: 16 | return num, 0 17 | 18 | def roi_coordinates(rect, size, scale): 19 | """ Align the rectangle into the center and return the top-left coordinates 20 | within the new size. If rect is smaller, we add borders. 21 | 22 | :param rect: (x, y, w, h) bounding rectangle of the face 23 | :param size: (width, height) are the desired dimensions 24 | :param scale: scaling factor of the rectangle to be resized 25 | :returns: 4 numbers. Top-left coordinates of the aligned ROI. 26 | (x, y, border_x, border_y). All values are > 0. 27 | """ 28 | rectx, recty, rectw, recth = rect 29 | new_height, new_width = size 30 | mid_x = int((rectx + rectw/2) * scale) 31 | mid_y = int((recty + recth/2) * scale) 32 | roi_x = mid_x - int(new_width/2) 33 | roi_y = mid_y - int(new_height/2) 34 | 35 | roi_x, border_x = positive_cap(roi_x) 36 | roi_y, border_y = positive_cap(roi_y) 37 | return roi_x, roi_y, border_x, border_y 38 | 39 | def scaling_factor(rect, size): 40 | """ Calculate the scaling factor for the current image to be 41 | resized to the new dimensions 42 | 43 | :param rect: (x, y, w, h) bounding rectangle of the face 44 | :param size: (width, height) are the desired dimensions 45 | :returns: floating point scaling factor 46 | """ 47 | new_height, new_width = size 48 | rect_h, rect_w = rect[2:] 49 | height_ratio = rect_h / new_height 50 | width_ratio = rect_w / new_width 51 | scale = 1 52 | if height_ratio > width_ratio: 53 | new_recth = 0.8 * new_height 54 | scale = new_recth / rect_h 55 | else: 56 | new_rectw = 0.8 * new_width 57 | scale = new_rectw / rect_w 58 | return scale 59 | 60 | def resize_image(img, scale): 61 | """ Resize image with the provided scaling factor 62 | 63 | :param img: image to be resized 64 | :param scale: scaling factor for resizing the image 65 | """ 66 | cur_height, cur_width = img.shape[:2] 67 | new_scaled_height = int(scale * cur_height) 68 | new_scaled_width = int(scale * cur_width) 69 | 70 | return cv2.resize(img, (new_scaled_width, new_scaled_height)) 71 | 72 | def resize_align(img, points, size): 73 | """ Resize image and associated points, align face to the center 74 | and crop to the desired size 75 | 76 | :param img: image to be resized 77 | :param points: *m* x 2 array of points 78 | :param size: (height, width) tuple of new desired size 79 | """ 
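  # In short: scaling_factor() chooses a scale so the face bounding box occupies roughly
  # 80% of the target size, the frame is resized by that scale, a crop of `size` centred
  # on the face is taken (zero-padded via border_x/border_y when the ROI runs past the
  # frame), and the landmark points are remapped into crop coordinates.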
80 | new_height, new_width = size 81 | 82 | # Resize image based on bounding rectangle 83 | rect = cv2.boundingRect(np.array([points], np.int32)) 84 | scale = scaling_factor(rect, size) 85 | img = resize_image(img, scale) 86 | 87 | # Align bounding rect to center 88 | cur_height, cur_width = img.shape[:2] 89 | roi_x, roi_y, border_x, border_y = roi_coordinates(rect, size, scale) 90 | roi_h = np.min([new_height-border_y, cur_height-roi_y]) 91 | roi_w = np.min([new_width-border_x, cur_width-roi_x]) 92 | 93 | # Crop to supplied size 94 | crop = np.zeros((new_height, new_width, 3), img.dtype) 95 | crop[border_y:border_y+roi_h, border_x:border_x+roi_w] = ( 96 | img[roi_y:roi_y+roi_h, roi_x:roi_x+roi_w]) 97 | 98 | # Scale and align face points to the crop 99 | points[:, 0] = (points[:, 0] * scale) + (border_x - roi_x) 100 | points[:, 1] = (points[:, 1] * scale) + (border_y - roi_y) 101 | 102 | return (crop, points) 103 | -------------------------------------------------------------------------------- /third_part/GPEN/face_morpher/facemorpher/averager.py: -------------------------------------------------------------------------------- 1 | """ 2 | :: 3 | 4 | Face averager 5 | 6 | Usage: 7 | averager.py --images= [--blur] [--plot] 8 | [--background=(black|transparent|average)] 9 | [--width=] [--height=] 10 | [--out=] [--destimg=] 11 | 12 | Options: 13 | -h, --help Show this screen. 14 | --images= Folder to images (.jpg, .jpeg, .png) 15 | --blur Flag to blur edges of image [default: False] 16 | --width= Custom width of the images/video [default: 500] 17 | --height= Custom height of the images/video [default: 600] 18 | --out= Filename to save the average face [default: result.png] 19 | --destimg= Destination face image to overlay average face 20 | --plot Flag to display the average face [default: False] 21 | --background= Background of image to be one of (black|transparent|average) [default: black] 22 | --version Show version. 23 | """ 24 | 25 | from docopt import docopt 26 | import os 27 | import cv2 28 | import numpy as np 29 | import matplotlib.pyplot as plt 30 | import matplotlib.image as mpimg 31 | 32 | from facemorpher import locator 33 | from facemorpher import aligner 34 | from facemorpher import warper 35 | from facemorpher import blender 36 | from facemorpher import plotter 37 | 38 | def list_imgpaths(imgfolder): 39 | for fname in os.listdir(imgfolder): 40 | if (fname.lower().endswith('.jpg') or 41 | fname.lower().endswith('.png') or 42 | fname.lower().endswith('.jpeg')): 43 | yield os.path.join(imgfolder, fname) 44 | 45 | def sharpen(img): 46 | blured = cv2.GaussianBlur(img, (0, 0), 2.5) 47 | return cv2.addWeighted(img, 1.4, blured, -0.4, 0) 48 | 49 | def load_image_points(path, size): 50 | img = cv2.imread(path) 51 | points = locator.face_points(img) 52 | 53 | if len(points) == 0: 54 | print('No face in %s' % path) 55 | return None, None 56 | else: 57 | return aligner.resize_align(img, points, size) 58 | 59 | def averager(imgpaths, dest_filename=None, width=500, height=600, background='black', 60 | blur_edges=False, out_filename='result.png', plot=False): 61 | 62 | size = (height, width) 63 | 64 | images = [] 65 | point_set = [] 66 | for path in imgpaths: 67 | img, points = load_image_points(path, size) 68 | if img is not None: 69 | images.append(img) 70 | point_set.append(points) 71 | 72 | if len(images) == 0: 73 | raise FileNotFoundError('Could not find any valid images.' 
+ 74 | ' Supported formats are .jpg, .png, .jpeg') 75 | 76 | if dest_filename is not None: 77 | dest_img, dest_points = load_image_points(dest_filename, size) 78 | if dest_img is None or dest_points is None: 79 | raise Exception('No face or detected face points in dest img: ' + dest_filename) 80 | else: 81 | dest_img = np.zeros(images[0].shape, np.uint8) 82 | dest_points = locator.average_points(point_set) 83 | 84 | num_images = len(images) 85 | result_images = np.zeros(images[0].shape, np.float32) 86 | for i in range(num_images): 87 | result_images += warper.warp_image(images[i], point_set[i], 88 | dest_points, size, np.float32) 89 | 90 | result_image = np.uint8(result_images / num_images) 91 | face_indexes = np.nonzero(result_image) 92 | dest_img[face_indexes] = result_image[face_indexes] 93 | 94 | mask = blender.mask_from_points(size, dest_points) 95 | if blur_edges: 96 | blur_radius = 10 97 | mask = cv2.blur(mask, (blur_radius, blur_radius)) 98 | 99 | if background in ('transparent', 'average'): 100 | dest_img = np.dstack((dest_img, mask)) 101 | 102 | if background == 'average': 103 | average_background = locator.average_points(images) 104 | dest_img = blender.overlay_image(dest_img, mask, average_background) 105 | 106 | print('Averaged {} images'.format(num_images)) 107 | plt = plotter.Plotter(plot, num_images=1, out_filename=out_filename) 108 | plt.save(dest_img) 109 | plt.plot_one(dest_img) 110 | plt.show() 111 | 112 | def main(): 113 | args = docopt(__doc__, version='Face Averager 1.0') 114 | try: 115 | averager(list_imgpaths(args['--images']), args['--destimg'], 116 | int(args['--width']), int(args['--height']), 117 | args['--background'], args['--blur'], args['--out'], args['--plot']) 118 | except Exception as e: 119 | print(e) 120 | 121 | 122 | if __name__ == "__main__": 123 | main() 124 | -------------------------------------------------------------------------------- /third_part/GPEN/face_morpher/facemorpher/locator.py: -------------------------------------------------------------------------------- 1 | """ 2 | Locate face points 3 | """ 4 | 5 | import cv2 6 | import numpy as np 7 | import os.path as path 8 | import dlib 9 | import os 10 | 11 | 12 | DATA_DIR = os.environ.get( 13 | 'DLIB_DATA_DIR', 14 | path.join(path.dirname(path.dirname(path.realpath(__file__))), 'data') 15 | ) 16 | dlib_detector = dlib.get_frontal_face_detector() 17 | dlib_predictor = dlib.shape_predictor(path.join(DATA_DIR, 'shape_predictor_68_face_landmarks.dat')) 18 | 19 | def boundary_points(points, width_percent=0.1, height_percent=0.1): 20 | """ Produce additional boundary points 21 | :param points: *m* x 2 array of x,y points 22 | :param width_percent: [-1, 1] percentage of width to taper inwards. Negative for opposite direction 23 | :param height_percent: [-1, 1] percentage of height to taper downwards. 
Negative for opposite direction 24 | :returns: 2 additional points at the top corners 25 | """ 26 | x, y, w, h = cv2.boundingRect(np.array([points], np.int32)) 27 | spacerw = int(w * width_percent) 28 | spacerh = int(h * height_percent) 29 | return [[x+spacerw, y+spacerh], 30 | [x+w-spacerw, y+spacerh]] 31 | 32 | 33 | def face_points(img, add_boundary_points=True): 34 | return face_points_dlib(img, add_boundary_points) 35 | 36 | def face_points_dlib(img, add_boundary_points=True): 37 | """ Locates 68 face points using dlib (http://dlib.net) 38 | Requires shape_predictor_68_face_landmarks.dat to be in face_morpher/data 39 | Download at: http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 40 | :param img: an image array 41 | :param add_boundary_points: bool to add additional boundary points 42 | :returns: Array of x,y face points. Empty array if no face found 43 | """ 44 | try: 45 | points = [] 46 | rgbimg = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 47 | rects = dlib_detector(rgbimg, 1) 48 | 49 | if rects and len(rects) > 0: 50 | # We only take the first found face 51 | shapes = dlib_predictor(rgbimg, rects[0]) 52 | points = np.array([(shapes.part(i).x, shapes.part(i).y) for i in range(68)], np.int32) 53 | 54 | if add_boundary_points: 55 | # Add more points inwards and upwards as dlib only detects up to eyebrows 56 | points = np.vstack([ 57 | points, 58 | boundary_points(points, 0.1, -0.03), 59 | boundary_points(points, 0.13, -0.05), 60 | boundary_points(points, 0.15, -0.08), 61 | boundary_points(points, 0.33, -0.12)]) 62 | 63 | return points 64 | except Exception as e: 65 | print(e) 66 | return [] 67 | 68 | def face_points_stasm(img, add_boundary_points=True): 69 | import stasm 70 | """ Locates 77 face points using stasm (http://www.milbo.users.sonic.net/stasm) 71 | 72 | :param img: an image array 73 | :param add_boundary_points: bool to add 2 additional points 74 | :returns: Array of x,y face points. Empty array if no face found 75 | """ 76 | try: 77 | points = stasm.search_single(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)) 78 | except Exception as e: 79 | print('Failed finding face points: ', e) 80 | return [] 81 | 82 | points = points.astype(np.int32) 83 | if len(points) == 0: 84 | return points 85 | 86 | if add_boundary_points: 87 | return np.vstack([points, boundary_points(points)]) 88 | 89 | return points 90 | 91 | def average_points(point_set): 92 | """ Averages a set of face points from images 93 | 94 | :param point_set: *n* x *m* x 2 array of face points. \\ 95 | *n* = number of images. *m* = number of face points per image 96 | """ 97 | return np.mean(point_set, 0).astype(np.int32) 98 | 99 | def weighted_average_points(start_points, end_points, percent=0.5): 100 | """ Weighted average of two sets of supplied points 101 | 102 | :param start_points: *m* x 2 array of start face points. 103 | :param end_points: *m* x 2 array of end face points. 
104 | :param percent: [0, 1] percentage weight on start_points 105 | :returns: *m* x 2 array of weighted average points 106 | """ 107 | if percent <= 0: 108 | return end_points 109 | elif percent >= 1: 110 | return start_points 111 | else: 112 | return np.asarray(start_points*percent + end_points*(1-percent), np.int32) 113 | -------------------------------------------------------------------------------- /third_part/GPEN/face_morpher/facemorpher/plotter.py: -------------------------------------------------------------------------------- 1 | """ 2 | Plot and save images 3 | """ 4 | 5 | import matplotlib.pyplot as plt 6 | import matplotlib.image as mpimg 7 | import os.path 8 | import numpy as np 9 | import cv2 10 | 11 | def bgr2rgb(img): 12 | # OpenCV's BGR to RGB 13 | rgb = np.copy(img) 14 | rgb[..., 0], rgb[..., 2] = img[..., 2], img[..., 0] 15 | return rgb 16 | 17 | def check_do_plot(func): 18 | def inner(self, *args, **kwargs): 19 | if self.do_plot: 20 | func(self, *args, **kwargs) 21 | 22 | return inner 23 | 24 | def check_do_save(func): 25 | def inner(self, *args, **kwargs): 26 | if self.do_save: 27 | func(self, *args, **kwargs) 28 | 29 | return inner 30 | 31 | class Plotter(object): 32 | def __init__(self, plot=True, rows=0, cols=0, num_images=0, out_folder=None, out_filename=None): 33 | self.save_counter = 1 34 | self.plot_counter = 1 35 | self.do_plot = plot 36 | self.do_save = out_filename is not None 37 | self.out_filename = out_filename 38 | self.set_filepath(out_folder) 39 | 40 | if (rows + cols) == 0 and num_images > 0: 41 | # Auto-calculate the number of rows and cols for the figure 42 | self.rows = np.ceil(np.sqrt(num_images / 2.0)) 43 | self.cols = np.ceil(num_images / self.rows) 44 | else: 45 | self.rows = rows 46 | self.cols = cols 47 | 48 | def set_filepath(self, folder): 49 | if folder is None: 50 | self.filepath = None 51 | return 52 | 53 | if not os.path.exists(folder): 54 | os.makedirs(folder) 55 | self.filepath = os.path.join(folder, 'frame{0:03d}.png') 56 | self.do_save = True 57 | 58 | @check_do_save 59 | def save(self, img, filename=None): 60 | if self.filepath: 61 | filename = self.filepath.format(self.save_counter) 62 | self.save_counter += 1 63 | elif filename is None: 64 | filename = self.out_filename 65 | 66 | mpimg.imsave(filename, bgr2rgb(img)) 67 | print(filename + ' saved') 68 | 69 | @check_do_plot 70 | def plot_one(self, img): 71 | p = plt.subplot(self.rows, self.cols, self.plot_counter) 72 | p.axes.get_xaxis().set_visible(False) 73 | p.axes.get_yaxis().set_visible(False) 74 | plt.imshow(bgr2rgb(img)) 75 | self.plot_counter += 1 76 | 77 | @check_do_plot 78 | def show(self): 79 | plt.gcf().subplots_adjust(hspace=0.05, wspace=0, 80 | left=0, bottom=0, right=1, top=0.98) 81 | plt.axis('off') 82 | #plt.show() 83 | plt.savefig('result.png') 84 | 85 | @check_do_plot 86 | def plot_mesh(self, points, tri, color='k'): 87 | """ plot triangles """ 88 | for tri_indices in tri.simplices: 89 | t_ext = [tri_indices[0], tri_indices[1], tri_indices[2], tri_indices[0]] 90 | plt.plot(points[t_ext, 0], points[t_ext, 1], color) 91 | -------------------------------------------------------------------------------- /third_part/GPEN/face_morpher/facemorpher/videoer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Create a video with image frames 3 | """ 4 | 5 | import cv2 6 | import numpy as np 7 | 8 | 9 | def check_write_video(func): 10 | def inner(self, *args, **kwargs): 11 | if self.video: 12 | return func(self, *args, **kwargs) 
13 | else: 14 | pass 15 | return inner 16 | 17 | 18 | class Video(object): 19 | def __init__(self, filename, fps, w, h): 20 | self.filename = filename 21 | 22 | if filename is None: 23 | self.video = None 24 | else: 25 | fourcc = cv2.VideoWriter_fourcc(*'MJPG') 26 | self.video = cv2.VideoWriter(filename, fourcc, fps, (w, h), True) 27 | 28 | @check_write_video 29 | def write(self, img, num_times=1): 30 | for i in range(num_times): 31 | self.video.write(img[..., :3]) 32 | 33 | @check_write_video 34 | def end(self): 35 | print(self.filename + ' saved') 36 | self.video.release() 37 | -------------------------------------------------------------------------------- /third_part/GPEN/face_morpher/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | matplotlib 4 | docopt 5 | dlib 6 | -------------------------------------------------------------------------------- /third_part/GPEN/face_morpher/scripts/make_docs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | rm -rf docs 4 | # reStructuredText in python files to rst. Documentation in docs folder 5 | sphinx-apidoc -A "Alyssa Quek" -f -F -o docs facemorpher/ 6 | 7 | cd docs 8 | 9 | # Append module path to end of conf file 10 | echo "" >> conf.py 11 | echo "import os" >> conf.py 12 | echo "import sys" >> conf.py 13 | echo "sys.path.insert(0, os.path.abspath('../'))" >> conf.py 14 | echo "sys.path.insert(0, os.path.abspath('../facemorpher'))" >> conf.py 15 | 16 | # Make sphinx documentation 17 | make html 18 | cd .. 19 | -------------------------------------------------------------------------------- /third_part/GPEN/face_morpher/scripts/publish_ghpages.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # delete previous gh-pages 4 | git branch -D gh-pages 5 | git push origin :gh-pages 6 | 7 | git checkout -b gh-pages 8 | git rebase master 9 | git reset HEAD 10 | 11 | # make docs 12 | ./scripts/make_docs.sh 13 | 14 | # Add docs 15 | mv docs/_build/html/*.html . 16 | git add *.html 17 | mv docs/_build/html/*.js . 
18 | git add *.js 19 | mv docs/_build/html/_static/ _static 20 | git add _static 21 | 22 | touch .nojekyll 23 | git add .nojekyll 24 | 25 | # Publish to gh-pages 26 | git commit -m "docs" 27 | git push origin gh-pages 28 | 29 | git checkout master 30 | -------------------------------------------------------------------------------- /third_part/GPEN/face_morpher/setup.cfg: -------------------------------------------------------------------------------- 1 | [pep8] 2 | ignore = E111,E114,E226,E302,E41,E121,E701 3 | max-line-length = 100 4 | 5 | [flake8] 6 | ignore = E111,E114,E226,E302,E41,E121,E701 7 | max-line-length = 100 -------------------------------------------------------------------------------- /third_part/GPEN/face_morpher/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | # To test locally: python setup.py sdist bdist_wheel 4 | # To upload to pypi: twine upload dist/* 5 | 6 | setup( 7 | name='facemorpher', 8 | version='5.2.dev0', 9 | author='Alyssa Quek', 10 | author_email='alyssaquek@gmail.com', 11 | description=('Warp, morph and average human faces!'), 12 | keywords='face morphing, averaging, warping', 13 | url='https://github.com/alyssaq/face_morpher', 14 | license='MIT', 15 | packages=find_packages(), 16 | install_requires=[ 17 | 'docopt', 18 | 'numpy', 19 | 'scipy', 20 | 'matplotlib', 21 | 'dlib' 22 | ], 23 | entry_points={'console_scripts': [ 24 | 'facemorpher=facemorpher.morpher:main', 25 | 'faceaverager=facemorpher.averager:main' 26 | ] 27 | }, 28 | data_files=[('readme', ['README.rst'])], 29 | long_description=open('README.rst').read(), 30 | ) 31 | -------------------------------------------------------------------------------- /third_part/GPEN/face_parse/mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxwh/video-retalking/d2257f1290bd517ca18a0d15ecaeb11bbcc103a2/third_part/GPEN/face_parse/mask.png -------------------------------------------------------------------------------- /third_part/GPEN/face_parse/parse_model.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @Created by chaofengc (chaofenghust@gmail.com) 3 | 4 | @Modified by yangxy (yangtao9009@gmail.com) 5 | ''' 6 | 7 | from face_parse.blocks import * 8 | import torch 9 | from torch import nn 10 | import numpy as np 11 | 12 | def define_P(in_size=512, out_size=512, min_feat_size=32, relu_type='LeakyReLU', isTrain=False, weight_path=None): 13 | net = ParseNet(in_size, out_size, min_feat_size, 64, 19, norm_type='bn', relu_type=relu_type, ch_range=[32, 256]) 14 | if not isTrain: 15 | net.eval() 16 | if weight_path is not None: 17 | net.load_state_dict(torch.load(weight_path)) 18 | return net 19 | 20 | 21 | class ParseNet(nn.Module): 22 | def __init__(self, 23 | in_size=128, 24 | out_size=128, 25 | min_feat_size=32, 26 | base_ch=64, 27 | parsing_ch=19, 28 | res_depth=10, 29 | relu_type='prelu', 30 | norm_type='bn', 31 | ch_range=[32, 512], 32 | ): 33 | super().__init__() 34 | self.res_depth = res_depth 35 | act_args = {'norm_type': norm_type, 'relu_type': relu_type} 36 | min_ch, max_ch = ch_range 37 | 38 | ch_clip = lambda x: max(min_ch, min(x, max_ch)) 39 | min_feat_size = min(in_size, min_feat_size) 40 | 41 | down_steps = int(np.log2(in_size//min_feat_size)) 42 | up_steps = int(np.log2(out_size//min_feat_size)) 43 | 44 | # =============== define encoder-body-decoder ==================== 45 | 
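        # Encoder: a stem conv plus `down_steps` downsampling residual blocks from in_size
        #          to min_feat_size, doubling the channel count (clipped to ch_range) each step.
        # Body:    `res_depth` residual blocks at constant resolution and width.
        # Decoder: `up_steps` upsampling residual blocks back to out_size, halving channels.
        # Two final conv heads predict a reconstructed RGB image and a `parsing_ch`-channel
        # parsing mask.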
self.encoder = [] 46 | self.encoder.append(ConvLayer(3, base_ch, 3, 1)) 47 | head_ch = base_ch 48 | for i in range(down_steps): 49 | cin, cout = ch_clip(head_ch), ch_clip(head_ch * 2) 50 | self.encoder.append(ResidualBlock(cin, cout, scale='down', **act_args)) 51 | head_ch = head_ch * 2 52 | 53 | self.body = [] 54 | for i in range(res_depth): 55 | self.body.append(ResidualBlock(ch_clip(head_ch), ch_clip(head_ch), **act_args)) 56 | 57 | self.decoder = [] 58 | for i in range(up_steps): 59 | cin, cout = ch_clip(head_ch), ch_clip(head_ch // 2) 60 | self.decoder.append(ResidualBlock(cin, cout, scale='up', **act_args)) 61 | head_ch = head_ch // 2 62 | 63 | self.encoder = nn.Sequential(*self.encoder) 64 | self.body = nn.Sequential(*self.body) 65 | self.decoder = nn.Sequential(*self.decoder) 66 | self.out_img_conv = ConvLayer(ch_clip(head_ch), 3) 67 | self.out_mask_conv = ConvLayer(ch_clip(head_ch), parsing_ch) 68 | 69 | def forward(self, x): 70 | feat = self.encoder(x) 71 | x = feat + self.body(feat) 72 | x = self.decoder(x) 73 | out_img = self.out_img_conv(x) 74 | out_mask = self.out_mask_conv(x) 75 | return out_mask, out_img 76 | 77 | 78 | -------------------------------------------------------------------------------- /third_part/GPEN/face_parse/resnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torch.utils.model_zoo as modelzoo 8 | 9 | # from modules.bn import InPlaceABNSync as BatchNorm2d 10 | 11 | resnet18_url = 'https://download.pytorch.org/models/resnet18-5c106cde.pth' 12 | 13 | 14 | def conv3x3(in_planes, out_planes, stride=1): 15 | """3x3 convolution with padding""" 16 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 17 | padding=1, bias=False) 18 | 19 | 20 | class BasicBlock(nn.Module): 21 | def __init__(self, in_chan, out_chan, stride=1): 22 | super(BasicBlock, self).__init__() 23 | self.conv1 = conv3x3(in_chan, out_chan, stride) 24 | self.bn1 = nn.BatchNorm2d(out_chan) 25 | self.conv2 = conv3x3(out_chan, out_chan) 26 | self.bn2 = nn.BatchNorm2d(out_chan) 27 | self.relu = nn.ReLU(inplace=True) 28 | self.downsample = None 29 | if in_chan != out_chan or stride != 1: 30 | self.downsample = nn.Sequential( 31 | nn.Conv2d(in_chan, out_chan, 32 | kernel_size=1, stride=stride, bias=False), 33 | nn.BatchNorm2d(out_chan), 34 | ) 35 | 36 | def forward(self, x): 37 | residual = self.conv1(x) 38 | residual = F.relu(self.bn1(residual)) 39 | residual = self.conv2(residual) 40 | residual = self.bn2(residual) 41 | 42 | shortcut = x 43 | if self.downsample is not None: 44 | shortcut = self.downsample(x) 45 | 46 | out = shortcut + residual 47 | out = self.relu(out) 48 | return out 49 | 50 | 51 | def create_layer_basic(in_chan, out_chan, bnum, stride=1): 52 | layers = [BasicBlock(in_chan, out_chan, stride=stride)] 53 | for i in range(bnum-1): 54 | layers.append(BasicBlock(out_chan, out_chan, stride=1)) 55 | return nn.Sequential(*layers) 56 | 57 | 58 | class Resnet18(nn.Module): 59 | def __init__(self): 60 | super(Resnet18, self).__init__() 61 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 62 | bias=False) 63 | self.bn1 = nn.BatchNorm2d(64) 64 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 65 | self.layer1 = create_layer_basic(64, 64, bnum=2, stride=1) 66 | self.layer2 = create_layer_basic(64, 128, bnum=2, stride=2) 67 | self.layer3 = create_layer_basic(128, 
256, bnum=2, stride=2) 68 | self.layer4 = create_layer_basic(256, 512, bnum=2, stride=2) 69 | self.init_weight() 70 | 71 | def forward(self, x): 72 | x = self.conv1(x) 73 | x = F.relu(self.bn1(x)) 74 | x = self.maxpool(x) 75 | 76 | x = self.layer1(x) 77 | feat8 = self.layer2(x) # 1/8 78 | feat16 = self.layer3(feat8) # 1/16 79 | feat32 = self.layer4(feat16) # 1/32 80 | return feat8, feat16, feat32 81 | 82 | def init_weight(self): 83 | state_dict = modelzoo.load_url(resnet18_url) 84 | self_state_dict = self.state_dict() 85 | for k, v in state_dict.items(): 86 | if 'fc' in k: continue 87 | self_state_dict.update({k: v}) 88 | self.load_state_dict(self_state_dict) 89 | 90 | def get_params(self): 91 | wd_params, nowd_params = [], [] 92 | for name, module in self.named_modules(): 93 | if isinstance(module, (nn.Linear, nn.Conv2d)): 94 | wd_params.append(module.weight) 95 | if not module.bias is None: 96 | nowd_params.append(module.bias) 97 | elif isinstance(module, nn.BatchNorm2d): 98 | nowd_params += list(module.parameters()) 99 | return wd_params, nowd_params 100 | 101 | 102 | if __name__ == "__main__": 103 | net = Resnet18() 104 | x = torch.randn(16, 3, 224, 224) 105 | out = net(x) 106 | print(out[0].size()) 107 | print(out[1].size()) 108 | print(out[2].size()) 109 | net.get_params() 110 | -------------------------------------------------------------------------------- /third_part/GPEN/face_parse/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxwh/video-retalking/d2257f1290bd517ca18a0d15ecaeb11bbcc103a2/third_part/GPEN/face_parse/test.png -------------------------------------------------------------------------------- /third_part/face3d/checkpoints/model_name/test_opt.txt: -------------------------------------------------------------------------------- 1 | ----------------- Options --------------- 2 | add_image: True 3 | bfm_folder: BFM 4 | bfm_model: BFM_model_front.mat 5 | camera_d: 10.0 6 | center: 112.0 7 | checkpoints_dir: ./checkpoints 8 | dataset_mode: None 9 | ddp_port: 12355 10 | display_per_batch: True 11 | epoch: 20 [default: latest] 12 | eval_batch_nums: inf 13 | focal: 1015.0 14 | gpu_ids: 0 15 | inference_batch_size: 8 16 | init_path: checkpoints/init_model/resnet50-0676ba61.pth 17 | input_dir: demo_video [default: None] 18 | isTrain: False [default: None] 19 | keypoint_dir: demo_cctv [default: None] 20 | model: facerecon 21 | name: model_name [default: face_recon] 22 | net_recon: resnet50 23 | output_dir: demo_cctv [default: mp4] 24 | phase: test 25 | save_split_files: False 26 | suffix: 27 | use_ddp: False [default: True] 28 | use_last_fc: False 29 | verbose: False 30 | vis_batch_nums: 1 31 | world_size: 1 32 | z_far: 15.0 33 | z_near: 5.0 34 | ----------------- End ------------------- 35 | -------------------------------------------------------------------------------- /third_part/face3d/coeff_detector.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import numpy as np 4 | from os import makedirs, name 5 | from PIL import Image 6 | from tqdm import tqdm 7 | 8 | import torch 9 | import torch.nn as nn 10 | 11 | from face3d.options.inference_options import InferenceOptions 12 | from face3d.models import create_model 13 | from face3d.util.preprocess import align_img 14 | from face3d.util.load_mats import load_lm3d 15 | from face3d.extract_kp_videos import KeypointExtractor 16 | 17 | 18 | class CoeffDetector(nn.Module): 19 | def 
__init__(self, opt): 20 | super().__init__() 21 | 22 | self.model = create_model(opt) 23 | self.model.setup(opt) 24 | self.model.device = 'cuda' 25 | self.model.parallelize() 26 | self.model.eval() 27 | 28 | self.lm3d_std = load_lm3d(opt.bfm_folder) 29 | 30 | def forward(self, img, lm): 31 | 32 | img, trans_params = self.image_transform(img, lm) 33 | 34 | data_input = { 35 | 'imgs': img[None], 36 | } 37 | self.model.set_input(data_input) 38 | self.model.test() 39 | pred_coeff = {key:self.model.pred_coeffs_dict[key].cpu().numpy() for key in self.model.pred_coeffs_dict} 40 | pred_coeff = np.concatenate([ 41 | pred_coeff['id'], 42 | pred_coeff['exp'], 43 | pred_coeff['tex'], 44 | pred_coeff['angle'], 45 | pred_coeff['gamma'], 46 | pred_coeff['trans'], 47 | trans_params[None], 48 | ], 1) 49 | 50 | return {'coeff_3dmm':pred_coeff, 51 | 'crop_img': Image.fromarray((img.cpu().permute(1, 2, 0).numpy()*255).astype(np.uint8))} 52 | 53 | def image_transform(self, images, lm): 54 | """ 55 | param: 56 | images: -- PIL image 57 | lm: -- numpy array 58 | """ 59 | W,H = images.size 60 | if np.mean(lm) == -1: 61 | lm = (self.lm3d_std[:, :2]+1)/2. 62 | lm = np.concatenate( 63 | [lm[:, :1]*W, lm[:, 1:2]*H], 1 64 | ) 65 | else: 66 | lm[:, -1] = H - 1 - lm[:, -1] 67 | 68 | trans_params, img, lm, _ = align_img(images, lm, self.lm3d_std) 69 | img = torch.tensor(np.array(img)/255., dtype=torch.float32).permute(2, 0, 1) 70 | trans_params = np.array([float(item) for item in np.hsplit(trans_params, 5)]) 71 | trans_params = torch.tensor(trans_params.astype(np.float32)) 72 | return img, trans_params 73 | 74 | def get_data_path(root, keypoint_root): 75 | filenames = list() 76 | keypoint_filenames = list() 77 | 78 | IMAGE_EXTENSIONS_LOWERCASE = {'jpg', 'png', 'jpeg', 'webp'} 79 | IMAGE_EXTENSIONS = IMAGE_EXTENSIONS_LOWERCASE.union({f.upper() for f in IMAGE_EXTENSIONS_LOWERCASE}) 80 | extensions = IMAGE_EXTENSIONS 81 | 82 | for ext in extensions: 83 | filenames += glob.glob(f'{root}/*.{ext}', recursive=True) 84 | filenames = sorted(filenames) 85 | for filename in filenames: 86 | name = os.path.splitext(os.path.basename(filename))[0] 87 | keypoint_filenames.append( 88 | os.path.join(keypoint_root, name + '.txt') 89 | ) 90 | return filenames, keypoint_filenames 91 | 92 | 93 | if __name__ == "__main__": 94 | opt = InferenceOptions().parse() 95 | coeff_detector = CoeffDetector(opt) 96 | kp_extractor = KeypointExtractor() 97 | image_names, keypoint_names = get_data_path(opt.input_dir, opt.keypoint_dir) 98 | makedirs(opt.keypoint_dir, exist_ok=True) 99 | makedirs(opt.output_dir, exist_ok=True) 100 | 101 | for image_name, keypoint_name in tqdm(zip(image_names, keypoint_names)): 102 | image = Image.open(image_name) 103 | if not os.path.isfile(keypoint_name): 104 | lm = kp_extractor.extract_keypoint(image, keypoint_name) 105 | else: 106 | lm = np.loadtxt(keypoint_name).astype(np.float32) 107 | lm = lm.reshape([-1, 2]) 108 | predicted = coeff_detector(image, lm) 109 | name = os.path.splitext(os.path.basename(image_name))[0] 110 | np.savetxt( 111 | "{}/{}_3dmm_coeff.txt".format(opt.output_dir, name), 112 | predicted['coeff_3dmm'].reshape(-1)) 113 | 114 | 115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /third_part/face3d/data/flist_dataset.py: -------------------------------------------------------------------------------- 1 | """This script defines the custom dataset for Deep3DFaceRecon_pytorch 2 | """ 3 | 4 | import os.path 5 | from data.base_dataset import BaseDataset, 
get_transform, get_affine_mat, apply_img_affine, apply_lm_affine 6 | from data.image_folder import make_dataset 7 | from PIL import Image 8 | import random 9 | import util.util as util 10 | import numpy as np 11 | import json 12 | import torch 13 | from scipy.io import loadmat, savemat 14 | import pickle 15 | from util.preprocess import align_img, estimate_norm 16 | from util.load_mats import load_lm3d 17 | 18 | 19 | def default_flist_reader(flist): 20 | """ 21 | flist format: impath label\nimpath label\n ...(same to caffe's filelist) 22 | """ 23 | imlist = [] 24 | with open(flist, 'r') as rf: 25 | for line in rf.readlines(): 26 | impath = line.strip() 27 | imlist.append(impath) 28 | 29 | return imlist 30 | 31 | def jason_flist_reader(flist): 32 | with open(flist, 'r') as fp: 33 | info = json.load(fp) 34 | return info 35 | 36 | def parse_label(label): 37 | return torch.tensor(np.array(label).astype(np.float32)) 38 | 39 | 40 | class FlistDataset(BaseDataset): 41 | """ 42 | It requires one directories to host training images '/path/to/data/train' 43 | You can train the model with the dataset flag '--dataroot /path/to/data'. 44 | """ 45 | 46 | def __init__(self, opt): 47 | """Initialize this dataset class. 48 | 49 | Parameters: 50 | opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions 51 | """ 52 | BaseDataset.__init__(self, opt) 53 | 54 | self.lm3d_std = load_lm3d(opt.bfm_folder) 55 | 56 | msk_names = default_flist_reader(opt.flist) 57 | self.msk_paths = [os.path.join(opt.data_root, i) for i in msk_names] 58 | 59 | self.size = len(self.msk_paths) 60 | self.opt = opt 61 | 62 | self.name = 'train' if opt.isTrain else 'val' 63 | if '_' in opt.flist: 64 | self.name += '_' + opt.flist.split(os.sep)[-1].split('_')[0] 65 | 66 | 67 | def __getitem__(self, index): 68 | """Return a data point and its metadata information. 69 | 70 | Parameters: 71 | index (int) -- a random integer for data indexing 72 | 73 | Returns a dictionary that contains A, B, A_paths and B_paths 74 | img (tensor) -- an image in the input domain 75 | msk (tensor) -- its corresponding attention mask 76 | lm (tensor) -- its corresponding 3d landmarks 77 | im_paths (str) -- image paths 78 | aug_flag (bool) -- a flag used to tell whether its raw or augmented 79 | """ 80 | msk_path = self.msk_paths[index % self.size] # make sure index is within then range 81 | img_path = msk_path.replace('mask/', '') 82 | lm_path = '.'.join(msk_path.replace('mask', 'landmarks').split('.')[:-1]) + '.txt' 83 | 84 | raw_img = Image.open(img_path).convert('RGB') 85 | raw_msk = Image.open(msk_path).convert('RGB') 86 | raw_lm = np.loadtxt(lm_path).astype(np.float32) 87 | 88 | _, img, lm, msk = align_img(raw_img, raw_lm, self.lm3d_std, raw_msk) 89 | 90 | aug_flag = self.opt.use_aug and self.opt.isTrain 91 | if aug_flag: 92 | img, lm, msk = self._augmentation(img, lm, self.opt, msk) 93 | 94 | _, H = img.size 95 | M = estimate_norm(lm, H) 96 | transform = get_transform() 97 | img_tensor = transform(img) 98 | msk_tensor = transform(msk)[:1, ...] 
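        # only the first channel of the transformed mask is kept above (the attention mask
        # is single-channel in practice); the landmarks and the alignment matrix M from
        # estimate_norm() are converted to float tensors below via parse_label()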
99 | lm_tensor = parse_label(lm) 100 | M_tensor = parse_label(M) 101 | 102 | 103 | return {'imgs': img_tensor, 104 | 'lms': lm_tensor, 105 | 'msks': msk_tensor, 106 | 'M': M_tensor, 107 | 'im_paths': img_path, 108 | 'aug_flag': aug_flag, 109 | 'dataset': self.name} 110 | 111 | def _augmentation(self, img, lm, opt, msk=None): 112 | affine, affine_inv, flip = get_affine_mat(opt, img.size) 113 | img = apply_img_affine(img, affine_inv) 114 | lm = apply_lm_affine(lm, affine, flip, img.size) 115 | if msk is not None: 116 | msk = apply_img_affine(msk, affine_inv, method=Image.BILINEAR) 117 | return img, lm, msk 118 | 119 | 120 | 121 | 122 | def __len__(self): 123 | """Return the total number of images in the dataset. 124 | """ 125 | return self.size 126 | -------------------------------------------------------------------------------- /third_part/face3d/data/image_folder.py: -------------------------------------------------------------------------------- 1 | """A modified image folder class 2 | 3 | We modify the official PyTorch image folder (https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py) 4 | so that this class can load images from both current directory and its subdirectories. 5 | """ 6 | import numpy as np 7 | import torch.utils.data as data 8 | 9 | from PIL import Image 10 | import os 11 | import os.path 12 | 13 | IMG_EXTENSIONS = [ 14 | '.jpg', '.JPG', '.jpeg', '.JPEG', 15 | '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', 16 | '.tif', '.TIF', '.tiff', '.TIFF', 17 | ] 18 | 19 | 20 | def is_image_file(filename): 21 | return any(filename.endswith(extension) for extension in IMG_EXTENSIONS) 22 | 23 | 24 | def make_dataset(dir, max_dataset_size=float("inf")): 25 | images = [] 26 | assert os.path.isdir(dir) or os.path.islink(dir), '%s is not a valid directory' % dir 27 | 28 | for root, _, fnames in sorted(os.walk(dir, followlinks=True)): 29 | for fname in fnames: 30 | if is_image_file(fname): 31 | path = os.path.join(root, fname) 32 | images.append(path) 33 | return images[:min(max_dataset_size, len(images))] 34 | 35 | 36 | def default_loader(path): 37 | return Image.open(path).convert('RGB') 38 | 39 | 40 | class ImageFolder(data.Dataset): 41 | 42 | def __init__(self, root, transform=None, return_paths=False, 43 | loader=default_loader): 44 | imgs = make_dataset(root) 45 | if len(imgs) == 0: 46 | raise(RuntimeError("Found 0 images in: " + root + "\n" 47 | "Supported image extensions are: " + ",".join(IMG_EXTENSIONS))) 48 | 49 | self.root = root 50 | self.imgs = imgs 51 | self.transform = transform 52 | self.return_paths = return_paths 53 | self.loader = loader 54 | 55 | def __getitem__(self, index): 56 | path = self.imgs[index] 57 | img = self.loader(path) 58 | if self.transform is not None: 59 | img = self.transform(img) 60 | if self.return_paths: 61 | return img, path 62 | else: 63 | return img 64 | 65 | def __len__(self): 66 | return len(self.imgs) 67 | -------------------------------------------------------------------------------- /third_part/face3d/data/template_dataset.py: -------------------------------------------------------------------------------- 1 | """Dataset class template 2 | 3 | This module provides a template for users to implement custom datasets. 4 | You can specify '--dataset_mode template' to use this dataset. 5 | The class name should be consistent with both the filename and its dataset_mode option. 
6 | The filename should be _dataset.py 7 | The class name should be Dataset.py 8 | You need to implement the following functions: 9 | -- : Add dataset-specific options and rewrite default values for existing options. 10 | -- <__init__>: Initialize this dataset class. 11 | -- <__getitem__>: Return a data point and its metadata information. 12 | -- <__len__>: Return the number of images. 13 | """ 14 | from data.base_dataset import BaseDataset, get_transform 15 | # from data.image_folder import make_dataset 16 | # from PIL import Image 17 | 18 | 19 | class TemplateDataset(BaseDataset): 20 | """A template dataset class for you to implement custom datasets.""" 21 | @staticmethod 22 | def modify_commandline_options(parser, is_train): 23 | """Add new dataset-specific options, and rewrite default values for existing options. 24 | 25 | Parameters: 26 | parser -- original option parser 27 | is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options. 28 | 29 | Returns: 30 | the modified parser. 31 | """ 32 | parser.add_argument('--new_dataset_option', type=float, default=1.0, help='new dataset option') 33 | parser.set_defaults(max_dataset_size=10, new_dataset_option=2.0) # specify dataset-specific default values 34 | return parser 35 | 36 | def __init__(self, opt): 37 | """Initialize this dataset class. 38 | 39 | Parameters: 40 | opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions 41 | 42 | A few things can be done here. 43 | - save the options (have been done in BaseDataset) 44 | - get image paths and meta information of the dataset. 45 | - define the image transformation. 46 | """ 47 | # save the option and dataset root 48 | BaseDataset.__init__(self, opt) 49 | # get the image paths of your dataset; 50 | self.image_paths = [] # You can call sorted(make_dataset(self.root, opt.max_dataset_size)) to get all the image paths under the directory self.root 51 | # define the default transform function. You can use ; You can also define your custom transform function 52 | self.transform = get_transform(opt) 53 | 54 | def __getitem__(self, index): 55 | """Return a data point and its metadata information. 56 | 57 | Parameters: 58 | index -- a random integer for data indexing 59 | 60 | Returns: 61 | a dictionary of data with their names. It usually contains the data itself and its metadata information. 62 | 63 | Step 1: get a random image path: e.g., path = self.image_paths[index] 64 | Step 2: load your data from the disk: e.g., image = Image.open(path).convert('RGB'). 65 | Step 3: convert your data to a PyTorch tensor. You can use helpder functions such as self.transform. e.g., data = self.transform(image) 66 | Step 4: return a data point as a dictionary. 
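        For illustration only (data_B and its source are placeholders here), Steps 1-4
        could combine as:
            path = self.image_paths[index]
            data_A = self.transform(Image.open(path).convert('RGB'))
            return {'data_A': data_A, 'data_B': data_B, 'path': path}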
67 | """ 68 | path = 'temp' # needs to be a string 69 | data_A = None # needs to be a tensor 70 | data_B = None # needs to be a tensor 71 | return {'data_A': data_A, 'data_B': data_B, 'path': path} 72 | 73 | def __len__(self): 74 | """Return the total number of images.""" 75 | return len(self.image_paths) 76 | -------------------------------------------------------------------------------- /third_part/face3d/data_preparation.py: -------------------------------------------------------------------------------- 1 | """This script is the data preparation script for Deep3DFaceRecon_pytorch 2 | """ 3 | 4 | import os 5 | import numpy as np 6 | import argparse 7 | from util.detect_lm68 import detect_68p,load_lm_graph 8 | from util.skin_mask import get_skin_mask 9 | from util.generate_list import check_list, write_list 10 | import warnings 11 | warnings.filterwarnings("ignore") 12 | 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('--data_root', type=str, default='datasets', help='root directory for training data') 15 | parser.add_argument('--img_folder', nargs="+", required=True, help='folders of training images') 16 | parser.add_argument('--mode', type=str, default='train', help='train or val') 17 | opt = parser.parse_args() 18 | 19 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 20 | 21 | def data_prepare(folder_list,mode): 22 | 23 | lm_sess,input_op,output_op = load_lm_graph('./checkpoints/lm_model/68lm_detector.pb') # load a tensorflow version 68-landmark detector 24 | 25 | for img_folder in folder_list: 26 | detect_68p(img_folder,lm_sess,input_op,output_op) # detect landmarks for images 27 | get_skin_mask(img_folder) # generate skin attention mask for images 28 | 29 | # create files that record path to all training data 30 | msks_list = [] 31 | for img_folder in folder_list: 32 | path = os.path.join(img_folder, 'mask') 33 | msks_list += ['/'.join([img_folder, 'mask', i]) for i in sorted(os.listdir(path)) if 'jpg' in i or 34 | 'png' in i or 'jpeg' in i or 'PNG' in i] 35 | 36 | imgs_list = [i.replace('mask/', '') for i in msks_list] 37 | lms_list = [i.replace('mask', 'landmarks') for i in msks_list] 38 | lms_list = ['.'.join(i.split('.')[:-1]) + '.txt' for i in lms_list] 39 | 40 | lms_list_final, imgs_list_final, msks_list_final = check_list(lms_list, imgs_list, msks_list) # check if the path is valid 41 | write_list(lms_list_final, imgs_list_final, msks_list_final, mode=mode) # save files 42 | 43 | if __name__ == '__main__': 44 | print('Datasets:',opt.img_folder) 45 | data_prepare([os.path.join(opt.data_root,folder) for folder in opt.img_folder],opt.mode) 46 | -------------------------------------------------------------------------------- /third_part/face3d/extract_kp_videos.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import time 4 | import glob 5 | import argparse 6 | import face_alignment 7 | import numpy as np 8 | from PIL import Image 9 | import torch 10 | from tqdm import tqdm 11 | from itertools import cycle 12 | 13 | from torch.multiprocessing import Pool, Process, set_start_method 14 | 15 | class KeypointExtractor(): 16 | def __init__(self): 17 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 18 | self.detector = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, device=device) 19 | 20 | def extract_keypoint(self, images, name=None, info=True): 21 | if isinstance(images, list): 22 | keypoints = [] 23 | if info: 24 | i_range = tqdm(images,desc='landmark Det:') 25 | else: 26 | 
i_range = images 27 | 28 | for image in i_range: 29 | current_kp = self.extract_keypoint(image) 30 | if np.mean(current_kp) == -1 and keypoints: 31 | keypoints.append(keypoints[-1]) 32 | else: 33 | keypoints.append(current_kp[None]) 34 | 35 | keypoints = np.concatenate(keypoints, 0) 36 | np.savetxt(os.path.splitext(name)[0]+'.txt', keypoints.reshape(-1)) 37 | return keypoints 38 | else: 39 | while True: 40 | try: 41 | keypoints = self.detector.get_landmarks_from_image(np.array(images))[0] 42 | break 43 | except RuntimeError as e: 44 | if str(e).startswith('CUDA'): 45 | print("Warning: out of memory, sleep for 1s") 46 | time.sleep(1) 47 | else: 48 | print(e) 49 | break 50 | except TypeError: 51 | print('No face detected in this image') 52 | shape = [68, 2] 53 | keypoints = -1. * np.ones(shape) 54 | break 55 | if name is not None: 56 | np.savetxt(os.path.splitext(name)[0]+'.txt', keypoints.reshape(-1)) 57 | return keypoints 58 | 59 | def read_video(filename): 60 | frames = [] 61 | cap = cv2.VideoCapture(filename) 62 | while cap.isOpened(): 63 | ret, frame = cap.read() 64 | if ret: 65 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 66 | frame = Image.fromarray(frame) 67 | frames.append(frame) 68 | else: 69 | break 70 | cap.release() 71 | return frames 72 | 73 | def run(data): 74 | filename, opt, device = data 75 | os.environ['CUDA_VISIBLE_DEVICES'] = device 76 | kp_extractor = KeypointExtractor() 77 | images = read_video(filename) 78 | name = filename.split('/')[-2:] 79 | os.makedirs(os.path.join(opt.output_dir, name[-2]), exist_ok=True) 80 | kp_extractor.extract_keypoint( 81 | images, 82 | name=os.path.join(opt.output_dir, name[-2], name[-1]) 83 | ) 84 | 85 | if __name__ == '__main__': 86 | set_start_method('spawn') 87 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 88 | parser.add_argument('--input_dir', type=str, help='the folder of the input files') 89 | parser.add_argument('--output_dir', type=str, help='the folder of the output files') 90 | parser.add_argument('--device_ids', type=str, default='0,1') 91 | parser.add_argument('--workers', type=int, default=4) 92 | 93 | opt = parser.parse_args() 94 | filenames = list() 95 | VIDEO_EXTENSIONS_LOWERCASE = {'mp4'} 96 | VIDEO_EXTENSIONS = VIDEO_EXTENSIONS_LOWERCASE.union({f.upper() for f in VIDEO_EXTENSIONS_LOWERCASE}) 97 | extensions = VIDEO_EXTENSIONS 98 | 99 | for ext in extensions: 100 | os.listdir(f'{opt.input_dir}') 101 | print(f'{opt.input_dir}/*.{ext}') 102 | filenames = sorted(glob.glob(f'{opt.input_dir}/*.{ext}')) 103 | print('Total number of videos:', len(filenames)) 104 | pool = Pool(opt.workers) 105 | args_list = cycle([opt]) 106 | device_ids = opt.device_ids.split(",") 107 | device_ids = cycle(device_ids) 108 | for data in tqdm(pool.imap_unordered(run, zip(filenames, args_list, device_ids))): 109 | None 110 | -------------------------------------------------------------------------------- /third_part/face3d/models/__init__.py: -------------------------------------------------------------------------------- 1 | """This package contains modules related to objective functions, optimizations, and network architectures. 2 | 3 | To add a custom model class called 'dummy', you need to add a file called 'dummy_model.py' and define a subclass DummyModel inherited from BaseModel. 4 | You need to implement the following five functions: 5 | -- <__init__>: initialize the class; first call BaseModel.__init__(self, opt). 6 | -- : unpack data from dataset and apply preprocessing. 
7 | -- : produce intermediate results. 8 | -- : calculate loss, gradients, and update network weights. 9 | -- : (optionally) add model-specific options and set default options. 10 | 11 | In the function <__init__>, you need to define four lists: 12 | -- self.loss_names (str list): specify the training losses that you want to plot and save. 13 | -- self.model_names (str list): define networks used in our training. 14 | -- self.visual_names (str list): specify the images that you want to display and save. 15 | -- self.optimizers (optimizer list): define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for an usage. 16 | 17 | Now you can use the model class by specifying flag '--model dummy'. 18 | See our template model class 'template_model.py' for more details. 19 | """ 20 | 21 | import importlib 22 | from face3d.models.base_model import BaseModel 23 | 24 | 25 | def find_model_using_name(model_name): 26 | """Import the module "models/[model_name]_model.py". 27 | 28 | In the file, the class called DatasetNameModel() will 29 | be instantiated. It has to be a subclass of BaseModel, 30 | and it is case-insensitive. 31 | """ 32 | model_filename = "face3d.models." + model_name + "_model" 33 | modellib = importlib.import_module(model_filename) 34 | model = None 35 | target_model_name = model_name.replace('_', '') + 'model' 36 | for name, cls in modellib.__dict__.items(): 37 | if name.lower() == target_model_name.lower() \ 38 | and issubclass(cls, BaseModel): 39 | model = cls 40 | 41 | if model is None: 42 | print("In %s.py, there should be a subclass of BaseModel with class name that matches %s in lowercase." % (model_filename, target_model_name)) 43 | exit(0) 44 | 45 | return model 46 | 47 | 48 | def get_option_setter(model_name): 49 | """Return the static method of the model class.""" 50 | model_class = find_model_using_name(model_name) 51 | return model_class.modify_commandline_options 52 | 53 | 54 | def create_model(opt): 55 | """Create a model given the option. 56 | 57 | This function warps the class CustomDatasetDataLoader. 
58 | This is the main interface between this package and 'train.py'/'test.py' 59 | 60 | Example: 61 | >>> from models import create_model 62 | >>> model = create_model(opt) 63 | """ 64 | model = find_model_using_name(opt.model) 65 | instance = model(opt) 66 | print("model [%s] was created" % type(instance).__name__) 67 | return instance 68 | -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .iresnet import iresnet18, iresnet34, iresnet50, iresnet100, iresnet200 2 | from .mobilefacenet import get_mbf 3 | 4 | 5 | def get_model(name, **kwargs): 6 | # resnet 7 | if name == "r18": 8 | return iresnet18(False, **kwargs) 9 | elif name == "r34": 10 | return iresnet34(False, **kwargs) 11 | elif name == "r50": 12 | return iresnet50(False, **kwargs) 13 | elif name == "r100": 14 | return iresnet100(False, **kwargs) 15 | elif name == "r200": 16 | return iresnet200(False, **kwargs) 17 | elif name == "r2060": 18 | from .iresnet2060 import iresnet2060 19 | return iresnet2060(False, **kwargs) 20 | elif name == "mbf": 21 | fp16 = kwargs.get("fp16", False) 22 | num_features = kwargs.get("num_features", 512) 23 | return get_mbf(fp16=fp16, num_features=num_features) 24 | else: 25 | raise ValueError() -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/configs/3millions.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # configs for test speed 4 | 5 | config = edict() 6 | config.loss = "arcface" 7 | config.network = "r50" 8 | config.resume = False 9 | config.output = None 10 | config.embedding_size = 512 11 | config.sample_rate = 1.0 12 | config.fp16 = True 13 | config.momentum = 0.9 14 | config.weight_decay = 5e-4 15 | config.batch_size = 128 16 | config.lr = 0.1 # batch size is 512 17 | 18 | config.rec = "synthetic" 19 | config.num_classes = 300 * 10000 20 | config.num_epoch = 30 21 | config.warmup_epoch = -1 22 | config.decay_epoch = [10, 16, 22] 23 | config.val_targets = [] 24 | -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/configs/3millions_pfc.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # configs for test speed 4 | 5 | config = edict() 6 | config.loss = "arcface" 7 | config.network = "r50" 8 | config.resume = False 9 | config.output = None 10 | config.embedding_size = 512 11 | config.sample_rate = 0.1 12 | config.fp16 = True 13 | config.momentum = 0.9 14 | config.weight_decay = 5e-4 15 | config.batch_size = 128 16 | config.lr = 0.1 # batch size is 512 17 | 18 | config.rec = "synthetic" 19 | config.num_classes = 300 * 10000 20 | config.num_epoch = 30 21 | config.warmup_epoch = -1 22 | config.decay_epoch = [10, 16, 22] 23 | config.val_targets = [] 24 | -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxwh/video-retalking/d2257f1290bd517ca18a0d15ecaeb11bbcc103a2/third_part/face3d/models/arcface_torch/configs/__init__.py -------------------------------------------------------------------------------- 
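As a minimal usage sketch for the `get_model()` factory in `backbones/__init__.py` above (the import style mirrors this directory's own `inference.py`, so it assumes the working directory is `third_part/face3d/models/arcface_torch`; the 112x112 crop and 512-d embedding follow the configs in this folder rather than a documented API):

```python
# Hedged smoke test for the backbone factory shown above.
import torch
from backbones import get_model

net = get_model("r50", fp16=False)     # builds iresnet50 via the factory
net.eval()
face = torch.randn(1, 3, 112, 112)     # one aligned 112x112 face crop, ArcFace-style
with torch.no_grad():
    embedding = net(face)
print(embedding.shape)                 # expected: torch.Size([1, 512])
```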
/third_part/face3d/models/arcface_torch/configs/base.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.loss = "arcface" 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = "ms1mv3_arcface_r50" 12 | 13 | config.dataset = "ms1m-retinaface-t1" 14 | config.embedding_size = 512 15 | config.sample_rate = 1 16 | config.fp16 = False 17 | config.momentum = 0.9 18 | config.weight_decay = 5e-4 19 | config.batch_size = 128 20 | config.lr = 0.1 # batch size is 512 21 | 22 | if config.dataset == "emore": 23 | config.rec = "/train_tmp/faces_emore" 24 | config.num_classes = 85742 25 | config.num_image = 5822653 26 | config.num_epoch = 16 27 | config.warmup_epoch = -1 28 | config.decay_epoch = [8, 14, ] 29 | config.val_targets = ["lfw", ] 30 | 31 | elif config.dataset == "ms1m-retinaface-t1": 32 | config.rec = "/train_tmp/ms1m-retinaface-t1" 33 | config.num_classes = 93431 34 | config.num_image = 5179510 35 | config.num_epoch = 25 36 | config.warmup_epoch = -1 37 | config.decay_epoch = [11, 17, 22] 38 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 39 | 40 | elif config.dataset == "glint360k": 41 | config.rec = "/train_tmp/glint360k" 42 | config.num_classes = 360232 43 | config.num_image = 17091657 44 | config.num_epoch = 20 45 | config.warmup_epoch = -1 46 | config.decay_epoch = [8, 12, 15, 18] 47 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 48 | 49 | elif config.dataset == "webface": 50 | config.rec = "/train_tmp/faces_webface_112x112" 51 | config.num_classes = 10572 52 | config.num_image = "forget" 53 | config.num_epoch = 34 54 | config.warmup_epoch = -1 55 | config.decay_epoch = [20, 28, 32] 56 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 57 | -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/configs/glint360k_mbf.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.loss = "cosface" 9 | config.network = "mbf" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 0.1 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 2e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 # batch size is 512 19 | 20 | config.rec = "/train_tmp/glint360k" 21 | config.num_classes = 360232 22 | config.num_image = 17091657 23 | config.num_epoch = 20 24 | config.warmup_epoch = -1 25 | config.decay_epoch = [8, 12, 15, 18] 26 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 27 | -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/configs/glint360k_r100.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.loss = "cosface" 9 | config.network = "r100" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | 
config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 # batch size is 512 19 | 20 | config.rec = "/train_tmp/glint360k" 21 | config.num_classes = 360232 22 | config.num_image = 17091657 23 | config.num_epoch = 20 24 | config.warmup_epoch = -1 25 | config.decay_epoch = [8, 12, 15, 18] 26 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 27 | -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/configs/glint360k_r18.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.loss = "cosface" 9 | config.network = "r18" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 # batch size is 512 19 | 20 | config.rec = "/train_tmp/glint360k" 21 | config.num_classes = 360232 22 | config.num_image = 17091657 23 | config.num_epoch = 20 24 | config.warmup_epoch = -1 25 | config.decay_epoch = [8, 12, 15, 18] 26 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 27 | -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/configs/glint360k_r34.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.loss = "cosface" 9 | config.network = "r34" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 # batch size is 512 19 | 20 | config.rec = "/train_tmp/glint360k" 21 | config.num_classes = 360232 22 | config.num_image = 17091657 23 | config.num_epoch = 20 24 | config.warmup_epoch = -1 25 | config.decay_epoch = [8, 12, 15, 18] 26 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 27 | -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/configs/glint360k_r50.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.loss = "cosface" 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 # batch size is 512 19 | 20 | config.rec = "/train_tmp/glint360k" 21 | config.num_classes = 360232 22 | config.num_image = 17091657 23 | config.num_epoch = 20 24 | config.warmup_epoch = -1 25 | config.decay_epoch = [8, 12, 15, 18] 26 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 27 | -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/configs/ms1mv3_mbf.py: 
-------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.loss = "arcface" 9 | config.network = "mbf" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 2e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 # batch size is 512 19 | 20 | config.rec = "/train_tmp/ms1m-retinaface-t1" 21 | config.num_classes = 93431 22 | config.num_image = 5179510 23 | config.num_epoch = 30 24 | config.warmup_epoch = -1 25 | config.decay_epoch = [10, 20, 25] 26 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 27 | -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/configs/ms1mv3_r18.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.loss = "arcface" 9 | config.network = "r18" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 # batch size is 512 19 | 20 | config.rec = "/train_tmp/ms1m-retinaface-t1" 21 | config.num_classes = 93431 22 | config.num_image = 5179510 23 | config.num_epoch = 25 24 | config.warmup_epoch = -1 25 | config.decay_epoch = [10, 16, 22] 26 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 27 | -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/configs/ms1mv3_r2060.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.loss = "arcface" 9 | config.network = "r2060" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 64 18 | config.lr = 0.1 # batch size is 512 19 | 20 | config.rec = "/train_tmp/ms1m-retinaface-t1" 21 | config.num_classes = 93431 22 | config.num_image = 5179510 23 | config.num_epoch = 25 24 | config.warmup_epoch = -1 25 | config.decay_epoch = [10, 16, 22] 26 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 27 | -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/configs/ms1mv3_r34.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.loss = "arcface" 9 | config.network = "r34" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 # 
batch size is 512 19 | 20 | config.rec = "/train_tmp/ms1m-retinaface-t1" 21 | config.num_classes = 93431 22 | config.num_image = 5179510 23 | config.num_epoch = 25 24 | config.warmup_epoch = -1 25 | config.decay_epoch = [10, 16, 22] 26 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 27 | -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/configs/ms1mv3_r50.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # make training faster 4 | # our RAM is 256G 5 | # mount -t tmpfs -o size=140G tmpfs /train_tmp 6 | 7 | config = edict() 8 | config.loss = "arcface" 9 | config.network = "r50" 10 | config.resume = False 11 | config.output = None 12 | config.embedding_size = 512 13 | config.sample_rate = 1.0 14 | config.fp16 = True 15 | config.momentum = 0.9 16 | config.weight_decay = 5e-4 17 | config.batch_size = 128 18 | config.lr = 0.1 # batch size is 512 19 | 20 | config.rec = "/train_tmp/ms1m-retinaface-t1" 21 | config.num_classes = 93431 22 | config.num_image = 5179510 23 | config.num_epoch = 25 24 | config.warmup_epoch = -1 25 | config.decay_epoch = [10, 16, 22] 26 | config.val_targets = ["lfw", "cfp_fp", "agedb_30"] 27 | -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/configs/speed.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | # configs for test speed 4 | 5 | config = edict() 6 | config.loss = "arcface" 7 | config.network = "r50" 8 | config.resume = False 9 | config.output = None 10 | config.embedding_size = 512 11 | config.sample_rate = 1.0 12 | config.fp16 = True 13 | config.momentum = 0.9 14 | config.weight_decay = 5e-4 15 | config.batch_size = 128 16 | config.lr = 0.1 # batch size is 512 17 | 18 | config.rec = "synthetic" 19 | config.num_classes = 100 * 10000 20 | config.num_epoch = 30 21 | config.warmup_epoch = -1 22 | config.decay_epoch = [10, 16, 22] 23 | config.val_targets = [] 24 | -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/dataset.py: -------------------------------------------------------------------------------- 1 | import numbers 2 | import os 3 | import queue as Queue 4 | import threading 5 | 6 | import mxnet as mx 7 | import numpy as np 8 | import torch 9 | from torch.utils.data import DataLoader, Dataset 10 | from torchvision import transforms 11 | 12 | 13 | class BackgroundGenerator(threading.Thread): 14 | def __init__(self, generator, local_rank, max_prefetch=6): 15 | super(BackgroundGenerator, self).__init__() 16 | self.queue = Queue.Queue(max_prefetch) 17 | self.generator = generator 18 | self.local_rank = local_rank 19 | self.daemon = True 20 | self.start() 21 | 22 | def run(self): 23 | torch.cuda.set_device(self.local_rank) 24 | for item in self.generator: 25 | self.queue.put(item) 26 | self.queue.put(None) 27 | 28 | def next(self): 29 | next_item = self.queue.get() 30 | if next_item is None: 31 | raise StopIteration 32 | return next_item 33 | 34 | def __next__(self): 35 | return self.next() 36 | 37 | def __iter__(self): 38 | return self 39 | 40 | 41 | class DataLoaderX(DataLoader): 42 | 43 | def __init__(self, local_rank, **kwargs): 44 | super(DataLoaderX, self).__init__(**kwargs) 45 | self.stream = torch.cuda.Stream(local_rank) 46 | self.local_rank = local_rank 47 | 48 | def 
__iter__(self): 49 | self.iter = super(DataLoaderX, self).__iter__() 50 | self.iter = BackgroundGenerator(self.iter, self.local_rank) 51 | self.preload() 52 | return self 53 | 54 | def preload(self): 55 | self.batch = next(self.iter, None) 56 | if self.batch is None: 57 | return None 58 | with torch.cuda.stream(self.stream): 59 | for k in range(len(self.batch)): 60 | self.batch[k] = self.batch[k].to(device=self.local_rank, non_blocking=True) 61 | 62 | def __next__(self): 63 | torch.cuda.current_stream().wait_stream(self.stream) 64 | batch = self.batch 65 | if batch is None: 66 | raise StopIteration 67 | self.preload() 68 | return batch 69 | 70 | 71 | class MXFaceDataset(Dataset): 72 | def __init__(self, root_dir, local_rank): 73 | super(MXFaceDataset, self).__init__() 74 | self.transform = transforms.Compose( 75 | [transforms.ToPILImage(), 76 | transforms.RandomHorizontalFlip(), 77 | transforms.ToTensor(), 78 | transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), 79 | ]) 80 | self.root_dir = root_dir 81 | self.local_rank = local_rank 82 | path_imgrec = os.path.join(root_dir, 'train.rec') 83 | path_imgidx = os.path.join(root_dir, 'train.idx') 84 | self.imgrec = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r') 85 | s = self.imgrec.read_idx(0) 86 | header, _ = mx.recordio.unpack(s) 87 | if header.flag > 0: 88 | self.header0 = (int(header.label[0]), int(header.label[1])) 89 | self.imgidx = np.array(range(1, int(header.label[0]))) 90 | else: 91 | self.imgidx = np.array(list(self.imgrec.keys)) 92 | 93 | def __getitem__(self, index): 94 | idx = self.imgidx[index] 95 | s = self.imgrec.read_idx(idx) 96 | header, img = mx.recordio.unpack(s) 97 | label = header.label 98 | if not isinstance(label, numbers.Number): 99 | label = label[0] 100 | label = torch.tensor(label, dtype=torch.long) 101 | sample = mx.image.imdecode(img).asnumpy() 102 | if self.transform is not None: 103 | sample = self.transform(sample) 104 | return sample, label 105 | 106 | def __len__(self): 107 | return len(self.imgidx) 108 | 109 | 110 | class SyntheticDataset(Dataset): 111 | def __init__(self, local_rank): 112 | super(SyntheticDataset, self).__init__() 113 | img = np.random.randint(0, 255, size=(112, 112, 3), dtype=np.int32) 114 | img = np.transpose(img, (2, 0, 1)) 115 | img = torch.from_numpy(img).squeeze(0).float() 116 | img = ((img / 255) - 0.5) / 0.5 117 | self.img = img 118 | self.label = 1 119 | 120 | def __getitem__(self, index): 121 | return self.img, self.label 122 | 123 | def __len__(self): 124 | return 1000000 125 | -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/docs/eval.md: -------------------------------------------------------------------------------- 1 | ## Eval on ICCV2021-MFR 2 | 3 | coming soon. 4 | 5 | 6 | ## Eval IJBC 7 | You can eval ijbc with pytorch or onnx. 8 | 9 | 10 | 1. Eval IJBC With Onnx 11 | ```shell 12 | CUDA_VISIBLE_DEVICES=0 python onnx_ijbc.py --model-root ms1mv3_arcface_r50 --image-path IJB_release/IJBC --result-dir ms1mv3_arcface_r50 13 | ``` 14 | 15 | 2. 
Eval IJBC With Pytorch 16 | ```shell 17 | CUDA_VISIBLE_DEVICES=0,1 python eval_ijbc.py \ 18 | --model-prefix ms1mv3_arcface_r50/backbone.pth \ 19 | --image-path IJB_release/IJBC \ 20 | --result-dir ms1mv3_arcface_r50 \ 21 | --batch-size 128 \ 22 | --job ms1mv3_arcface_r50 \ 23 | --target IJBC \ 24 | --network iresnet50 25 | ``` 26 | 27 | ## Inference 28 | 29 | ```shell 30 | python inference.py --weight ms1mv3_arcface_r50/backbone.pth --network r50 31 | ``` 32 | -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/docs/install.md: -------------------------------------------------------------------------------- 1 | ## v1.8.0 2 | ### Linux and Windows 3 | ```shell 4 | # CUDA 11.0 5 | pip --default-timeout=100 install torch==1.8.0+cu111 torchvision==0.9.0+cu111 torchaudio==0.8.0 -f https://download.pytorch.org/whl/torch_stable.html 6 | 7 | # CUDA 10.2 8 | pip --default-timeout=100 install torch==1.8.0 torchvision==0.9.0 torchaudio==0.8.0 9 | 10 | # CPU only 11 | pip --default-timeout=100 install torch==1.8.0+cpu torchvision==0.9.0+cpu torchaudio==0.8.0 -f https://download.pytorch.org/whl/torch_stable.html 12 | 13 | ``` 14 | 15 | 16 | ## v1.7.1 17 | ### Linux and Windows 18 | ```shell 19 | # CUDA 11.0 20 | pip install torch==1.7.1+cu110 torchvision==0.8.2+cu110 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html 21 | 22 | # CUDA 10.2 23 | pip install torch==1.7.1 torchvision==0.8.2 torchaudio==0.7.2 24 | 25 | # CUDA 10.1 26 | pip install torch==1.7.1+cu101 torchvision==0.8.2+cu101 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html 27 | 28 | # CUDA 9.2 29 | pip install torch==1.7.1+cu92 torchvision==0.8.2+cu92 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html 30 | 31 | # CPU only 32 | pip install torch==1.7.1+cpu torchvision==0.8.2+cpu torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html 33 | ``` 34 | 35 | 36 | ## v1.6.0 37 | 38 | ### Linux and Windows 39 | ```shell 40 | # CUDA 10.2 41 | pip install torch==1.6.0 torchvision==0.7.0 42 | 43 | # CUDA 10.1 44 | pip install torch==1.6.0+cu101 torchvision==0.7.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html 45 | 46 | # CUDA 9.2 47 | pip install torch==1.6.0+cu92 torchvision==0.7.0+cu92 -f https://download.pytorch.org/whl/torch_stable.html 48 | 49 | # CPU only 50 | pip install torch==1.6.0+cpu torchvision==0.7.0+cpu -f https://download.pytorch.org/whl/torch_stable.html 51 | ``` -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/docs/modelzoo.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxwh/video-retalking/d2257f1290bd517ca18a0d15ecaeb11bbcc103a2/third_part/face3d/models/arcface_torch/docs/modelzoo.md -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/eval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxwh/video-retalking/d2257f1290bd517ca18a0d15ecaeb11bbcc103a2/third_part/face3d/models/arcface_torch/eval/__init__.py -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/inference.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import cv2 4 | import numpy as np 5 | 
import torch 6 | 7 | from backbones import get_model 8 | 9 | 10 | @torch.no_grad() 11 | def inference(weight, name, img): 12 | if img is None: 13 | img = np.random.randint(0, 255, size=(112, 112, 3), dtype=np.uint8) 14 | else: 15 | img = cv2.imread(img) 16 | img = cv2.resize(img, (112, 112)) 17 | 18 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 19 | img = np.transpose(img, (2, 0, 1)) 20 | img = torch.from_numpy(img).unsqueeze(0).float() 21 | img.div_(255).sub_(0.5).div_(0.5) 22 | net = get_model(name, fp16=False) 23 | net.load_state_dict(torch.load(weight)) 24 | net.eval() 25 | feat = net(img).numpy() 26 | print(feat) 27 | 28 | 29 | if __name__ == "__main__": 30 | parser = argparse.ArgumentParser(description='PyTorch ArcFace Training') 31 | parser.add_argument('--network', type=str, default='r50', help='backbone network') 32 | parser.add_argument('--weight', type=str, default='') 33 | parser.add_argument('--img', type=str, default=None) 34 | args = parser.parse_args() 35 | inference(args.weight, args.network, args.img) 36 | -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | def get_loss(name): 6 | if name == "cosface": 7 | return CosFace() 8 | elif name == "arcface": 9 | return ArcFace() 10 | else: 11 | raise ValueError() 12 | 13 | 14 | class CosFace(nn.Module): 15 | def __init__(self, s=64.0, m=0.40): 16 | super(CosFace, self).__init__() 17 | self.s = s 18 | self.m = m 19 | 20 | def forward(self, cosine, label): 21 | index = torch.where(label != -1)[0] 22 | m_hot = torch.zeros(index.size()[0], cosine.size()[1], device=cosine.device) 23 | m_hot.scatter_(1, label[index, None], self.m) 24 | cosine[index] -= m_hot 25 | ret = cosine * self.s 26 | return ret 27 | 28 | 29 | class ArcFace(nn.Module): 30 | def __init__(self, s=64.0, m=0.5): 31 | super(ArcFace, self).__init__() 32 | self.s = s 33 | self.m = m 34 | 35 | def forward(self, cosine: torch.Tensor, label): 36 | index = torch.where(label != -1)[0] 37 | m_hot = torch.zeros(index.size()[0], cosine.size()[1], device=cosine.device) 38 | m_hot.scatter_(1, label[index, None], self.m) 39 | cosine.acos_() 40 | cosine[index] += m_hot 41 | cosine.cos_().mul_(self.s) 42 | return cosine 43 | -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/requirement.txt: -------------------------------------------------------------------------------- 1 | tensorboard 2 | easydict 3 | mxnet 4 | onnx 5 | sklearn 6 | -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/run.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -m torch.distributed.launch --nproc_per_node=8 --nnodes=1 --node_rank=0 --master_addr="127.0.0.1" --master_port=1234 train.py configs/ms1mv3_r50 2 | ps -ef | grep "train" | grep -v grep | awk '{print "kill -9 "$2}' | sh 3 | -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/torch2onnx.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import onnx 3 | import torch 4 | 5 | 6 | def convert_onnx(net, path_module, output, opset=11, simplify=False): 7 | assert isinstance(net, torch.nn.Module) 8 | img 
= np.random.randint(0, 255, size=(112, 112, 3), dtype=np.int32) 9 | img = img.astype(np.float) 10 | img = (img / 255. - 0.5) / 0.5 # torch style norm 11 | img = img.transpose((2, 0, 1)) 12 | img = torch.from_numpy(img).unsqueeze(0).float() 13 | 14 | weight = torch.load(path_module) 15 | net.load_state_dict(weight) 16 | net.eval() 17 | torch.onnx.export(net, img, output, keep_initializers_as_inputs=False, verbose=False, opset_version=opset) 18 | model = onnx.load(output) 19 | graph = model.graph 20 | graph.input[0].type.tensor_type.shape.dim[0].dim_param = 'None' 21 | if simplify: 22 | from onnxsim import simplify 23 | model, check = simplify(model) 24 | assert check, "Simplified ONNX model could not be validated" 25 | onnx.save(model, output) 26 | 27 | 28 | if __name__ == '__main__': 29 | import os 30 | import argparse 31 | from backbones import get_model 32 | 33 | parser = argparse.ArgumentParser(description='ArcFace PyTorch to onnx') 34 | parser.add_argument('input', type=str, help='input backbone.pth file or path') 35 | parser.add_argument('--output', type=str, default=None, help='output onnx path') 36 | parser.add_argument('--network', type=str, default=None, help='backbone network') 37 | parser.add_argument('--simplify', type=bool, default=False, help='onnx simplify') 38 | args = parser.parse_args() 39 | input_file = args.input 40 | if os.path.isdir(input_file): 41 | input_file = os.path.join(input_file, "backbone.pth") 42 | assert os.path.exists(input_file) 43 | model_name = os.path.basename(os.path.dirname(input_file)).lower() 44 | params = model_name.split("_") 45 | if len(params) >= 3 and params[1] in ('arcface', 'cosface'): 46 | if args.network is None: 47 | args.network = params[2] 48 | assert args.network is not None 49 | print(args) 50 | backbone_onnx = get_model(args.network, dropout=0) 51 | 52 | output_path = args.output 53 | if output_path is None: 54 | output_path = os.path.join(os.path.dirname(__file__), 'onnx') 55 | if not os.path.exists(output_path): 56 | os.makedirs(output_path) 57 | assert os.path.isdir(output_path) 58 | output_file = os.path.join(output_path, "%s.onnx" % model_name) 59 | convert_onnx(backbone_onnx, input_file, output_file, simplify=args.simplify) 60 | -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxwh/video-retalking/d2257f1290bd517ca18a0d15ecaeb11bbcc103a2/third_part/face3d/models/arcface_torch/utils/__init__.py -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/utils/plot.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import os 4 | from pathlib import Path 5 | 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | import pandas as pd 9 | from menpo.visualize.viewmatplotlib import sample_colours_from_colourmap 10 | from prettytable import PrettyTable 11 | from sklearn.metrics import roc_curve, auc 12 | 13 | image_path = "/data/anxiang/IJB_release/IJBC" 14 | files = [ 15 | "./ms1mv3_arcface_r100/ms1mv3_arcface_r100/ijbc.npy" 16 | ] 17 | 18 | 19 | def read_template_pair_list(path): 20 | pairs = pd.read_csv(path, sep=' ', header=None).values 21 | t1 = pairs[:, 0].astype(np.int) 22 | t2 = pairs[:, 1].astype(np.int) 23 | label = pairs[:, 2].astype(np.int) 24 | return t1, t2, label 25 | 26 | 27 | p1, p2, 
label = read_template_pair_list( 28 | os.path.join('%s/meta' % image_path, 29 | '%s_template_pair_label.txt' % 'ijbc')) 30 | 31 | methods = [] 32 | scores = [] 33 | for file in files: 34 | methods.append(file.split('/')[-2]) 35 | scores.append(np.load(file)) 36 | 37 | methods = np.array(methods) 38 | scores = dict(zip(methods, scores)) 39 | colours = dict( 40 | zip(methods, sample_colours_from_colourmap(methods.shape[0], 'Set2'))) 41 | x_labels = [10 ** -6, 10 ** -5, 10 ** -4, 10 ** -3, 10 ** -2, 10 ** -1] 42 | tpr_fpr_table = PrettyTable(['Methods'] + [str(x) for x in x_labels]) 43 | fig = plt.figure() 44 | for method in methods: 45 | fpr, tpr, _ = roc_curve(label, scores[method]) 46 | roc_auc = auc(fpr, tpr) 47 | fpr = np.flipud(fpr) 48 | tpr = np.flipud(tpr) # select largest tpr at same fpr 49 | plt.plot(fpr, 50 | tpr, 51 | color=colours[method], 52 | lw=1, 53 | label=('[%s (AUC = %0.4f %%)]' % 54 | (method.split('-')[-1], roc_auc * 100))) 55 | tpr_fpr_row = [] 56 | tpr_fpr_row.append("%s-%s" % (method, "IJBC")) 57 | for fpr_iter in np.arange(len(x_labels)): 58 | _, min_index = min( 59 | list(zip(abs(fpr - x_labels[fpr_iter]), range(len(fpr))))) 60 | tpr_fpr_row.append('%.2f' % (tpr[min_index] * 100)) 61 | tpr_fpr_table.add_row(tpr_fpr_row) 62 | plt.xlim([10 ** -6, 0.1]) 63 | plt.ylim([0.3, 1.0]) 64 | plt.grid(linestyle='--', linewidth=1) 65 | plt.xticks(x_labels) 66 | plt.yticks(np.linspace(0.3, 1.0, 8, endpoint=True)) 67 | plt.xscale('log') 68 | plt.xlabel('False Positive Rate') 69 | plt.ylabel('True Positive Rate') 70 | plt.title('ROC on IJB') 71 | plt.legend(loc="lower right") 72 | print(tpr_fpr_table) 73 | -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/utils/utils_amp.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | 3 | import torch 4 | 5 | if torch.__version__ < '1.9': 6 | Iterable = torch._six.container_abcs.Iterable 7 | else: 8 | import collections 9 | 10 | Iterable = collections.abc.Iterable 11 | from torch.cuda.amp import GradScaler 12 | 13 | 14 | class _MultiDeviceReplicator(object): 15 | """ 16 | Lazily serves copies of a tensor to requested devices. Copies are cached per-device. 17 | """ 18 | 19 | def __init__(self, master_tensor: torch.Tensor) -> None: 20 | assert master_tensor.is_cuda 21 | self.master = master_tensor 22 | self._per_device_tensors: Dict[torch.device, torch.Tensor] = {} 23 | 24 | def get(self, device) -> torch.Tensor: 25 | retval = self._per_device_tensors.get(device, None) 26 | if retval is None: 27 | retval = self.master.to(device=device, non_blocking=True, copy=True) 28 | self._per_device_tensors[device] = retval 29 | return retval 30 | 31 | 32 | class MaxClipGradScaler(GradScaler): 33 | def __init__(self, init_scale, max_scale: float, growth_interval=100): 34 | GradScaler.__init__(self, init_scale=init_scale, growth_interval=growth_interval) 35 | self.max_scale = max_scale 36 | 37 | def scale_clip(self): 38 | if self.get_scale() == self.max_scale: 39 | self.set_growth_factor(1) 40 | elif self.get_scale() < self.max_scale: 41 | self.set_growth_factor(2) 42 | elif self.get_scale() > self.max_scale: 43 | self._scale.fill_(self.max_scale) 44 | self.set_growth_factor(1) 45 | 46 | def scale(self, outputs): 47 | """ 48 | Multiplies ('scales') a tensor or list of tensors by the scale factor. 49 | 50 | Returns scaled outputs. 
If this instance of :class:`GradScaler` is not enabled, outputs are returned 51 | unmodified. 52 | 53 | Arguments: 54 | outputs (Tensor or iterable of Tensors): Outputs to scale. 55 | """ 56 | if not self._enabled: 57 | return outputs 58 | self.scale_clip() 59 | # Short-circuit for the common case. 60 | if isinstance(outputs, torch.Tensor): 61 | assert outputs.is_cuda 62 | if self._scale is None: 63 | self._lazy_init_scale_growth_tracker(outputs.device) 64 | assert self._scale is not None 65 | return outputs * self._scale.to(device=outputs.device, non_blocking=True) 66 | 67 | # Invoke the more complex machinery only if we're treating multiple outputs. 68 | stash: List[_MultiDeviceReplicator] = [] # holds a reference that can be overwritten by apply_scale 69 | 70 | def apply_scale(val): 71 | if isinstance(val, torch.Tensor): 72 | assert val.is_cuda 73 | if len(stash) == 0: 74 | if self._scale is None: 75 | self._lazy_init_scale_growth_tracker(val.device) 76 | assert self._scale is not None 77 | stash.append(_MultiDeviceReplicator(self._scale)) 78 | return val * stash[0].get(val.device) 79 | elif isinstance(val, Iterable): 80 | iterable = map(apply_scale, val) 81 | if isinstance(val, list) or isinstance(val, tuple): 82 | return type(val)(iterable) 83 | else: 84 | return iterable 85 | else: 86 | raise ValueError("outputs must be a Tensor or an iterable of Tensors") 87 | 88 | return apply_scale(outputs) 89 | -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/utils/utils_config.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import os.path as osp 3 | 4 | 5 | def get_config(config_file): 6 | assert config_file.startswith('configs/'), 'config file setting must start with configs/' 7 | temp_config_name = osp.basename(config_file) 8 | temp_module_name = osp.splitext(temp_config_name)[0] 9 | config = importlib.import_module("configs.base") 10 | cfg = config.config 11 | config = importlib.import_module("configs.%s" % temp_module_name) 12 | job_cfg = config.config 13 | cfg.update(job_cfg) 14 | if cfg.output is None: 15 | cfg.output = osp.join('work_dirs', temp_module_name) 16 | return cfg -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/utils/utils_logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | 5 | 6 | class AverageMeter(object): 7 | """Computes and stores the average and current value 8 | """ 9 | 10 | def __init__(self): 11 | self.val = None 12 | self.avg = None 13 | self.sum = None 14 | self.count = None 15 | self.reset() 16 | 17 | def reset(self): 18 | self.val = 0 19 | self.avg = 0 20 | self.sum = 0 21 | self.count = 0 22 | 23 | def update(self, val, n=1): 24 | self.val = val 25 | self.sum += val * n 26 | self.count += n 27 | self.avg = self.sum / self.count 28 | 29 | 30 | def init_logging(rank, models_root): 31 | if rank == 0: 32 | log_root = logging.getLogger() 33 | log_root.setLevel(logging.INFO) 34 | formatter = logging.Formatter("Training: %(asctime)s-%(message)s") 35 | handler_file = logging.FileHandler(os.path.join(models_root, "training.log")) 36 | handler_stream = logging.StreamHandler(sys.stdout) 37 | handler_file.setFormatter(formatter) 38 | handler_stream.setFormatter(formatter) 39 | log_root.addHandler(handler_file) 40 | log_root.addHandler(handler_stream) 41 | log_root.info('rank_id: %d' 
% rank) 42 | -------------------------------------------------------------------------------- /third_part/face3d/models/arcface_torch/utils/utils_os.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxwh/video-retalking/d2257f1290bd517ca18a0d15ecaeb11bbcc103a2/third_part/face3d/models/arcface_torch/utils/utils_os.py -------------------------------------------------------------------------------- /third_part/face3d/models/losses.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from kornia.geometry import warp_affine 5 | import torch.nn.functional as F 6 | 7 | def resize_n_crop(image, M, dsize=112): 8 | # image: (b, c, h, w) 9 | # M : (b, 2, 3) 10 | return warp_affine(image, M, dsize=(dsize, dsize)) 11 | 12 | ### perceptual level loss 13 | class PerceptualLoss(nn.Module): 14 | def __init__(self, recog_net, input_size=112): 15 | super(PerceptualLoss, self).__init__() 16 | self.recog_net = recog_net 17 | self.preprocess = lambda x: 2 * x - 1 18 | self.input_size=input_size 19 | def forward(imageA, imageB, M): 20 | """ 21 | 1 - cosine distance 22 | Parameters: 23 | imageA --torch.tensor (B, 3, H, W), range (0, 1) , RGB order 24 | imageB --same as imageA 25 | """ 26 | 27 | imageA = self.preprocess(resize_n_crop(imageA, M, self.input_size)) 28 | imageB = self.preprocess(resize_n_crop(imageB, M, self.input_size)) 29 | 30 | # freeze bn 31 | self.recog_net.eval() 32 | 33 | id_featureA = F.normalize(self.recog_net(imageA), dim=-1, p=2) 34 | id_featureB = F.normalize(self.recog_net(imageB), dim=-1, p=2) 35 | cosine_d = torch.sum(id_featureA * id_featureB, dim=-1) 36 | # assert torch.sum((cosine_d > 1).float()) == 0 37 | return torch.sum(1 - cosine_d) / cosine_d.shape[0] 38 | 39 | def perceptual_loss(id_featureA, id_featureB): 40 | cosine_d = torch.sum(id_featureA * id_featureB, dim=-1) 41 | # assert torch.sum((cosine_d > 1).float()) == 0 42 | return torch.sum(1 - cosine_d) / cosine_d.shape[0] 43 | 44 | ### image level loss 45 | def photo_loss(imageA, imageB, mask, eps=1e-6): 46 | """ 47 | l2 norm (with sqrt, to ensure backward stabililty, use eps, otherwise Nan may occur) 48 | Parameters: 49 | imageA --torch.tensor (B, 3, H, W), range (0, 1), RGB order 50 | imageB --same as imageA 51 | """ 52 | loss = torch.sqrt(eps + torch.sum((imageA - imageB) ** 2, dim=1, keepdims=True)) * mask 53 | loss = torch.sum(loss) / torch.max(torch.sum(mask), torch.tensor(1.0).to(mask.device)) 54 | return loss 55 | 56 | def landmark_loss(predict_lm, gt_lm, weight=None): 57 | """ 58 | weighted mse loss 59 | Parameters: 60 | predict_lm --torch.tensor (B, 68, 2) 61 | gt_lm --torch.tensor (B, 68, 2) 62 | weight --numpy.array (1, 68) 63 | """ 64 | if not weight: 65 | weight = np.ones([68]) 66 | weight[28:31] = 20 67 | weight[-8:] = 20 68 | weight = np.expand_dims(weight, 0) 69 | weight = torch.tensor(weight).to(predict_lm.device) 70 | loss = torch.sum((predict_lm - gt_lm)**2, dim=-1) * weight 71 | loss = torch.sum(loss) / (predict_lm.shape[0] * predict_lm.shape[1]) 72 | return loss 73 | 74 | 75 | ### regulization 76 | def reg_loss(coeffs_dict, opt=None): 77 | """ 78 | l2 norm without the sqrt, from yu's implementation (mse) 79 | tf.nn.l2_loss https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss 80 | Parameters: 81 | coeffs_dict -- a dict of torch.tensors , keys: id, exp, tex, angle, gamma, trans 82 | 83 | """ 84 | # coefficient regularization to 
ensure plausible 3d faces 85 | if opt: 86 | w_id, w_exp, w_tex = opt.w_id, opt.w_exp, opt.w_tex 87 | else: 88 | w_id, w_exp, w_tex = 1, 1, 1, 1 89 | creg_loss = w_id * torch.sum(coeffs_dict['id'] ** 2) + \ 90 | w_exp * torch.sum(coeffs_dict['exp'] ** 2) + \ 91 | w_tex * torch.sum(coeffs_dict['tex'] ** 2) 92 | creg_loss = creg_loss / coeffs_dict['id'].shape[0] 93 | 94 | # gamma regularization to ensure a nearly-monochromatic light 95 | gamma = coeffs_dict['gamma'].reshape([-1, 3, 9]) 96 | gamma_mean = torch.mean(gamma, dim=1, keepdims=True) 97 | gamma_loss = torch.mean((gamma - gamma_mean) ** 2) 98 | 99 | return creg_loss, gamma_loss 100 | 101 | def reflectance_loss(texture, mask): 102 | """ 103 | minimize texture variance (mse), albedo regularization to ensure an uniform skin albedo 104 | Parameters: 105 | texture --torch.tensor, (B, N, 3) 106 | mask --torch.tensor, (N), 1 or 0 107 | 108 | """ 109 | mask = mask.reshape([1, mask.shape[0], 1]) 110 | texture_mean = torch.sum(mask * texture, dim=1, keepdims=True) / torch.sum(mask) 111 | loss = torch.sum(((texture - texture_mean) * mask)**2) / (texture.shape[0] * torch.sum(mask)) 112 | return loss 113 | 114 | -------------------------------------------------------------------------------- /third_part/face3d/options/__init__.py: -------------------------------------------------------------------------------- 1 | """This package options includes option modules: training options, test options, and basic options (used in both training and test).""" 2 | -------------------------------------------------------------------------------- /third_part/face3d/options/inference_options.py: -------------------------------------------------------------------------------- 1 | from face3d.options.base_options import BaseOptions 2 | 3 | 4 | class InferenceOptions(BaseOptions): 5 | """This class includes test options. 6 | 7 | It also includes shared options defined in BaseOptions. 8 | """ 9 | 10 | def initialize(self, parser): 11 | parser = BaseOptions.initialize(self, parser) # define shared options 12 | parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc') 13 | parser.add_argument('--dataset_mode', type=str, default=None, help='chooses how datasets are loaded. [None | flist]') 14 | 15 | parser.add_argument('--input_dir', type=str, help='the folder of the input files') 16 | parser.add_argument('--keypoint_dir', type=str, help='the folder of the keypoint files') 17 | parser.add_argument('--output_dir', type=str, default='mp4', help='the output dir to save the extracted coefficients') 18 | parser.add_argument('--save_split_files', action='store_true', help='save split files or not') 19 | parser.add_argument('--inference_batch_size', type=int, default=8) 20 | 21 | # Dropout and Batchnorm has different behavior during training and test. 22 | self.isTrain = False 23 | return parser 24 | -------------------------------------------------------------------------------- /third_part/face3d/options/test_options.py: -------------------------------------------------------------------------------- 1 | """This script contains the test options for Deep3DFaceRecon_pytorch 2 | """ 3 | 4 | from .base_options import BaseOptions 5 | 6 | 7 | class TestOptions(BaseOptions): 8 | """This class includes test options. 9 | 10 | It also includes shared options defined in BaseOptions. 
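A minimal smoke-test sketch for the loss helpers in `face3d/models/losses.py` above (batch size, resolution, and landmark values are illustrative). One caution worth flagging: `reg_loss` falls back to `w_id, w_exp, w_tex = 1, 1, 1, 1` when `opt` is `None`, which unpacks four values into three names, so pass an `opt` carrying `w_id`, `w_exp`, and `w_tex` (or patch that line) before relying on the default path.

```python
# Hedged sketch exercising photo_loss and landmark_loss with dummy tensors;
# the shapes follow the docstrings above, the values themselves are arbitrary.
import torch
from face3d.models.losses import photo_loss, landmark_loss

B, H, W = 2, 224, 224
img_a = torch.rand(B, 3, H, W)          # prediction, range (0, 1), RGB
img_b = torch.rand(B, 3, H, W)          # target
mask  = torch.ones(B, 1, H, W)          # skin/visibility mask
print(photo_loss(img_a, img_b, mask))   # masked l2 photometric error

pred_lm = torch.rand(B, 68, 2) * 223    # predicted 68 landmarks
gt_lm   = torch.rand(B, 68, 2) * 223    # ground-truth landmarks
print(landmark_loss(pred_lm, gt_lm))    # weighted mse (nose/mouth points weighted 20x)
```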
11 | """ 12 | 13 | def initialize(self, parser): 14 | parser = BaseOptions.initialize(self, parser) # define shared options 15 | parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc') 16 | parser.add_argument('--dataset_mode', type=str, default=None, help='chooses how datasets are loaded. [None | flist]') 17 | parser.add_argument('--img_folder', type=str, default='examples', help='folder for test images.') 18 | 19 | # Dropout and Batchnorm has different behavior during training and test. 20 | self.isTrain = False 21 | return parser 22 | -------------------------------------------------------------------------------- /third_part/face3d/options/train_options.py: -------------------------------------------------------------------------------- 1 | """This script contains the training options for Deep3DFaceRecon_pytorch 2 | """ 3 | 4 | from .base_options import BaseOptions 5 | from util import util 6 | 7 | class TrainOptions(BaseOptions): 8 | """This class includes training options. 9 | 10 | It also includes shared options defined in BaseOptions. 11 | """ 12 | 13 | def initialize(self, parser): 14 | parser = BaseOptions.initialize(self, parser) 15 | # dataset parameters 16 | # for train 17 | parser.add_argument('--data_root', type=str, default='./', help='dataset root') 18 | parser.add_argument('--flist', type=str, default='datalist/train/masks.txt', help='list of mask names of training set') 19 | parser.add_argument('--batch_size', type=int, default=32) 20 | parser.add_argument('--dataset_mode', type=str, default='flist', help='chooses how datasets are loaded. [None | flist]') 21 | parser.add_argument('--serial_batches', action='store_true', help='if true, takes images in order to make batches, otherwise takes them randomly') 22 | parser.add_argument('--num_threads', default=4, type=int, help='# threads for loading data') 23 | parser.add_argument('--max_dataset_size', type=int, default=float("inf"), help='Maximum number of samples allowed per dataset. 
If the dataset directory contains more than max_dataset_size, only a subset is loaded.') 24 | parser.add_argument('--preprocess', type=str, default='shift_scale_rot_flip', help='scaling and cropping of images at load time [shift_scale_rot_flip | shift_scale | shift | shift_rot_flip ]') 25 | parser.add_argument('--use_aug', type=util.str2bool, nargs='?', const=True, default=True, help='whether use data augmentation') 26 | 27 | # for val 28 | parser.add_argument('--flist_val', type=str, default='datalist/val/masks.txt', help='list of mask names of val set') 29 | parser.add_argument('--batch_size_val', type=int, default=32) 30 | 31 | 32 | # visualization parameters 33 | parser.add_argument('--display_freq', type=int, default=1000, help='frequency of showing training results on screen') 34 | parser.add_argument('--print_freq', type=int, default=100, help='frequency of showing training results on console') 35 | 36 | # network saving and loading parameters 37 | parser.add_argument('--save_latest_freq', type=int, default=5000, help='frequency of saving the latest results') 38 | parser.add_argument('--save_epoch_freq', type=int, default=1, help='frequency of saving checkpoints at the end of epochs') 39 | parser.add_argument('--evaluation_freq', type=int, default=5000, help='evaluation freq') 40 | parser.add_argument('--save_by_iter', action='store_true', help='whether saves model by iteration') 41 | parser.add_argument('--continue_train', action='store_true', help='continue training: load the latest model') 42 | parser.add_argument('--epoch_count', type=int, default=1, help='the starting epoch count, we save the model by , +, ...') 43 | parser.add_argument('--phase', type=str, default='train', help='train, val, test, etc') 44 | parser.add_argument('--pretrained_name', type=str, default=None, help='resume training from another checkpoint') 45 | 46 | # training parameters 47 | parser.add_argument('--n_epochs', type=int, default=20, help='number of epochs with the initial learning rate') 48 | parser.add_argument('--lr', type=float, default=0.0001, help='initial learning rate for adam') 49 | parser.add_argument('--lr_policy', type=str, default='step', help='learning rate policy. 
[linear | step | plateau | cosine]') 50 | parser.add_argument('--lr_decay_epochs', type=int, default=10, help='multiply by a gamma every lr_decay_epochs epoches') 51 | 52 | self.isTrain = True 53 | return parser 54 | -------------------------------------------------------------------------------- /third_part/face3d/util/BBRegressorParam_r.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxwh/video-retalking/d2257f1290bd517ca18a0d15ecaeb11bbcc103a2/third_part/face3d/util/BBRegressorParam_r.mat -------------------------------------------------------------------------------- /third_part/face3d/util/__init__.py: -------------------------------------------------------------------------------- 1 | """This package includes a miscellaneous collection of useful helper functions.""" 2 | from face3d.util import * 3 | -------------------------------------------------------------------------------- /third_part/face3d/util/detect_lm68.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | from scipy.io import loadmat 5 | import tensorflow as tf 6 | from util.preprocess import align_for_lm 7 | from shutil import move 8 | 9 | mean_face = np.loadtxt('util/test_mean_face.txt') 10 | mean_face = mean_face.reshape([68, 2]) 11 | 12 | def save_label(labels, save_path): 13 | np.savetxt(save_path, labels) 14 | 15 | def draw_landmarks(img, landmark, save_name): 16 | landmark = landmark 17 | lm_img = np.zeros([img.shape[0], img.shape[1], 3]) 18 | lm_img[:] = img.astype(np.float32) 19 | landmark = np.round(landmark).astype(np.int32) 20 | 21 | for i in range(len(landmark)): 22 | for j in range(-1, 1): 23 | for k in range(-1, 1): 24 | if img.shape[0] - 1 - landmark[i, 1]+j > 0 and \ 25 | img.shape[0] - 1 - landmark[i, 1]+j < img.shape[0] and \ 26 | landmark[i, 0]+k > 0 and \ 27 | landmark[i, 0]+k < img.shape[1]: 28 | lm_img[img.shape[0] - 1 - landmark[i, 1]+j, landmark[i, 0]+k, 29 | :] = np.array([0, 0, 255]) 30 | lm_img = lm_img.astype(np.uint8) 31 | 32 | cv2.imwrite(save_name, lm_img) 33 | 34 | 35 | def load_data(img_name, txt_name): 36 | return cv2.imread(img_name), np.loadtxt(txt_name) 37 | 38 | # create tensorflow graph for landmark detector 39 | def load_lm_graph(graph_filename): 40 | with tf.gfile.GFile(graph_filename, 'rb') as f: 41 | graph_def = tf.GraphDef() 42 | graph_def.ParseFromString(f.read()) 43 | 44 | with tf.Graph().as_default() as graph: 45 | tf.import_graph_def(graph_def, name='net') 46 | img_224 = graph.get_tensor_by_name('net/input_imgs:0') 47 | output_lm = graph.get_tensor_by_name('net/lm:0') 48 | lm_sess = tf.Session(graph=graph) 49 | 50 | return lm_sess,img_224,output_lm 51 | 52 | # landmark detection 53 | def detect_68p(img_path,sess,input_op,output_op): 54 | print('detecting landmarks......') 55 | names = [i for i in sorted(os.listdir( 56 | img_path)) if 'jpg' in i or 'png' in i or 'jpeg' in i or 'PNG' in i] 57 | vis_path = os.path.join(img_path, 'vis') 58 | remove_path = os.path.join(img_path, 'remove') 59 | save_path = os.path.join(img_path, 'landmarks') 60 | if not os.path.isdir(vis_path): 61 | os.makedirs(vis_path) 62 | if not os.path.isdir(remove_path): 63 | os.makedirs(remove_path) 64 | if not os.path.isdir(save_path): 65 | os.makedirs(save_path) 66 | 67 | for i in range(0, len(names)): 68 | name = names[i] 69 | print('%05d' % (i), ' ', name) 70 | full_image_name = os.path.join(img_path, name) 71 | txt_name = 
'.'.join(name.split('.')[:-1]) + '.txt' 72 | full_txt_name = os.path.join(img_path, 'detections', txt_name) # 5 facial landmark path for each image 73 | 74 | # if an image does not have detected 5 facial landmarks, remove it from the training list 75 | if not os.path.isfile(full_txt_name): 76 | move(full_image_name, os.path.join(remove_path, name)) 77 | continue 78 | 79 | # load data 80 | img, five_points = load_data(full_image_name, full_txt_name) 81 | input_img, scale, bbox = align_for_lm(img, five_points) # align for 68 landmark detection 82 | 83 | # if the alignment fails, remove corresponding image from the training list 84 | if scale == 0: 85 | move(full_txt_name, os.path.join( 86 | remove_path, txt_name)) 87 | move(full_image_name, os.path.join(remove_path, name)) 88 | continue 89 | 90 | # detect landmarks 91 | input_img = np.reshape( 92 | input_img, [1, 224, 224, 3]).astype(np.float32) 93 | landmark = sess.run( 94 | output_op, feed_dict={input_op: input_img}) 95 | 96 | # transform back to original image coordinate 97 | landmark = landmark.reshape([68, 2]) + mean_face 98 | landmark[:, 1] = 223 - landmark[:, 1] 99 | landmark = landmark / scale 100 | landmark[:, 0] = landmark[:, 0] + bbox[0] 101 | landmark[:, 1] = landmark[:, 1] + bbox[1] 102 | landmark[:, 1] = img.shape[0] - 1 - landmark[:, 1] 103 | 104 | if i % 100 == 0: 105 | draw_landmarks(img, landmark, os.path.join(vis_path, name)) 106 | save_label(landmark, os.path.join(save_path, txt_name)) 107 | -------------------------------------------------------------------------------- /third_part/face3d/util/generate_list.py: -------------------------------------------------------------------------------- 1 | """This script is to generate training list files for Deep3DFaceRecon_pytorch 2 | """ 3 | 4 | import os 5 | 6 | # save path to training data 7 | def write_list(lms_list, imgs_list, msks_list, mode='train',save_folder='datalist', save_name=''): 8 | save_path = os.path.join(save_folder, mode) 9 | if not os.path.isdir(save_path): 10 | os.makedirs(save_path) 11 | with open(os.path.join(save_path, save_name + 'landmarks.txt'), 'w') as fd: 12 | fd.writelines([i + '\n' for i in lms_list]) 13 | 14 | with open(os.path.join(save_path, save_name + 'images.txt'), 'w') as fd: 15 | fd.writelines([i + '\n' for i in imgs_list]) 16 | 17 | with open(os.path.join(save_path, save_name + 'masks.txt'), 'w') as fd: 18 | fd.writelines([i + '\n' for i in msks_list]) 19 | 20 | # check if the path is valid 21 | def check_list(rlms_list, rimgs_list, rmsks_list): 22 | lms_list, imgs_list, msks_list = [], [], [] 23 | for i in range(len(rlms_list)): 24 | flag = 'false' 25 | lm_path = rlms_list[i] 26 | im_path = rimgs_list[i] 27 | msk_path = rmsks_list[i] 28 | if os.path.isfile(lm_path) and os.path.isfile(im_path) and os.path.isfile(msk_path): 29 | flag = 'true' 30 | lms_list.append(rlms_list[i]) 31 | imgs_list.append(rimgs_list[i]) 32 | msks_list.append(rmsks_list[i]) 33 | print(i, rlms_list[i], flag) 34 | return lms_list, imgs_list, msks_list 35 | -------------------------------------------------------------------------------- /third_part/face3d/util/html.py: -------------------------------------------------------------------------------- 1 | import dominate 2 | from dominate.tags import meta, h3, table, tr, td, p, a, img, br 3 | import os 4 | 5 | 6 | class HTML: 7 | """This HTML class allows us to save images and write texts into a single HTML file. 
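Returning to `generate_list.py` above, a minimal sketch of how its two helpers fit together when building the Deep3DFaceRecon training lists (hedged: the paths are placeholders rather than files shipped with the repository, and the import assumes `face3d` is importable as a package):

```python
# check_list() keeps only triples whose three files all exist on disk;
# write_list() then dumps them to datalist/train/{landmarks,images,masks}.txt.
from face3d.util.generate_list import check_list, write_list

raw_lms  = ['data/000001_landmarks.txt', 'data/000002_landmarks.txt']
raw_imgs = ['data/000001.png', 'data/000002.png']
raw_msks = ['data/000001_mask.png', 'data/000002_mask.png']

lms, imgs, msks = check_list(raw_lms, raw_imgs, raw_msks)
write_list(lms, imgs, msks, mode='train', save_folder='datalist')
```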
8 | 9 | It consists of functions such as (add a text header to the HTML file), 10 | (add a row of images to the HTML file), and (save the HTML to the disk). 11 | It is based on Python library 'dominate', a Python library for creating and manipulating HTML documents using a DOM API. 12 | """ 13 | 14 | def __init__(self, web_dir, title, refresh=0): 15 | """Initialize the HTML classes 16 | 17 | Parameters: 18 | web_dir (str) -- a directory that stores the webpage. HTML file will be created at /index.html; images will be saved at 0: 32 | with self.doc.head: 33 | meta(http_equiv="refresh", content=str(refresh)) 34 | 35 | def get_image_dir(self): 36 | """Return the directory that stores images""" 37 | return self.img_dir 38 | 39 | def add_header(self, text): 40 | """Insert a header to the HTML file 41 | 42 | Parameters: 43 | text (str) -- the header text 44 | """ 45 | with self.doc: 46 | h3(text) 47 | 48 | def add_images(self, ims, txts, links, width=400): 49 | """add images to the HTML file 50 | 51 | Parameters: 52 | ims (str list) -- a list of image paths 53 | txts (str list) -- a list of image names shown on the website 54 | links (str list) -- a list of hyperref links; when you click an image, it will redirect you to a new page 55 | """ 56 | self.t = table(border=1, style="table-layout: fixed;") # Insert a table 57 | self.doc.add(self.t) 58 | with self.t: 59 | with tr(): 60 | for im, txt, link in zip(ims, txts, links): 61 | with td(style="word-wrap: break-word;", halign="center", valign="top"): 62 | with p(): 63 | with a(href=os.path.join('images', link)): 64 | img(style="width:%dpx" % width, src=os.path.join('images', im)) 65 | br() 66 | p(txt) 67 | 68 | def save(self): 69 | """save the current content to the HTML file""" 70 | html_file = '%s/index.html' % self.web_dir 71 | f = open(html_file, 'wt') 72 | f.write(self.doc.render()) 73 | f.close() 74 | 75 | 76 | if __name__ == '__main__': # we show an example usage here. 77 | html = HTML('web/', 'test_html') 78 | html.add_header('hello world') 79 | 80 | ims, txts, links = [], [], [] 81 | for n in range(4): 82 | ims.append('image_%d.png' % n) 83 | txts.append('text_%d' % n) 84 | links.append('image_%d.png' % n) 85 | html.add_images(ims, txts, links) 86 | html.save() 87 | -------------------------------------------------------------------------------- /third_part/face3d/util/nvdiffrast.py: -------------------------------------------------------------------------------- 1 | """This script is the differentiable renderer for Deep3DFaceRecon_pytorch 2 | Attention, antialiasing step is missing in current version. 
3 | """ 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | import kornia 8 | from kornia.geometry.camera import pixel2cam 9 | import numpy as np 10 | from typing import List 11 | import nvdiffrast.torch as dr 12 | from scipy.io import loadmat 13 | from torch import nn 14 | 15 | def ndc_projection(x=0.1, n=1.0, f=50.0): 16 | return np.array([[n/x, 0, 0, 0], 17 | [ 0, n/-x, 0, 0], 18 | [ 0, 0, -(f+n)/(f-n), -(2*f*n)/(f-n)], 19 | [ 0, 0, -1, 0]]).astype(np.float32) 20 | 21 | class MeshRenderer(nn.Module): 22 | def __init__(self, 23 | rasterize_fov, 24 | znear=0.1, 25 | zfar=10, 26 | rasterize_size=224): 27 | super(MeshRenderer, self).__init__() 28 | 29 | x = np.tan(np.deg2rad(rasterize_fov * 0.5)) * znear 30 | self.ndc_proj = torch.tensor(ndc_projection(x=x, n=znear, f=zfar)).matmul( 31 | torch.diag(torch.tensor([1., -1, -1, 1]))) 32 | self.rasterize_size = rasterize_size 33 | self.glctx = None 34 | 35 | def forward(self, vertex, tri, feat=None): 36 | """ 37 | Return: 38 | mask -- torch.tensor, size (B, 1, H, W) 39 | depth -- torch.tensor, size (B, 1, H, W) 40 | features(optional) -- torch.tensor, size (B, C, H, W) if feat is not None 41 | 42 | Parameters: 43 | vertex -- torch.tensor, size (B, N, 3) 44 | tri -- torch.tensor, size (B, M, 3) or (M, 3), triangles 45 | feat(optional) -- torch.tensor, size (B, C), features 46 | """ 47 | device = vertex.device 48 | rsize = int(self.rasterize_size) 49 | ndc_proj = self.ndc_proj.to(device) 50 | # trans to homogeneous coordinates of 3d vertices, the direction of y is the same as v 51 | if vertex.shape[-1] == 3: 52 | vertex = torch.cat([vertex, torch.ones([*vertex.shape[:2], 1]).to(device)], dim=-1) 53 | vertex[..., 1] = -vertex[..., 1] 54 | 55 | 56 | vertex_ndc = vertex @ ndc_proj.t() 57 | if self.glctx is None: 58 | self.glctx = dr.RasterizeGLContext(device=device) 59 | print("create glctx on device cuda:%d"%device.index) 60 | 61 | ranges = None 62 | if isinstance(tri, List) or len(tri.shape) == 3: 63 | vum = vertex_ndc.shape[1] 64 | fnum = torch.tensor([f.shape[0] for f in tri]).unsqueeze(1).to(device) 65 | fstartidx = torch.cumsum(fnum, dim=0) - fnum 66 | ranges = torch.cat([fstartidx, fnum], axis=1).type(torch.int32).cpu() 67 | for i in range(tri.shape[0]): 68 | tri[i] = tri[i] + i*vum 69 | vertex_ndc = torch.cat(vertex_ndc, dim=0) 70 | tri = torch.cat(tri, dim=0) 71 | 72 | # for range_mode vetex: [B*N, 4], tri: [B*M, 3], for instance_mode vetex: [B, N, 4], tri: [M, 3] 73 | tri = tri.type(torch.int32).contiguous() 74 | rast_out, _ = dr.rasterize(self.glctx, vertex_ndc.contiguous(), tri, resolution=[rsize, rsize], ranges=ranges) 75 | 76 | depth, _ = dr.interpolate(vertex.reshape([-1,4])[...,2].unsqueeze(1).contiguous(), rast_out, tri) 77 | depth = depth.permute(0, 3, 1, 2) 78 | mask = (rast_out[..., 3] > 0).float().unsqueeze(1) 79 | depth = mask * depth 80 | 81 | 82 | image = None 83 | if feat is not None: 84 | image, _ = dr.interpolate(feat, rast_out, tri) 85 | image = image.permute(0, 3, 1, 2) 86 | image = mask * image 87 | 88 | return mask, depth, image 89 | 90 | -------------------------------------------------------------------------------- /third_part/face3d/util/test_mean_face.txt: -------------------------------------------------------------------------------- 1 | -5.228591537475585938e+01 2 | 2.078247070312500000e-01 3 | -5.064269638061523438e+01 4 | -1.315765380859375000e+01 5 | -4.952939224243164062e+01 6 | -2.592591094970703125e+01 7 | -4.793047332763671875e+01 8 | -3.832135772705078125e+01 9 | -4.512159729003906250e+01 10 | 
-5.059623336791992188e+01 11 | -3.917720794677734375e+01 12 | -6.043736648559570312e+01 13 | -2.929953765869140625e+01 14 | -6.861183166503906250e+01 15 | -1.719801330566406250e+01 16 | -7.572736358642578125e+01 17 | -1.961936950683593750e+00 18 | -7.862001037597656250e+01 19 | 1.467941284179687500e+01 20 | -7.607844543457031250e+01 21 | 2.744073486328125000e+01 22 | -6.915261840820312500e+01 23 | 3.855677795410156250e+01 24 | -5.950350570678710938e+01 25 | 4.478240966796875000e+01 26 | -4.867547225952148438e+01 27 | 4.714337158203125000e+01 28 | -3.800830078125000000e+01 29 | 4.940315246582031250e+01 30 | -2.496297454833984375e+01 31 | 5.117234802246093750e+01 32 | -1.241538238525390625e+01 33 | 5.190507507324218750e+01 34 | 8.244247436523437500e-01 35 | -4.150688934326171875e+01 36 | 2.386329650878906250e+01 37 | -3.570307159423828125e+01 38 | 3.017010498046875000e+01 39 | -2.790358734130859375e+01 40 | 3.212951660156250000e+01 41 | -1.941773223876953125e+01 42 | 3.156523132324218750e+01 43 | -1.138106536865234375e+01 44 | 2.841992187500000000e+01 45 | 5.993263244628906250e+00 46 | 2.895182800292968750e+01 47 | 1.343590545654296875e+01 48 | 3.189880371093750000e+01 49 | 2.203153991699218750e+01 50 | 3.302221679687500000e+01 51 | 2.992478942871093750e+01 52 | 3.099150085449218750e+01 53 | 3.628388977050781250e+01 54 | 2.765748596191406250e+01 55 | -1.933914184570312500e+00 56 | 1.405374145507812500e+01 57 | -2.153038024902343750e+00 58 | 5.772636413574218750e+00 59 | -2.270050048828125000e+00 60 | -2.121643066406250000e+00 61 | -2.218330383300781250e+00 62 | -1.068978118896484375e+01 63 | -1.187252044677734375e+01 64 | -1.997912597656250000e+01 65 | -6.879402160644531250e+00 66 | -2.143579864501953125e+01 67 | -1.227821350097656250e+00 68 | -2.193494415283203125e+01 69 | 4.623237609863281250e+00 70 | -2.152721405029296875e+01 71 | 9.721397399902343750e+00 72 | -1.953671264648437500e+01 73 | -3.648714447021484375e+01 74 | 9.811126708984375000e+00 75 | -3.130242919921875000e+01 76 | 1.422447967529296875e+01 77 | -2.212834930419921875e+01 78 | 1.493019866943359375e+01 79 | -1.500880432128906250e+01 80 | 1.073588562011718750e+01 81 | -2.095037078857421875e+01 82 | 9.054298400878906250e+00 83 | -3.050099182128906250e+01 84 | 8.704177856445312500e+00 85 | 1.173237609863281250e+01 86 | 1.054329681396484375e+01 87 | 1.856353759765625000e+01 88 | 1.535009765625000000e+01 89 | 2.893331909179687500e+01 90 | 1.451992797851562500e+01 91 | 3.452944946289062500e+01 92 | 1.065280151367187500e+01 93 | 2.875990295410156250e+01 94 | 8.654792785644531250e+00 95 | 1.942100524902343750e+01 96 | 9.422447204589843750e+00 97 | -2.204488372802734375e+01 98 | -3.983994293212890625e+01 99 | -1.324458312988281250e+01 100 | -3.467377471923828125e+01 101 | -6.749649047851562500e+00 102 | -3.092894744873046875e+01 103 | -9.183349609375000000e-01 104 | -3.196458435058593750e+01 105 | 4.220649719238281250e+00 106 | -3.090406036376953125e+01 107 | 1.089889526367187500e+01 108 | -3.497008514404296875e+01 109 | 1.874589538574218750e+01 110 | -4.065438079833984375e+01 111 | 1.124106597900390625e+01 112 | -4.438417816162109375e+01 113 | 5.181709289550781250e+00 114 | -4.649170684814453125e+01 115 | -1.158607482910156250e+00 116 | -4.680406951904296875e+01 117 | -7.918922424316406250e+00 118 | -4.671575164794921875e+01 119 | -1.452505493164062500e+01 120 | -4.416526031494140625e+01 121 | -2.005007171630859375e+01 122 | -3.997841644287109375e+01 123 | -1.054919433593750000e+01 124 | -3.849683380126953125e+01 125 | 
-1.051826477050781250e+00 126 | -3.794863128662109375e+01 127 | 6.412681579589843750e+00 128 | -3.804645538330078125e+01 129 | 1.627674865722656250e+01 130 | -4.039697265625000000e+01 131 | 6.373878479003906250e+00 132 | -4.087213897705078125e+01 133 | -8.551712036132812500e-01 134 | -4.157129669189453125e+01 135 | -1.014953613281250000e+01 136 | -4.128469085693359375e+01 137 | -------------------------------------------------------------------------------- /third_part/face_detection/README.md: -------------------------------------------------------------------------------- 1 | The code for Face Detection in this folder has been taken from the wonderful [face_alignment](https://github.com/1adrianb/face-alignment) repository. This has been modified to take batches of faces at a time. -------------------------------------------------------------------------------- /third_part/face_detection/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __author__ = """Adrian Bulat""" 4 | __email__ = 'adrian.bulat@nottingham.ac.uk' 5 | __version__ = '1.0.1' 6 | 7 | from .api import FaceAlignment, LandmarksType, NetworkSize 8 | -------------------------------------------------------------------------------- /third_part/face_detection/api.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.model_zoo import load_url 5 | from enum import Enum 6 | import numpy as np 7 | import cv2 8 | try: 9 | import urllib.request as request_file 10 | except BaseException: 11 | import urllib as request_file 12 | 13 | from .models import FAN, ResNetDepth 14 | from .utils import * 15 | 16 | 17 | class LandmarksType(Enum): 18 | """Enum class defining the type of landmarks to detect. 19 | 20 | ``_2D`` - the detected points ``(x,y)`` are detected in a 2D space and follow the visible contour of the face 21 | ``_2halfD`` - this points represent the projection of the 3D points into 3D 22 | ``_3D`` - detect the points ``(x,y,z)``` in a 3D space 23 | 24 | """ 25 | _2D = 1 26 | _2halfD = 2 27 | _3D = 3 28 | 29 | 30 | class NetworkSize(Enum): 31 | # TINY = 1 32 | # SMALL = 2 33 | # MEDIUM = 3 34 | LARGE = 4 35 | 36 | def __new__(cls, value): 37 | member = object.__new__(cls) 38 | member._value_ = value 39 | return member 40 | 41 | def __int__(self): 42 | return self.value 43 | 44 | ROOT = os.path.dirname(os.path.abspath(__file__)) 45 | 46 | class FaceAlignment: 47 | def __init__(self, landmarks_type, network_size=NetworkSize.LARGE, 48 | device='cuda', flip_input=False, face_detector='sfd', verbose=False): 49 | self.device = device 50 | self.flip_input = flip_input 51 | self.landmarks_type = landmarks_type 52 | self.verbose = verbose 53 | 54 | network_size = int(network_size) 55 | 56 | if 'cuda' in device: 57 | torch.backends.cudnn.benchmark = True 58 | 59 | # Get the face detector 60 | face_detector_module = __import__('face_detection.detection.' 
+ face_detector, 61 | globals(), locals(), [face_detector], 0) 62 | self.face_detector = face_detector_module.FaceDetector(device=device, verbose=verbose) 63 | 64 | def get_detections_for_batch(self, images): 65 | images = images[..., ::-1] 66 | detected_faces = self.face_detector.detect_from_batch(images.copy()) 67 | results = [] 68 | 69 | for i, d in enumerate(detected_faces): 70 | if len(d) == 0: 71 | results.append(None) 72 | continue 73 | d = d[0] 74 | d = np.clip(d, 0, None) 75 | 76 | x1, y1, x2, y2 = map(int, d[:-1]) 77 | results.append((x1, y1, x2, y2)) 78 | 79 | return results -------------------------------------------------------------------------------- /third_part/face_detection/detection/__init__.py: -------------------------------------------------------------------------------- 1 | from .core import FaceDetector -------------------------------------------------------------------------------- /third_part/face_detection/detection/sfd/__init__.py: -------------------------------------------------------------------------------- 1 | from .sfd_detector import SFDDetector as FaceDetector -------------------------------------------------------------------------------- /third_part/face_detection/detection/sfd/bbox.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import sys 4 | import cv2 5 | import random 6 | import datetime 7 | import time 8 | import math 9 | import argparse 10 | import numpy as np 11 | import torch 12 | 13 | try: 14 | from iou import IOU 15 | except BaseException: 16 | # IOU cython speedup 10x 17 | def IOU(ax1, ay1, ax2, ay2, bx1, by1, bx2, by2): 18 | sa = abs((ax2 - ax1) * (ay2 - ay1)) 19 | sb = abs((bx2 - bx1) * (by2 - by1)) 20 | x1, y1 = max(ax1, bx1), max(ay1, by1) 21 | x2, y2 = min(ax2, bx2), min(ay2, by2) 22 | w = x2 - x1 23 | h = y2 - y1 24 | if w < 0 or h < 0: 25 | return 0.0 26 | else: 27 | return 1.0 * w * h / (sa + sb - w * h) 28 | 29 | 30 | def bboxlog(x1, y1, x2, y2, axc, ayc, aww, ahh): 31 | xc, yc, ww, hh = (x2 + x1) / 2, (y2 + y1) / 2, x2 - x1, y2 - y1 32 | dx, dy = (xc - axc) / aww, (yc - ayc) / ahh 33 | dw, dh = math.log(ww / aww), math.log(hh / ahh) 34 | return dx, dy, dw, dh 35 | 36 | 37 | def bboxloginv(dx, dy, dw, dh, axc, ayc, aww, ahh): 38 | xc, yc = dx * aww + axc, dy * ahh + ayc 39 | ww, hh = math.exp(dw) * aww, math.exp(dh) * ahh 40 | x1, x2, y1, y2 = xc - ww / 2, xc + ww / 2, yc - hh / 2, yc + hh / 2 41 | return x1, y1, x2, y2 42 | 43 | 44 | def nms(dets, thresh): 45 | if 0 == len(dets): 46 | return [] 47 | x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4] 48 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 49 | order = scores.argsort()[::-1] 50 | 51 | keep = [] 52 | while order.size > 0: 53 | i = order[0] 54 | keep.append(i) 55 | xx1, yy1 = np.maximum(x1[i], x1[order[1:]]), np.maximum(y1[i], y1[order[1:]]) 56 | xx2, yy2 = np.minimum(x2[i], x2[order[1:]]), np.minimum(y2[i], y2[order[1:]]) 57 | 58 | w, h = np.maximum(0.0, xx2 - xx1 + 1), np.maximum(0.0, yy2 - yy1 + 1) 59 | ovr = w * h / (areas[i] + areas[order[1:]] - w * h) 60 | 61 | inds = np.where(ovr <= thresh)[0] 62 | order = order[inds + 1] 63 | 64 | return keep 65 | 66 | 67 | def encode(matched, priors, variances): 68 | """Encode the variances from the priorbox layers into the ground truth boxes 69 | we have matched (based on jaccard overlap) with the prior boxes. 
70 | Args: 71 | matched: (tensor) Coords of ground truth for each prior in point-form 72 | Shape: [num_priors, 4]. 73 | priors: (tensor) Prior boxes in center-offset form 74 | Shape: [num_priors,4]. 75 | variances: (list[float]) Variances of priorboxes 76 | Return: 77 | encoded boxes (tensor), Shape: [num_priors, 4] 78 | """ 79 | 80 | # dist b/t match center and prior's center 81 | g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2] 82 | # encode variance 83 | g_cxcy /= (variances[0] * priors[:, 2:]) 84 | # match wh / prior wh 85 | g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:] 86 | g_wh = torch.log(g_wh) / variances[1] 87 | # return target for smooth_l1_loss 88 | return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4] 89 | 90 | 91 | def decode(loc, priors, variances): 92 | """Decode locations from predictions using priors to undo 93 | the encoding we did for offset regression at train time. 94 | Args: 95 | loc (tensor): location predictions for loc layers, 96 | Shape: [num_priors,4] 97 | priors (tensor): Prior boxes in center-offset form. 98 | Shape: [num_priors,4]. 99 | variances: (list[float]) Variances of priorboxes 100 | Return: 101 | decoded bounding box predictions 102 | """ 103 | 104 | boxes = torch.cat(( 105 | priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], 106 | priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) 107 | boxes[:, :2] -= boxes[:, 2:] / 2 108 | boxes[:, 2:] += boxes[:, :2] 109 | return boxes 110 | 111 | def batch_decode(loc, priors, variances): 112 | """Decode locations from predictions using priors to undo 113 | the encoding we did for offset regression at train time. 114 | Args: 115 | loc (tensor): location predictions for loc layers, 116 | Shape: [num_priors,4] 117 | priors (tensor): Prior boxes in center-offset form. 118 | Shape: [num_priors,4]. 
119 | variances: (list[float]) Variances of priorboxes 120 | Return: 121 | decoded bounding box predictions 122 | """ 123 | 124 | boxes = torch.cat(( 125 | priors[:, :, :2] + loc[:, :, :2] * variances[0] * priors[:, :, 2:], 126 | priors[:, :, 2:] * torch.exp(loc[:, :, 2:] * variances[1])), 2) 127 | boxes[:, :, :2] -= boxes[:, :, 2:] / 2 128 | boxes[:, :, 2:] += boxes[:, :, :2] 129 | return boxes 130 | -------------------------------------------------------------------------------- /third_part/face_detection/detection/sfd/detect.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | import os 5 | import sys 6 | import cv2 7 | import random 8 | import datetime 9 | import math 10 | import argparse 11 | import numpy as np 12 | 13 | import scipy.io as sio 14 | import zipfile 15 | from .net_s3fd import s3fd 16 | from .bbox import * 17 | 18 | 19 | def detect(net, img, device): 20 | img = img - np.array([104, 117, 123]) 21 | img = img.transpose(2, 0, 1) 22 | img = img.reshape((1,) + img.shape) 23 | 24 | if 'cuda' in device: 25 | torch.backends.cudnn.benchmark = True 26 | 27 | img = torch.from_numpy(img).float().to(device) 28 | BB, CC, HH, WW = img.size() 29 | with torch.no_grad(): 30 | olist = net(img) 31 | 32 | bboxlist = [] 33 | for i in range(len(olist) // 2): 34 | olist[i * 2] = F.softmax(olist[i * 2], dim=1) 35 | olist = [oelem.data.cpu() for oelem in olist] 36 | for i in range(len(olist) // 2): 37 | ocls, oreg = olist[i * 2], olist[i * 2 + 1] 38 | FB, FC, FH, FW = ocls.size() # feature map size 39 | stride = 2**(i + 2) # 4,8,16,32,64,128 40 | anchor = stride * 4 41 | poss = zip(*np.where(ocls[:, 1, :, :] > 0.05)) 42 | for Iindex, hindex, windex in poss: 43 | axc, ayc = stride / 2 + windex * stride, stride / 2 + hindex * stride 44 | score = ocls[0, 1, hindex, windex] 45 | loc = oreg[0, :, hindex, windex].contiguous().view(1, 4) 46 | priors = torch.Tensor([[axc / 1.0, ayc / 1.0, stride * 4 / 1.0, stride * 4 / 1.0]]) 47 | variances = [0.1, 0.2] 48 | box = decode(loc, priors, variances) 49 | x1, y1, x2, y2 = box[0] * 1.0 50 | # cv2.rectangle(imgshow,(int(x1),int(y1)),(int(x2),int(y2)),(0,0,255),1) 51 | bboxlist.append([x1, y1, x2, y2, score]) 52 | bboxlist = np.array(bboxlist) 53 | if 0 == len(bboxlist): 54 | bboxlist = np.zeros((1, 5)) 55 | 56 | return bboxlist 57 | 58 | def batch_detect(net, imgs, device): 59 | imgs = imgs - np.array([104, 117, 123]) 60 | imgs = imgs.transpose(0, 3, 1, 2) 61 | 62 | if 'cuda' in device: 63 | torch.backends.cudnn.benchmark = True 64 | 65 | imgs = torch.from_numpy(imgs).float().to(device) 66 | BB, CC, HH, WW = imgs.size() 67 | with torch.no_grad(): 68 | # print(type(net),type(imgs), device) 69 | olist = net(imgs) 70 | 71 | bboxlist = [] 72 | for i in range(len(olist) // 2): 73 | olist[i * 2] = F.softmax(olist[i * 2], dim=1) 74 | # print(olist) 75 | # import pdb; pdb.set_trace() 76 | olist = [oelem.cpu() for oelem in olist] 77 | for i in range(len(olist) // 2): 78 | ocls, oreg = olist[i * 2], olist[i * 2 + 1] 79 | FB, FC, FH, FW = ocls.size() # feature map size 80 | stride = 2**(i + 2) # 4,8,16,32,64,128 81 | anchor = stride * 4 82 | poss = zip(*np.where(ocls[:, 1, :, :] > 0.05)) 83 | for Iindex, hindex, windex in poss: 84 | axc, ayc = stride / 2 + windex * stride, stride / 2 + hindex * stride 85 | score = ocls[:, 1, hindex, windex] 86 | loc = oreg[:, :, hindex, windex].contiguous().view(BB, 1, 4) 87 | priors = torch.Tensor([[axc / 1.0, ayc / 1.0, stride * 4 / 1.0, stride * 
4 / 1.0]]).view(1, 1, 4) 88 | variances = [0.1, 0.2] 89 | box = batch_decode(loc, priors, variances) 90 | box = box[:, 0] * 1.0 91 | # cv2.rectangle(imgshow,(int(x1),int(y1)),(int(x2),int(y2)),(0,0,255),1) 92 | bboxlist.append(torch.cat([box, score.unsqueeze(1)], 1).cpu().numpy()) 93 | bboxlist = np.array(bboxlist) 94 | if 0 == len(bboxlist): 95 | bboxlist = np.zeros((1, BB, 5)) 96 | 97 | return bboxlist 98 | 99 | def flip_detect(net, img, device): 100 | img = cv2.flip(img, 1) 101 | b = detect(net, img, device) 102 | 103 | bboxlist = np.zeros(b.shape) 104 | bboxlist[:, 0] = img.shape[1] - b[:, 2] 105 | bboxlist[:, 1] = b[:, 1] 106 | bboxlist[:, 2] = img.shape[1] - b[:, 0] 107 | bboxlist[:, 3] = b[:, 3] 108 | bboxlist[:, 4] = b[:, 4] 109 | return bboxlist 110 | 111 | 112 | def pts_to_bb(pts): 113 | min_x, min_y = np.min(pts, axis=0) 114 | max_x, max_y = np.max(pts, axis=0) 115 | return np.array([min_x, min_y, max_x, max_y]) 116 | -------------------------------------------------------------------------------- /third_part/face_detection/detection/sfd/sfd_detector.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | from torch.utils.model_zoo import load_url 4 | 5 | from ..core import FaceDetector 6 | 7 | from .net_s3fd import s3fd 8 | from .bbox import * 9 | from .detect import * 10 | 11 | models_urls = { 12 | 's3fd': 'https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth', 13 | } 14 | 15 | 16 | class SFDDetector(FaceDetector): 17 | def __init__(self, device, path_to_detector='/apdcephfs/share_1290939/shadowcun/pretrained/s3fd.pth', verbose=False): 18 | super(SFDDetector, self).__init__(device, verbose) 19 | 20 | # Initialise the face detector 21 | if not os.path.isfile(path_to_detector): 22 | model_weights = load_url(models_urls['s3fd']) 23 | else: 24 | model_weights = torch.load(path_to_detector) 25 | 26 | self.face_detector = s3fd() 27 | self.face_detector.load_state_dict(model_weights) 28 | self.face_detector.to(device) 29 | self.face_detector.eval() 30 | 31 | def detect_from_image(self, tensor_or_path): 32 | image = self.tensor_or_path_to_ndarray(tensor_or_path) 33 | 34 | bboxlist = detect(self.face_detector, image, device=self.device) 35 | keep = nms(bboxlist, 0.3) 36 | bboxlist = bboxlist[keep, :] 37 | bboxlist = [x for x in bboxlist if x[-1] > 0.5] 38 | 39 | return bboxlist 40 | 41 | def detect_from_batch(self, images): 42 | bboxlists = batch_detect(self.face_detector, images, device=self.device) 43 | keeps = [nms(bboxlists[:, i, :], 0.3) for i in range(bboxlists.shape[1])] 44 | bboxlists = [bboxlists[keep, i, :] for i, keep in enumerate(keeps)] 45 | bboxlists = [[x for x in bboxlist if x[-1] > 0.5] for bboxlist in bboxlists] 46 | 47 | return bboxlists 48 | 49 | @property 50 | def reference_scale(self): 51 | return 195 52 | 53 | @property 54 | def reference_x_shift(self): 55 | return 0 56 | 57 | @property 58 | def reference_y_shift(self): 59 | return 0 60 | -------------------------------------------------------------------------------- /third_part/ganimation_replicate/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Yuedong Chen (Donald) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /third_part/ganimation_replicate/checkpoints/run_script.sh: -------------------------------------------------------------------------------- 1 | [ test][220417_224012]python main.py --mode test --data_root datasets/celebA --ckpt_dir checkpoints --load_epoch 30 2 | [ test][220419_184832]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ 3 | [ test][220419_185232]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ 4 | [ test][220419_185252]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ 5 | [ test][220419_185305]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ 6 | [ test][220419_185320]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ 7 | [ test][220419_185810]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ 8 | [ test][220419_190338]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ 9 | [ test][220419_190445]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ 10 | [ test][220419_190628]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ 11 | [ test][220419_195037]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ 12 | [ test][220419_200348]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ 13 | [ test][220419_200512]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ 14 | [ test][220419_200529]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ 15 | [ test][220419_200554]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ 16 | [ test][220419_200622]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ 17 | [ test][220419_200641]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ 18 | [ test][220419_200658]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ 19 | [ test][220419_200717]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ 20 | [ test][220419_200740]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ 21 | [ test][220419_200807]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ 22 | [ test][220419_213236]python test.py --data_root . --mode test --load_epoch 30 --ckpt_dir checkpoints/ 23 | [ test][220419_213329]python test.py --data_root . 
--mode test --load_epoch 30 --ckpt_dir checkpoints/ 24 | -------------------------------------------------------------------------------- /third_part/ganimation_replicate/ckpts/ganimation/220419_183211/opt.txt: -------------------------------------------------------------------------------- 1 | ------------------- [train][220419_183211]Options -------------------- 2 | aus_nc: 17 3 | aus_pkl: aus_openface.pkl 4 | batch_size: 25 5 | beta1: 0.5 6 | ckpt_dir: ./ckpts/./ganimation/220419_183211 [default: ./ckpts] 7 | data_root: . [default: None] 8 | epoch_count: 1 9 | final_size: 128 10 | gan_type: wgan-gp 11 | gpu_ids: [0] [default: 0] 12 | img_nc: 3 13 | imgs_dir: imgs 14 | init_gain: 0.02 15 | init_type: normal 16 | interpolate_len: 5 17 | lambda_aus: 160.0 18 | lambda_dis: 1.0 19 | lambda_mask: 0 20 | lambda_rec: 10.0 21 | lambda_tv: 0 22 | lambda_wgan_gp: 10.0 23 | load_epoch: 0 24 | load_size: 148 25 | log_file: logs.txt 26 | lr: 0.0001 27 | lr_decay_iters: 50 28 | lr_policy: lambda 29 | lucky_seed: 1650364331 [default: 0] 30 | max_dataset_size: inf 31 | mode: train 32 | model: ganimation 33 | n_threads: 6 34 | name: 220419_183211 35 | ndf: 64 36 | ngf: 64 37 | niter: 20 38 | niter_decay: 10 39 | no_aus_noise: False 40 | no_flip: False 41 | no_test_eval: False 42 | norm: instance 43 | opt_file: opt.txt 44 | plot_losses_freq: 20000 45 | print_losses_freq: 100 46 | resize_or_crop: none 47 | results: results 48 | sample_img_freq: 2000 49 | save_epoch_freq: 2 50 | save_test_gif: False 51 | serial_batches: False 52 | test_csv: test_ids.csv 53 | train_csv: train_ids.csv 54 | train_gen_iter: 5 55 | use_dropout: False 56 | visdom_display_id: 1 57 | visdom_env: main 58 | visdom_port: 8097 59 | --------------------- [train][220419_183211]End ---------------------- 60 | 61 | 62 | -------------------------------------------------------------------------------- /third_part/ganimation_replicate/ckpts/ganimation/220419_183211/run_script.sh: -------------------------------------------------------------------------------- 1 | [train][220419_183211]python test.py --data_root . 2 | -------------------------------------------------------------------------------- /third_part/ganimation_replicate/ckpts/ganimation/220419_183229/opt.txt: -------------------------------------------------------------------------------- 1 | ------------------- [train][220419_183229]Options -------------------- 2 | aus_nc: 17 3 | aus_pkl: aus_openface.pkl 4 | batch_size: 25 5 | beta1: 0.5 6 | ckpt_dir: ./ckpts/./ganimation/220419_183229 [default: ./ckpts] 7 | data_root: . 
[default: None] 8 | epoch_count: 1 9 | final_size: 128 10 | gan_type: wgan-gp 11 | gpu_ids: [0] [default: 0] 12 | img_nc: 3 13 | imgs_dir: imgs 14 | init_gain: 0.02 15 | init_type: normal 16 | interpolate_len: 5 17 | lambda_aus: 160.0 18 | lambda_dis: 1.0 19 | lambda_mask: 0 20 | lambda_rec: 10.0 21 | lambda_tv: 0 22 | lambda_wgan_gp: 10.0 23 | load_epoch: 0 24 | load_size: 148 25 | log_file: logs.txt 26 | lr: 0.0001 27 | lr_decay_iters: 50 28 | lr_policy: lambda 29 | lucky_seed: 1650364349 [default: 0] 30 | max_dataset_size: inf 31 | mode: train 32 | model: ganimation 33 | n_threads: 6 34 | name: 220419_183229 35 | ndf: 64 36 | ngf: 64 37 | niter: 20 38 | niter_decay: 10 39 | no_aus_noise: False 40 | no_flip: False 41 | no_test_eval: False 42 | norm: instance 43 | opt_file: opt.txt 44 | plot_losses_freq: 20000 45 | print_losses_freq: 100 46 | resize_or_crop: none 47 | results: results 48 | sample_img_freq: 2000 49 | save_epoch_freq: 2 50 | save_test_gif: False 51 | serial_batches: False 52 | test_csv: test_ids.csv 53 | train_csv: train_ids.csv 54 | train_gen_iter: 5 55 | use_dropout: False 56 | visdom_display_id: 1 57 | visdom_env: main 58 | visdom_port: 8097 59 | --------------------- [train][220419_183229]End ---------------------- 60 | 61 | 62 | -------------------------------------------------------------------------------- /third_part/ganimation_replicate/ckpts/ganimation/220419_183229/run_script.sh: -------------------------------------------------------------------------------- 1 | [train][220419_183229]python test.py --data_root . 2 | -------------------------------------------------------------------------------- /third_part/ganimation_replicate/ckpts/run_script.sh: -------------------------------------------------------------------------------- 1 | [ test][220419_183311]python test.py --data_root . --mode test 2 | [ test][220419_183356]python test.py --data_root . --mode test 3 | [ test][220419_183456]python test.py --data_root . --mode test 4 | [ test][220419_183528]python test.py --data_root . --mode test 5 | [ test][220419_183711]python test.py --data_root . --mode test 6 | [ test][220419_183837]python test.py --data_root . --mode test 7 | [ test][220419_184333]python test.py --data_root . --mode test 8 | [ test][220419_184442]python test.py --data_root . --mode test 9 | [ test][220419_184500]python test.py --data_root . --mode test 10 | [ test][220419_184533]python test.py --data_root . --mode test 11 | [ test][220419_184603]python test.py --data_root . --mode test 12 | [ test][220419_184714]python test.py --data_root . 
--mode test 13 | -------------------------------------------------------------------------------- /third_part/ganimation_replicate/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_loader import create_dataloader -------------------------------------------------------------------------------- /third_part/ganimation_replicate/data/base_dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | from PIL import Image 4 | import random 5 | import numpy as np 6 | import pickle 7 | import torchvision.transforms as transforms 8 | 9 | 10 | 11 | class BaseDataset(torch.utils.data.Dataset): 12 | """docstring for BaseDataset""" 13 | def __init__(self): 14 | super(BaseDataset, self).__init__() 15 | 16 | def name(self): 17 | return os.path.basename(self.opt.data_root.strip('/')) 18 | 19 | def initialize(self, opt): 20 | self.opt = opt 21 | self.imgs_dir = os.path.join(self.opt.data_root, self.opt.imgs_dir) 22 | self.is_train = self.opt.mode == "train" 23 | 24 | # load images path 25 | filename = self.opt.train_csv if self.is_train else self.opt.test_csv 26 | self.imgs_name_file = os.path.join(self.opt.data_root, filename) 27 | self.imgs_path = self.make_dataset() 28 | 29 | # load AUs dicitionary 30 | aus_pkl = os.path.join(self.opt.data_root, self.opt.aus_pkl) 31 | self.aus_dict = self.load_dict(aus_pkl) 32 | 33 | # load image to tensor transformer 34 | self.img2tensor = self.img_transformer() 35 | 36 | def make_dataset(self): 37 | return None 38 | 39 | def load_dict(self, pkl_path): 40 | saved_dict = {} 41 | with open(pkl_path, 'rb') as f: 42 | saved_dict = pickle.load(f, encoding='latin1') 43 | return saved_dict 44 | 45 | def get_img_by_path(self, img_path): 46 | assert os.path.isfile(img_path), "Cannot find image file: %s" % img_path 47 | img_type = 'L' if self.opt.img_nc == 1 else 'RGB' 48 | return Image.open(img_path).convert(img_type) 49 | 50 | def get_aus_by_path(self, img_path): 51 | return None 52 | 53 | def img_transformer(self): 54 | transform_list = [] 55 | if self.opt.resize_or_crop == 'resize_and_crop': 56 | transform_list.append(transforms.Resize([self.opt.load_size, self.opt.load_size], Image.BICUBIC)) 57 | transform_list.append(transforms.RandomCrop(self.opt.final_size)) 58 | elif self.opt.resize_or_crop == 'crop': 59 | transform_list.append(transforms.RandomCrop(self.opt.final_size)) 60 | elif self.opt.resize_or_crop == 'none': 61 | transform_list.append(transforms.Lambda(lambda image: image)) 62 | else: 63 | raise ValueError("--resize_or_crop %s is not a valid option." 
% self.opt.resize_or_crop) 64 | 65 | if self.is_train and not self.opt.no_flip: 66 | transform_list.append(transforms.RandomHorizontalFlip()) 67 | 68 | transform_list.append(transforms.ToTensor()) 69 | transform_list.append(transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))) 70 | 71 | img2tensor = transforms.Compose(transform_list) 72 | 73 | return img2tensor 74 | 75 | def __len__(self): 76 | return len(self.imgs_path) 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /third_part/ganimation_replicate/data/celeba.py: -------------------------------------------------------------------------------- 1 | from .base_dataset import BaseDataset 2 | import os 3 | import random 4 | import numpy as np 5 | 6 | 7 | class CelebADataset(BaseDataset): 8 | """docstring for CelebADataset""" 9 | def __init__(self): 10 | super(CelebADataset, self).__init__() 11 | 12 | def initialize(self, opt): 13 | super(CelebADataset, self).initialize(opt) 14 | 15 | def get_aus_by_path(self, img_path): 16 | assert os.path.isfile(img_path), "Cannot find image file: %s" % img_path 17 | img_id = str(os.path.splitext(os.path.basename(img_path))[0]) 18 | return self.aus_dict[img_id] / 5.0 # norm to [0, 1] 19 | 20 | def make_dataset(self): 21 | # return all image full path in a list 22 | imgs_path = [] 23 | assert os.path.isfile(self.imgs_name_file), "%s does not exist." % self.imgs_name_file 24 | with open(self.imgs_name_file, 'r') as f: 25 | lines = f.readlines() 26 | imgs_path = [os.path.join(self.imgs_dir, line.strip()) for line in lines] 27 | imgs_path = sorted(imgs_path) 28 | return imgs_path 29 | 30 | def __getitem__(self, index): 31 | img_path = self.imgs_path[index] 32 | 33 | # load source image 34 | src_img = self.get_img_by_path(img_path) 35 | src_img_tensor = self.img2tensor(src_img) 36 | src_aus = self.get_aus_by_path(img_path) 37 | 38 | # load target image 39 | tar_img_path = random.choice(self.imgs_path) 40 | tar_img = self.get_img_by_path(tar_img_path) 41 | tar_img_tensor = self.img2tensor(tar_img) 42 | tar_aus = self.get_aus_by_path(tar_img_path) 43 | if self.is_train and not self.opt.no_aus_noise: 44 | tar_aus = tar_aus + np.random.uniform(-0.1, 0.1, tar_aus.shape) 45 | 46 | # record paths for debug and test usage 47 | data_dict = {'src_img':src_img_tensor, 'src_aus':src_aus, 'tar_img':tar_img_tensor, 'tar_aus':tar_aus, \ 48 | 'src_path':img_path, 'tar_path':tar_img_path} 49 | 50 | return data_dict 51 | -------------------------------------------------------------------------------- /third_part/ganimation_replicate/data/data_loader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | from PIL import Image 4 | import random 5 | import numpy as np 6 | import pickle 7 | import torchvision.transforms as transforms 8 | 9 | from .base_dataset import BaseDataset 10 | from .celeba import CelebADataset 11 | 12 | def create_dataloader(opt): 13 | data_loader = DataLoader() 14 | data_loader.initialize(opt) 15 | return data_loader 16 | 17 | 18 | class DataLoader: 19 | def name(self): 20 | return self.dataset.name() + "_Loader" 21 | 22 | def create_datase(self): 23 | # specify which dataset to load here 24 | loaded_dataset = os.path.basename(self.opt.data_root.strip('/')).lower() 25 | if 'celeba' in loaded_dataset or 'emotion' in loaded_dataset: 26 | dataset = CelebADataset() 27 | else: 28 | dataset = BaseDataset() # fall back to the generic BaseDataset for unrecognised data roots 29 | dataset.initialize(self.opt) 30 | return dataset 31 | 32 | def
initialize(self, opt): 33 | self.opt = opt 34 | self.dataset = self.create_datase() 35 | self.dataloader = torch.utils.data.DataLoader( 36 | self.dataset, 37 | batch_size=opt.batch_size, 38 | shuffle=not opt.serial_batches, 39 | num_workers=int(opt.n_threads) 40 | ) 41 | 42 | def __len__(self): 43 | return min(len(self.dataset), self.opt.max_dataset_size) 44 | 45 | def __iter__(self): 46 | for i, data in enumerate(self.dataloader): 47 | if i * self.opt.batch_size >= self.opt.max_dataset_size: 48 | break 49 | yield data 50 | -------------------------------------------------------------------------------- /third_part/ganimation_replicate/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Dec 13, 2018 3 | @author: Yuedong Chen 4 | """ 5 | 6 | from options import Options 7 | from solvers import create_solver 8 | 9 | 10 | 11 | 12 | if __name__ == '__main__': 13 | opt = Options().parse() 14 | 15 | solver = create_solver(opt) 16 | solver.run_solver() 17 | 18 | print('[THE END]') -------------------------------------------------------------------------------- /third_part/ganimation_replicate/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_model import BaseModel 2 | from .ganimation import GANimationModel 3 | from .stargan import StarGANModel 4 | 5 | 6 | 7 | def create_model(opt): 8 | # specify model name here 9 | if opt.model == "ganimation": 10 | instance = GANimationModel() 11 | elif opt.model == "stargan": 12 | instance = StarGANModel() 13 | else: 14 | instance = BaseModel() 15 | instance.initialize(opt) 16 | instance.setup() 17 | return instance 18 | 19 | -------------------------------------------------------------------------------- /third_part/ganimation_replicate/visualizer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | import math 5 | from PIL import Image 6 | # import matplotlib.pyplot as plt 7 | 8 | 9 | 10 | class Visualizer(object): 11 | """docstring for Visualizer""" 12 | def __init__(self): 13 | super(Visualizer, self).__init__() 14 | 15 | def initialize(self, opt): 16 | self.opt = opt 17 | # self.vis_saved_dir = os.path.join(self.opt.ckpt_dir, 'vis_pics') 18 | # if not os.path.isdir(self.vis_saved_dir): 19 | # os.makedirs(self.vis_saved_dir) 20 | # plt.switch_backend('agg') 21 | 22 | self.display_id = self.opt.visdom_display_id 23 | if self.display_id > 0: 24 | import visdom 25 | self.ncols = 8 26 | self.vis = visdom.Visdom(server="http://localhost", port=self.opt.visdom_port, env=self.opt.visdom_env) 27 | 28 | def throw_visdom_connection_error(self): 29 | print('\n\nno visdom server.') 30 | exit(1) 31 | 32 | def print_losses_info(self, info_dict): 33 | msg = '[{}][Epoch: {:0>3}/{:0>3}; Images: {:0>4}/{:0>4}; Time: {:.3f}s/Batch({}); LR: {:.7f}] '.format( 34 | self.opt.name, info_dict['epoch'], info_dict['epoch_len'], 35 | info_dict['epoch_steps'], info_dict['epoch_steps_len'], 36 | info_dict['step_time'], self.opt.batch_size, info_dict['cur_lr']) 37 | for k, v in info_dict['losses'].items(): 38 | msg += '| {}: {:.4f} '.format(k, v) 39 | msg += '|' 40 | print(msg) 41 | with open(info_dict['log_path'], 'a+') as f: 42 | f.write(msg + '\n') 43 | 44 | def display_current_losses(self, epoch, counter_ratio, losses_dict): 45 | if not hasattr(self, 'plot_data'): 46 | self.plot_data = {'X': [], 'Y': [], 'legend': list(losses_dict.keys())} 47 | 
self.plot_data['X'].append(epoch + counter_ratio) 48 | self.plot_data['Y'].append([losses_dict[k] for k in self.plot_data['legend']]) 49 | try: 50 | self.vis.line( 51 | X=np.stack([np.array(self.plot_data['X'])] * len(self.plot_data['legend']), 1), 52 | Y=np.array(self.plot_data['Y']), 53 | opts={ 54 | 'title': self.opt.name + ' loss over time', 55 | 'legend':self.plot_data['legend'], 56 | 'xlabel':'epoch', 57 | 'ylabel':'loss'}, 58 | win=self.display_id) 59 | except ConnectionError: 60 | self.throw_visdom_connection_error() 61 | 62 | def display_online_results(self, visuals, epoch): 63 | win_id = self.display_id + 24 64 | images = [] 65 | labels = [] 66 | for label, image in visuals.items(): 67 | if 'mask' in label: # or 'focus' in label: 68 | image = (image - 0.5) / 0.5 # convert map from [0, 1] to [-1, 1] 69 | image_numpy = self.tensor2im(image) 70 | images.append(image_numpy.transpose([2, 0, 1])) 71 | labels.append(label) 72 | try: 73 | title = ' || '.join(labels) 74 | self.vis.images(images, nrow=self.ncols, win=win_id, 75 | padding=5, opts=dict(title=title)) 76 | except ConnectionError: 77 | self.throw_visdom_connection_error() 78 | 79 | # utils 80 | def tensor2im(self, input_image, imtype=np.uint8): 81 | if isinstance(input_image, torch.Tensor): 82 | image_tensor = input_image.data 83 | else: 84 | return input_image 85 | image_numpy = image_tensor[0].cpu().float().numpy() 86 | im = self.numpy2im(image_numpy, imtype).resize((80, 80), Image.ANTIALIAS) 87 | return np.array(im) 88 | 89 | def numpy2im(self, image_numpy, imtype=np.uint8): 90 | if image_numpy.shape[0] == 1: 91 | image_numpy = np.tile(image_numpy, (3, 1, 1)) 92 | # input should be [0, 1] 93 | #image_numpy = np.transpose(image_numpy, (1, 2, 0)) * 255.0 94 | image_numpy = (np.transpose(image_numpy, (1, 2, 0)) / 2. + 0.5) * 255.0 95 | # print(image_numpy.shape) 96 | image_numpy = image_numpy.astype(imtype) 97 | im = Image.fromarray(image_numpy) 98 | # im = Image.fromarray(image_numpy).resize((64, 64), Image.ANTIALIAS) 99 | return im # np.array(im) 100 | 101 | 102 | 103 | 104 | 105 | -------------------------------------------------------------------------------- /utils/flow_util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def convert_flow_to_deformation(flow): 4 | r"""convert flow fields to deformations. 5 | 6 | Args: 7 | flow (tensor): Flow field obtained by the model 8 | Returns: 9 | deformation (tensor): The deformation used for warping 10 | """ 11 | b,c,h,w = flow.shape 12 | flow_norm = 2 * torch.cat([flow[:,:1,...]/(w-1),flow[:,1:,...]/(h-1)], 1) 13 | grid = make_coordinate_grid(flow) 14 | deformation = grid + flow_norm.permute(0,2,3,1) 15 | return deformation 16 | 17 | def make_coordinate_grid(flow): 18 | r"""obtain coordinate grid with the same size as the flow filed. 
19 | 20 | Args: 21 | flow (tensor): Flow field obtained by the model 22 | Returns: 23 | grid (tensor): The grid with the same size as the input flow 24 | """ 25 | b,c,h,w = flow.shape 26 | 27 | x = torch.arange(w).to(flow) 28 | y = torch.arange(h).to(flow) 29 | 30 | x = (2 * (x / (w - 1)) - 1) 31 | y = (2 * (y / (h - 1)) - 1) 32 | 33 | yy = y.view(-1, 1).repeat(1, w) 34 | xx = x.view(1, -1).repeat(h, 1) 35 | 36 | meshed = torch.cat([xx.unsqueeze_(2), yy.unsqueeze_(2)], 2) 37 | meshed = meshed.expand(b, -1, -1, -1) 38 | return meshed 39 | 40 | 41 | def warp_image(source_image, deformation): 42 | r"""warp the input image according to the deformation 43 | 44 | Args: 45 | source_image (tensor): source images to be warped 46 | deformation (tensor): deformations used to warp the images; value in range (-1, 1) 47 | Returns: 48 | output (tensor): the warped images 49 | """ 50 | _, h_old, w_old, _ = deformation.shape 51 | _, _, h, w = source_image.shape 52 | if h_old != h or w_old != w: 53 | deformation = deformation.permute(0, 3, 1, 2) 54 | deformation = torch.nn.functional.interpolate(deformation, size=(h, w), mode='bilinear') 55 | deformation = deformation.permute(0, 2, 3, 1) 56 | return torch.nn.functional.grid_sample(source_image, deformation) 57 | --------------------------------------------------------------------------------
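A minimal usage sketch for the two flow_util helpers above, assuming the repository root is on PYTHONPATH; the tensor shapes and random inputs below are illustrative stand-ins for a model-predicted flow field, not part of the original pipeline:

import torch
from utils.flow_util import convert_flow_to_deformation, warp_image

# dummy source frame and dummy flow field standing in for a model prediction
source = torch.rand(1, 3, 256, 256)   # (B, C, H, W), values in [0, 1]
flow = torch.randn(1, 2, 64, 64)      # (B, 2, h, w), flow predicted at a lower resolution

deformation = convert_flow_to_deformation(flow)  # (B, h, w, 2) sampling grid in [-1, 1]
warped = warp_image(source, deformation)         # grid is bilinearly resized to 256x256 inside warp_image
print(warped.shape)                              # torch.Size([1, 3, 256, 256])

Note that warp_image resizes the deformation grid to the source resolution before calling grid_sample, so the flow can be predicted at a coarser resolution than the frame being warped.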