├── LICENSE
├── README.md
├── audio.py
├── basicsr
│   ├── README.md
│   ├── __init__.py
│   ├── apply_sr.py
│   ├── archs
│   │   ├── __init__.py
│   │   ├── arch_util.py
│   │   ├── dfdnet_arch.py
│   │   ├── dfdnet_util.py
│   │   ├── discriminator_arch.py
│   │   ├── duf_arch.py
│   │   ├── edsr_arch.py
│   │   ├── edvr_arch.py
│   │   ├── inception.py
│   │   ├── rcan_arch.py
│   │   ├── ridnet_arch.py
│   │   ├── rrdbnet_arch.py
│   │   ├── spynet_arch.py
│   │   ├── srresnet_arch.py
│   │   ├── stylegan2_arch.py
│   │   ├── tof_arch.py
│   │   └── vgg_arch.py
│   ├── data
│   │   ├── __init__.py
│   │   ├── data_sampler.py
│   │   ├── data_util.py
│   │   ├── degradations.py
│   │   ├── ffhq_dataset.py
│   │   ├── meta_info
│   │   │   ├── meta_info_DIV2K800sub_GT.txt
│   │   │   ├── meta_info_REDS4_test_GT.txt
│   │   │   ├── meta_info_REDS_GT.txt
│   │   │   ├── meta_info_REDSofficial4_test_GT.txt
│   │   │   ├── meta_info_REDSval_official_test_GT.txt
│   │   │   ├── meta_info_Vimeo90K_test_GT.txt
│   │   │   ├── meta_info_Vimeo90K_test_fast_GT.txt
│   │   │   ├── meta_info_Vimeo90K_test_medium_GT.txt
│   │   │   ├── meta_info_Vimeo90K_test_slow_GT.txt
│   │   │   └── meta_info_Vimeo90K_train_GT.txt
│   │   ├── paired_image_dataset.py
│   │   ├── prefetch_dataloader.py
│   │   ├── reds_dataset.py
│   │   ├── single_image_dataset.py
│   │   ├── transforms.py
│   │   ├── video_test_dataset.py
│   │   └── vimeo90k_dataset.py
│   ├── losses
│   │   ├── __init__.py
│   │   ├── loss_util.py
│   │   └── losses.py
│   ├── metrics
│   │   ├── __init__.py
│   │   ├── fid.py
│   │   ├── metric_util.py
│   │   ├── niqe.py
│   │   ├── niqe_pris_params.npz
│   │   └── psnr_ssim.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── base_model.py
│   │   ├── edvr_model.py
│   │   ├── esrgan_model.py
│   │   ├── lr_scheduler.py
│   │   ├── sr_model.py
│   │   ├── srgan_model.py
│   │   ├── stylegan2_model.py
│   │   ├── video_base_model.py
│   │   └── video_gan_model.py
│   ├── ops
│   │   ├── __init__.py
│   │   ├── dcn
│   │   │   ├── __init__.py
│   │   │   ├── deform_conv.py
│   │   │   └── src
│   │   │       ├── deform_conv_cuda.cpp
│   │   │       ├── deform_conv_cuda_kernel.cu
│   │   │       └── deform_conv_ext.cpp
│   │   ├── fused_act
│   │   │   ├── __init__.py
│   │   │   ├── fused_act.py
│   │   │   └── src
│   │   │       ├── fused_bias_act.cpp
│   │   │       └── fused_bias_act_kernel.cu
│   │   └── upfirdn2d
│   │       ├── __init__.py
│   │       ├── src
│   │       │   ├── upfirdn2d.cpp
│   │       │   └── upfirdn2d_kernel.cu
│   │       └── upfirdn2d.py
│   ├── test.py
│   ├── train.py
│   └── utils
│       ├── __init__.py
│       ├── dist_util.py
│       ├── download_util.py
│       ├── face_util.py
│       ├── file_client.py
│       ├── flow_util.py
│       ├── img_util.py
│       ├── lmdb_util.py
│       ├── logger.py
│       ├── matlab_functions.py
│       ├── misc.py
│       ├── options.py
│       └── registry.py
├── checkpoints
│   └── readme.md
├── download_models.py
├── examples
│   ├── 1_hd.jpg
│   ├── 1_low.jpg
│   ├── kennedy_hd.jpg
│   ├── kennedy_hd.mkv
│   ├── kennedy_low.jpg
│   ├── kennedy_low.mp4
│   ├── mona_hd.jpg
│   ├── mona_hd.mkv
│   ├── mona_low.jpg
│   └── mona_low.mp4
├── experiments
│   └── 001_ESRGAN_x4_f64b23_custom16k_500k_B16G1_wandb
│       └── models
│           └── readme.md
├── face_detection
│   ├── README.md
│   ├── __init__.py
│   ├── api.py
│   ├── detection
│   │   ├── __init__.py
│   │   ├── core.py
│   │   └── sfd
│   │       ├── __init__.py
│   │       ├── bbox.py
│   │       ├── detect.py
│   │       ├── net_s3fd.py
│   │       ├── readme.md
│   │       └── sfd_detector.py
│   ├── models.py
│   └── utils.py
├── face_parsing
│   ├── README.md
│   ├── __init__.py
│   ├── model.py
│   ├── resnet.py
│   └── swap.py
├── hparams.py
├── inference.py
├── input_audios
│   └── ai.wav
├── input_videos
│   ├── README.md
│   ├── kennedy.mp4
│   └── mona.mp4
├── output_videos_hd
│   ├── kennedy.mkv
│   └── mona.mkv
├── output_videos_wav2lip
│   ├── kennedy.mp4
│   └── mona.mp4
├── requirements.txt
├── resizeframes.py
├── results
│   └── README.md
├── run_final.sh
├── tb_logger
│   ├── 001_ESRGAN_x4_f64b23_custom16k_500k_B16G1_wandb
│   ├── 001_ESRGAN_x4_f64b23_custom16k_500k_B16G1_wandb_archived_20230401_052824
│   ├── 001_ESRGAN_x4_f64b23_custom16k_500k_B16G1_wandb_archived_20230401_135250
│   ├── 001_ESRGAN_x4_f64b23_custom16k_500k_B16G1_wandb_archived_20230401_135406
│   ├── 001_ESRGAN_x4_f64b23_custom16k_500k_B16G1_wandb_archived_20230401_135512
│   └── 001_ESRGAN_x4_f64b23_custom16k_500k_B16G1_wandb_archived_20230401_135623
├── temp
│   ├── README.md
│   ├── result.avi
│   └── temp.wav
├── train.py
├── train_basicsr.yml
├── video2frames.py
└── wav2lip_models
    ├── README.md
    ├── __init__.py
    ├── conv.py
    ├── syncnet.py
    └── wav2lip.py

/LICENSE:
--------------------------------------------------------------------------------

MIT License

Copyright (c) 2023 Saif Hassan

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

NOTE: Please also read the license requirements (copyright information) of the official Wav2Lip repository, linked on the main page of this repository.

/README.md:
--------------------------------------------------------------------------------

# Wav2Lip-HD: Improving Wav2Lip to achieve High-Fidelity Videos

This repository contains code for achieving high-fidelity lip-syncing in videos, using the [Wav2Lip algorithm](https://github.com/Rudrabha/Wav2Lip) for lip-syncing and the [Real-ESRGAN algorithm](https://github.com/xinntao/Real-ESRGAN) for super-resolution. Combining the two produces lip-synced videos that are both accurate and visually sharp.

## Algorithm

The algorithm for achieving high-fidelity lip-syncing with Wav2Lip and Real-ESRGAN can be summarized as follows:

1. The input video and audio are given to the `Wav2Lip` algorithm, which generates a lip-synced video.
2. A Python script extracts the individual frames from the video generated by Wav2Lip.
3. The extracted frames are passed to the Real-ESRGAN algorithm, which super-resolves them to improve their quality.
4. The high-quality frames are re-assembled into a video with ffmpeg, together with the original audio.
5. The result is a high-quality lip-synced video.

The specific steps for running this pipeline are described in the [Testing Model](https://github.com/saifhassan/Wav2Lip-HD#testing-model) section of this README; the sketch below outlines how the stages fit together.
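As a rough illustration, the stages map onto commands like the following. This is a minimal sketch only: the checkpoint filename and the flags shown for `inference.py` and `video2frames.py` are assumptions for illustration, not the exact CLI; `run_final.sh` automates the real sequence.

```
# Stage 1: lip-sync the input video to the input audio with Wav2Lip
# (checkpoint name and flag names are illustrative)
python inference.py --checkpoint_path checkpoints/wav2lip_gan.pth \
    --face input_videos/kennedy.mp4 --audio input_audios/ai.wav \
    --outfile output_videos_wav2lip/kennedy.mp4

# Stage 2: split the Wav2Lip output into individual frames
python video2frames.py --input_video output_videos_wav2lip/kennedy.mp4 \
    --frames_path frames_wav2lip

# Stage 3: super-resolve every frame in frames_wav2lip/ with Real-ESRGAN,
# writing the enhanced frames to frames_hd/

# Stage 4: re-assemble frames_hd/ with the original audio via ffmpeg
# (see the sketch in the Testing Model section below)
```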
## Testing Model

To test the Wav2Lip-HD model, follow these steps:

1. Clone this repository and install the requirements with the following commands (Python and CUDA must already be installed):

```
git clone https://github.com/saifhassan/Wav2Lip-HD.git
cd Wav2Lip-HD
pip install -r requirements.txt
```

2. Download the pretrained weights and place them in the directories listed below:

| Model | Directory | Download Link |
| :------------- |:-------------| :-----:|
| Wav2Lip | [checkpoints/](https://github.com/saifhassan/Wav2Lip-HD/tree/main/checkpoints) | [Link](https://drive.google.com/drive/folders/1tB_uz-TYMePRMZzrDMdShWUZZ0JK3SIZ?usp=sharing) |
| ESRGAN | [experiments/001_ESRGAN_x4_f64b23_custom16k_500k_B16G1_wandb/models/](https://github.com/saifhassan/Wav2Lip-HD/tree/main/experiments/001_ESRGAN_x4_f64b23_custom16k_500k_B16G1_wandb/models) | [Link](https://drive.google.com/file/d/1Al8lEpnx2K-kDX7zL2DBcAuDnSKXACPb/view?usp=sharing) |
| Face_Detection | [face_detection/detection/sfd/](https://github.com/saifhassan/Wav2Lip-HD/tree/main/face_detection/detection/sfd) | [Link](https://drive.google.com/file/d/1uNLYCPFFmO-og3WSHyFytJQLLYOwH5uY/view?usp=sharing) |
| Real-ESRGAN | Real-ESRGAN/gfpgan/weights/ | [Link](https://drive.google.com/drive/folders/1BLx6aMpHgFt41fJ27_cRmT8bt53kVAYG?usp=sharing) |
| Real-ESRGAN | Real-ESRGAN/weights/ | [Link](https://drive.google.com/file/d/1qNIf8cJl_dQo3ivelPJVWFkApyEAGnLi/view?usp=sharing) |

3. Put the input video in the `input_videos` directory and the input audio in the `input_audios` directory.

4. Open `run_final.sh` and modify the following parameters:

   `filename=kennedy` (the video file name, without extension)

   `input_audio=input_audios/ai.wav` (the audio file name, with extension)

5. Execute `run_final.sh` with the following command:

```
bash run_final.sh
```

6. Outputs:

   - `output_videos_wav2lip` contains the video generated by the Wav2Lip algorithm.
   - `frames_wav2lip` contains the frames extracted from that video.
   - `frames_hd` contains the same frames after super-resolution with Real-ESRGAN.
   - `output_videos_hd` contains the final high-quality video generated by Wav2Lip-HD.
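If you need to redo the final assembly step by hand (for example, after re-running super-resolution), the ffmpeg invocation looks roughly like the sketch below. The 25 fps frame rate and the `%05d.jpg` filename pattern are assumptions; match them to the frames actually written to `frames_hd`.

```
# Mux the super-resolved frames with the original audio into the final video
ffmpeg -framerate 25 -i frames_hd/%05d.jpg -i input_audios/ai.wav \
    -c:v libx264 -pix_fmt yuv420p -c:a aac -shortest \
    output_videos_hd/kennedy.mkv
```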
## Results

The results produced by Wav2Lip-HD are shared in two forms: output frames and output videos. Both are shown below.

### Example output frames

| Frame by Wav2Lip | Optimized Frame |
| ------------- | ------------- |
| ![Frame by Wav2Lip](examples/1_low.jpg) | ![Optimized frame](examples/1_hd.jpg) |
| ![Frame by Wav2Lip](examples/kennedy_low.jpg) | ![Optimized frame](examples/kennedy_hd.jpg) |
| ![Frame by Wav2Lip](examples/mona_low.jpg) | ![Optimized frame](examples/mona_hd.jpg) |
### Example output videos

| Video by Wav2Lip | Optimized Video |
| ------------- | ------------- |
| [kennedy_low.mp4](examples/kennedy_low.mp4) | [kennedy_hd.mkv](examples/kennedy_hd.mkv) |
| [mona_low.mp4](examples/mona_low.mp4) | [mona_hd.mkv](examples/mona_hd.mkv) |