├── .DS_Store
├── 1.jpeg
├── README.md
├── README_en.md
├── README_tts_f2f.MD
├── app.py
├── check_env
│   └── check_onnx_cuda.py
├── config
│   └── config.ini
├── download.sh
├── example
│   ├── audio.wav
│   └── video.mp4
├── face_attr_detect
│   ├── .DS_Store
│   ├── __init__.py
│   └── face_attr.cpython-38-x86_64-linux-gnu.so
├── face_detect_utils
│   ├── __init__.py
│   ├── face_detect.cpython-38-x86_64-linux-gnu.so
│   ├── head_pose.cpython-38-x86_64-linux-gnu.so
│   └── scrfd.cpython-38-x86_64-linux-gnu.so
├── face_lib
│   ├── __init__.py
│   ├── face_detect_and_align
│   │   ├── __init__.py
│   │   ├── face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so
│   │   ├── face_align_utils.cpython-38-x86_64-linux-gnu.so
│   │   └── scrfd_insightface
│   │       ├── __init__.py
│   │       └── scrfd.cpython-38-x86_64-linux-gnu.so
│   ├── face_parsing
│   │   ├── __init__.py
│   │   └── face_parsing_api.cpython-38-x86_64-linux-gnu.so
│   └── face_restore
│       ├── __init__.py
│       └── gfpgan_onnx
│           └── gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so
├── h_utils
│   ├── __init__.py
│   ├── custom.cpython-38-x86_64-linux-gnu.so
│   ├── obs_client.cpython-38-x86_64-linux-gnu.so
│   ├── request_utils.cpython-38-x86_64-linux-gnu.so
│   ├── sweep_bot.cpython-38-x86_64-linux-gnu.so
│   └── zip_utils.cpython-38-x86_64-linux-gnu.so
├── inference_from_text.sh
├── landmark2face_wy
│   ├── audio_handler.cpython-38-x86_64-linux-gnu.so
│   ├── checkpoints
│   │   └── test
│   │       └── opt.txt
│   ├── data
│   │   ├── Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so
│   │   ├── Facereala3dmmexp512_dataset.py
│   │   ├── Facereala3dmmexpwenet512_dataset.py
│   │   ├── __init__.py
│   │   ├── base_dataset.cpython-38-x86_64-linux-gnu.so
│   │   ├── image_folder.cpython-38-x86_64-linux-gnu.so
│   │   ├── l2faceaudio512_dataset.py
│   │   └── l2faceaudio_dataset.py
│   ├── digitalhuman_interface.cpython-38-x86_64-linux-gnu.so
│   ├── loss
│   │   ├── __init__.py
│   │   └── perceptual.cpython-38-x86_64-linux-gnu.so
│   ├── models
│   │   ├── DINet.cpython-38-x86_64-linux-gnu.so
│   │   ├── __init__.py
│   │   ├── base_function.cpython-38-x86_64-linux-gnu.so
│   │   ├── base_model.cpython-38-x86_64-linux-gnu.so
│   │   ├── face3d2face_model.cpython-38-x86_64-linux-gnu.so
│   │   ├── face_model.cpython-38-x86_64-linux-gnu.so
│   │   ├── l2faceaudio_model.cpython-38-x86_64-linux-gnu.so
│   │   ├── networks.cpython-38-x86_64-linux-gnu.so
│   │   ├── networks_HD.cpython-38-x86_64-linux-gnu.so
│   │   ├── networks_pix2pixHD.cpython-38-x86_64-linux-gnu.so
│   │   ├── pirender_3dmm_mouth_hd_model.cpython-38-x86_64-linux-gnu.so
│   │   └── pirender_3dmm_mouth_hdv2_model.cpython-38-x86_64-linux-gnu.so
│   ├── options
│   │   ├── __init__.py
│   │   ├── base_options.cpython-38-x86_64-linux-gnu.so
│   │   ├── test_options.cpython-38-x86_64-linux-gnu.so
│   │   └── train_options.cpython-38-x86_64-linux-gnu.so
│   ├── sync_batchnorm
│   │   ├── __init__.py
│   │   ├── batchnorm.cpython-38-x86_64-linux-gnu.so
│   │   ├── batchnorm_reimpl.cpython-38-x86_64-linux-gnu.so
│   │   ├── comm.cpython-38-x86_64-linux-gnu.so
│   │   ├── replicate.cpython-38-x86_64-linux-gnu.so
│   │   └── unittest.cpython-38-x86_64-linux-gnu.so
│   ├── test_3dmm_multi_exp_wenet.cpython-38-x86_64-linux-gnu.so
│   ├── test_3dmm_multi_exp_wenet0.cpython-38-x86_64-linux-gnu.so
│   └── util
│       ├── __init__.py
│       ├── flow_util.cpython-38-x86_64-linux-gnu.so
│       ├── get_data.cpython-38-x86_64-linux-gnu.so
│       ├── html.cpython-38-x86_64-linux-gnu.so
│       ├── image_pool.cpython-38-x86_64-linux-gnu.so
│       ├── util.cpython-38-x86_64-linux-gnu.so
│       └── visualizer.cpython-38-x86_64-linux-gnu.so
├── license.txt
├── log
│   └── dh.log
├── model_lib
│   ├── __init__.py
│   ├── base_wrapper
│   │   ├── __init__.py
│   │   └── onnx_model.cpython-38-x86_64-linux-gnu.so
│   └── model_base.py
├── preprocess_audio_and_3dmm.cpython-38-x86_64-linux-gnu.so
├── requirements.txt
├── requirements_0.txt
├── run.py
├── service
│   ├── __init__.py
│   ├── server.cpython-38-x86_64-linux-gnu.so
│   └── trans_dh_service.cpython-38-x86_64-linux-gnu.so
├── sources.list
├── wenet
│   ├── compute_ctc_att_bnf.cpython-38-x86_64-linux-gnu.so
│   ├── examples
│   │   └── aishell
│   │       └── aidata
│   │           └── conf
│   │               ├── train_conformer_multi_cn.yaml
│   │               └── train_conformer_multi_cn_linear.yaml
│   ├── tools
│   │   └── _extract_feats.py
│   ├── transformer
│   │   ├── __init__.py
│   │   ├── asr_model.cpython-38-x86_64-linux-gnu.so
│   │   ├── attention.cpython-38-x86_64-linux-gnu.so
│   │   ├── cmvn.cpython-38-x86_64-linux-gnu.so
│   │   ├── convolution.cpython-38-x86_64-linux-gnu.so
│   │   ├── ctc.cpython-38-x86_64-linux-gnu.so
│   │   ├── decoder.cpython-38-x86_64-linux-gnu.so
│   │   ├── decoder_layer.cpython-38-x86_64-linux-gnu.so
│   │   ├── embedding.cpython-38-x86_64-linux-gnu.so
│   │   ├── encoder.cpython-38-x86_64-linux-gnu.so
│   │   ├── encoder_layer.cpython-38-x86_64-linux-gnu.so
│   │   ├── label_smoothing_loss.cpython-38-x86_64-linux-gnu.so
│   │   ├── positionwise_feed_forward.cpython-38-x86_64-linux-gnu.so
│   │   ├── subsampling.cpython-38-x86_64-linux-gnu.so
│   │   └── swish.cpython-38-x86_64-linux-gnu.so
│   └── utils
│       ├── checkpoint.cpython-38-x86_64-linux-gnu.so
│       ├── cmvn.py
│       ├── common.cpython-38-x86_64-linux-gnu.so
│       ├── ctc_util.cpython-38-x86_64-linux-gnu.so
│       ├── executor.cpython-38-x86_64-linux-gnu.so
│       ├── mask.cpython-38-x86_64-linux-gnu.so
│       └── scheduler.cpython-38-x86_64-linux-gnu.so
├── xseg
│   └── dfl_xseg_api.cpython-38-x86_64-linux-gnu.so
└── y_utils
    ├── __init__.py
    ├── config.cpython-38-x86_64-linux-gnu.so
    ├── lcr.cpython-38-x86_64-linux-gnu.so
    ├── liblcr.so
    ├── logger.cpython-38-x86_64-linux-gnu.so
    ├── md5.cpython-38-x86_64-linux-gnu.so
    ├── time_utils.cpython-38-x86_64-linux-gnu.so
    └── tools.cpython-38-x86_64-linux-gnu.so

/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/.DS_Store
--------------------------------------------------------------------------------
/1.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/1.jpeg
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | [![License](https://img.shields.io/badge/License-View%20License-blue.svg)](https://github.com/GuijiAI/HeyGem.ai/blob/main/LICENSE)
3 | ![Python](https://img.shields.io/badge/Python-3.8-blue.svg)
4 | ![Linux](https://img.shields.io/badge/OS-Linux-brightgreen.svg)
5 |
6 | **[中文](#chinese-version)** | **[English](README_en.md)**
7 |
8 | ---
9 |
10 |
11 |
12 |
13 | # HeyGem-Linux-Python-Hack
14 |
15 | ## 项目简介
16 |
17 | HeyGem-Linux-Python-Hack 是一个基于 Python 的数字人项目,它从 [HeyGem.ai](https://github.com/GuijiAI/HeyGem.ai) 中提取出来,能够直接在 Linux 系统上运行,摆脱了对 Docker 和 Windows 系统的依赖。我们的目标是提供一个更易于部署和使用的数字人解决方案。
18 |
19 | [RTX 50 版本已经发布,点击可达](https://github.com/Holasyb918/HeyGem-Linux-Python-Hack-RTX-50)
20 | [Text To Face] 如果你需要较为完整的 HeyGem,即从 TTS 到数字人的完整流程,可以参考 [这里](README_tts_f2f.MD)
21 |
22 | **如果你觉得这个项目对你有帮助,欢迎给我们 Star!**
23 | **如果运行过程中遇到问题,请先查阅已有 Issue 以及 Google/baidu/ai,仍无法解决时欢迎提交 Issues!**
24 | **本项目中,所有 .so 文件均由硅基编译,与开发者无关**
25 | **本项目中,所有模型均由硅基提供,与开发者无关**
26 |
27 | ## 主要特性
28 |
29 | * 无需 Docker: 直接在 Linux 系统上运行,简化部署流程。
30 | * 无需 Windows: 完全基于 Linux 开发和测试。
31 | * Python 驱动: 使用 Python 语言开发,易于理解和扩展。
32 | * 开发者友好: 易于使用和扩展。
33 | * 完全离线。
34 |
35 | 微信群
36 | ![](./1.jpeg)
37 |
38 | ## 开始使用
39 |
40 | ### 安装
41 | #### 环境
42 | 本项目**支持且仅支持 Linux & python3.8 环境**。
43 | 请确保你的 Linux 系统上已经安装了 **Python 3.8**,然后使用 pip 安装项目依赖项。
44 | **备用**:同时也提供一个备用的环境文件 [requirements_0.txt](requirements_0.txt),遇到问题的话,可以参考它来建立一个新的环境。
45 | **具体的 onnxruntime-gpu / torch 等版本需要结合你机器上的 CUDA 版本去尝试组合,否则仍旧可能遇到问题。**
46 | **请尽量不要询问任何关于 pip 的问题,感谢合作**
47 | **如果你遇到了环境难以搭建完成的问题,建议参考 [autodl 环境](https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/issues/43), 备注: 开发者与 autodl 无任何利益相关**
48 |
49 |
50 | ```bash
51 | # 直接安装整个 requirements.txt 不一定成功,更建议先跑代码观察报错信息,再根据报错信息结合 requirements 尝试安装,祝你顺利。
52 | # pip install -r requirements.txt
53 | ```
54 |
55 | ### 使用
56 | 把项目克隆到本地:
57 | ```bash
58 | git clone https://github.com/Holasyb918/HeyGem-Linux-Python-Hack
59 | cd HeyGem-Linux-Python-Hack
60 | bash download.sh
61 | ```
62 | #### 开始使用
63 | * repo 中已提供可以用于 demo 的音视频样例,代码可以直接运行。
64 | #### command:
65 | ```bash
66 | python run.py
67 | ```
68 |
69 | * 如果要使用自己的数据,可以外部传入参数。请注意,**path 必须是本地文件,且仅支持相对路径**。
70 |
71 | #### command:
72 | ```bash
73 | python run.py --audio_path example/audio.wav --video_path example/video.mp4
74 | ```
75 | #### gradio:
76 | ```bash
77 | python app.py
78 | # 请等待模型初始化完成后再提交任务
79 | ```
80 |
81 | ## QA
82 | ### 1. 多个人脸报错
83 | 下载新的人脸检测模型,替换原本的人脸检测模型,或许可以解决。
84 | ```bash
85 | wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/scrfd_10g_kps.onnx
86 | mv face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx.bak
87 | mv scrfd_10g_kps.onnx face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx
88 | ```
89 | ### 2. 初始化报错
90 |
91 | 有较高概率是 onnxruntime-gpu 版本不匹配导致的。
92 | ```bash
93 | python check_env/check_onnx_cuda.py
94 | ```
95 | 观察输出是否包含 successfully。
96 | 如果遇到问题,你可以尝试以下方法:
97 | 1. 建议根据自己的 CUDA 等环境尝试更换一些版本。
98 | 2. 如果难以解决,先卸载 onnxruntime-gpu 和 onnxruntime,然后使用 conda 安装 cudatoolkit 环境,再尝试用 pip 安装 onnxruntime-gpu。
99 |
100 | 验证可行的版本组合如下:
101 | | cudatoolkit | onnxruntime-gpu | 备注 |
102 | | --- | --- | --- |
103 | | 11.8.0 | 1.16.0 | |
104 |
105 | ### 3. ImportError: cannot import name check_argument_types
106 | 缺少依赖包:
107 | ```bash
108 | pip install typeguard
109 | ```
110 |
111 | ### 4. library.so 找不到
112 | 报错一般类似于 Could not load library libcublasLt.so.11. Error: libcublasLt.so.11: cannot open shared object file: No such file or directory
113 |
114 | 执行以下命令查看是否有该文件:
115 | ```
116 | sudo find /usr -name "libcublasLt.so.11"
117 | ```
118 | 没有的话,需要安装对应版本的 CUDA;
119 | 如果有的话,就把上一步找到的文件路径添加到环境变量:
120 | ```
121 | export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
122 | ```
123 | 要永久生效,就把这行添加到 ~/.bashrc 里,然后执行 source ~/.bashrc。
124 |
125 | ## Contributing
126 | 欢迎贡献!
127 |
128 | ## License
129 | 参考 heyGem.ai 的协议。
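
在 QA 2 的基础上,补充一个更轻量的自检片段(示意代码,不在本仓库中,也不加载任何模型,只查询当前的 onnxruntime 安装):

```python
# quick_onnx_probe.py:示意性片段(非本仓库文件)
import onnxruntime as ort

# 列出当前 onnxruntime 构建支持的 execution provider;
# GPU 环境正常时应包含 "CUDAExecutionProvider"。
providers = ort.get_available_providers()
print("available providers:", providers)
print("device:", ort.get_device())  # onnxruntime-gpu 构建下为 "GPU"

if "CUDAExecutionProvider" not in providers:
    print("未找到 CUDA provider,请参考上表核对 cudatoolkit 与 "
          "onnxruntime-gpu 的版本搭配(如 11.8.0 + 1.16.0)。")
```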
--------------------------------------------------------------------------------
/README_en.md:
--------------------------------------------------------------------------------
1 |
2 | [![License](https://img.shields.io/badge/License-View%20License-blue.svg)](https://github.com/GuijiAI/HeyGem.ai/blob/main/LICENSE)
3 | ![Python](https://img.shields.io/badge/Python-3.8-blue.svg)
4 | ![Linux](https://img.shields.io/badge/OS-Linux-brightgreen.svg)
5 |
6 | **[中文](./README.md)** | **[English](#english-version)**
7 |
8 | ---
9 |
10 |
11 |
12 | # HeyGem-Linux-Python-Hack
13 |
14 | ## Introduction
15 |
16 | HeyGem-Linux-Python-Hack is a Python-based digital human project extracted from [HeyGem.ai](https://github.com/GuijiAI/HeyGem.ai). It is designed to run directly on Linux, eliminating the need for Docker and Windows. Our goal is to provide an easier-to-deploy, user-friendly digital human solution.
17 |
18 | **Feel free to Star us if you find this project useful!**
19 | **Please submit an Issue if you run into any problems!**
20 |
21 | ## Key Features
22 |
23 | * No Docker Required: Runs directly on Linux systems, simplifying the deployment process.
24 | * No Windows Required: Fully developed and tested on Linux.
25 | * Python Powered: Developed in Python, making it easy to understand and extend.
26 | * Developer-Friendly: Easy to use and extend.
27 |
28 | ## Getting Started
29 |
30 | ### Installation
31 |
32 | Please ensure that **Python 3.8** is installed on your Linux system. Then install the project dependencies using pip:
33 |
34 | ```bash
35 | pip install -r requirements.txt
36 | ```
37 |
38 | ### Usage
39 | Clone this repository to your local machine:
40 | ```bash
41 | git clone https://github.com/Holasyb918/HeyGem-Linux-Python-Hack
42 | cd HeyGem-Linux-Python-Hack
43 | bash download.sh
44 | ```
45 | #### Getting Started
46 | * Audio and video examples that can be used for the demo are already provided in the repo, so the code can be run directly.
47 | #### Command:
48 | ```bash
49 | python run.py
50 | ```
51 | * If you want to use your own data, you can pass parameters externally. **Please note that each path must point to a local file, and only relative paths are supported.**
52 | #### command:
53 | ```bash
54 | python run.py --audio_path example/audio.wav --video_path example/video.mp4
55 | ```
56 | #### gradio:
57 | ```bash
58 | python app.py
59 | # Please wait until processor init is done before submitting a task.
60 | ```
61 |
62 | ## Contributing
63 | Contributions are welcome!
64 |
65 | ## License
66 | This project is licensed under the HeyGem.ai License.
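
The CLI above can also be driven from another script. Below is a minimal sketch (a hypothetical `batch_run.py`, not part of this repo) that simply mirrors the documented `run.py` flags:

```python
# batch_run.py -- illustrative wrapper around the documented CLI (not in the repo)
import subprocess
from pathlib import Path

def generate(audio: str, video: str) -> None:
    """Invoke run.py exactly as shown in the README above."""
    # The README notes that only relative, local paths are supported.
    assert not Path(audio).is_absolute() and not Path(video).is_absolute()
    subprocess.run(
        ["python", "run.py", "--audio_path", audio, "--video_path", video],
        check=True,
    )

if __name__ == "__main__":
    generate("example/audio.wav", "example/video.mp4")
```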
67 | -------------------------------------------------------------------------------- /README_tts_f2f.MD: -------------------------------------------------------------------------------- 1 | 2 | [![License](https://img.shields.io/badge/License-View%20License-blue.svg)](https://github.com/GuijiAI/HeyGem.ai/blob/main/LICENSE) 3 | ![Python](https://img.shields.io/badge/Python-3.8-blue.svg) 4 | ![Linux](https://img.shields.io/badge/OS-Linux-brightgreen.svg) 5 | 6 | **[中文](#chinese-version)** | **[English](README_en.md)** 7 | 8 | --- 9 | 10 | 11 | 12 | # HeyGem-Linux-Python-Hack 13 | 14 | ## 项目简介 15 | 16 | [HeyGem-Linux-Python-Hack] 是一个基于 Python 的数字人项目,它从 [HeyGem.ai](https://github.com/GuijiAI/HeyGem.ai) 中提取出来,它能够直接在 Linux 系统上运行,摆脱了对 Docker 和 Windows 系统的依赖。我们的目标是提供一个更易于部署和使用的数字人解决方案。 17 | 18 | **如果你觉得这个项目对你有帮助,欢迎给我们 Star!** 19 | **如果运行过程中遇到问题,在查阅已有 Issue 后,在查阅 Google/baidu/ai 后,欢迎提交 Issues!** 20 | 21 | ## 主要特性 22 | 23 | * 无需 Docker: 直接在 Linux 系统上运行,简化部署流程。 24 | * 无需 Windows: 完全基于 Linux 开发和测试。 25 | * Python 驱动: 使用 Python 语言开发,易于理解和扩展。 26 | * 开发者友好: 易于使用和扩展。 27 | * 完全离线。 28 | 29 | ## 开始使用 30 | 31 | ### 环境 32 | 本项目包括 tts 和 face2face 两部分 33 | * tts 部分支持 3.8,事实上有更高版本更好; 34 | * face2face 部分支持且仅支持 3.8。 35 | 36 | 37 | ### 使用 38 | 把项目克隆到本地 39 | ```bash 40 | # f2f 41 | git clone https://github.com/Holasyb918/HeyGem-Linux-Python-Hack 42 | cd HeyGem-Linux-Python-Hack 43 | # 下载 f2f 模型 44 | bash download.sh 45 | 46 | # tts 47 | git clone https://github.com/Holasyb918/tts-fish-speech 48 | cd tts-fish-speech 49 | # 下载 tts 模型 50 | huggingface-cli download fishaudio/fish-speech-1.5 --local-dir checkpoints/fish-speech-1.5/ 51 | ``` 52 | 53 | ### 安装环境 54 | 请参考 [requirements.txt](https://github.com/Holasyb918/tts-fish-speech/blob/main/requirements.txt) 并结合你的实际环境来搭建环境,如果单个环境难以满足,tts 可以使用常规的环境,不要求 3.8,但你可能需要分步完成从 text 到数字人的整个流程。 55 | 56 | #### 开始使用 57 | * repo 中已提供可以用于 demo 的音视频样例,代码可以直接运行。 58 | 把你需要生成的文本放在 [example/text.txt](example/text.txt) 中,把要克隆的音色放在 [example/audio.wav](example/audio.wav) 中,然后运行以下命令: 59 | #### command: 60 | ```bash 61 | bash inference_from_text.sh example/audio.wav example/text.txt example/video.mp4 62 | # 音色 wav TTS 文本 视频 63 | ``` 64 | 65 | 66 | ## QA 67 | ### 1. 多个人脸报错 68 | 下载新的人脸检测模型,替换原本的人脸检测模型或许可以解决。 69 | ```bash 70 | wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/scrfd_10g_kps.onnx 71 | mv face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx.bak 72 | mv scrfd_10g_kps.onnx face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx 73 | ``` 74 | ### 2. 初始化报错 75 | 76 | 有较高概率是 onnxruntime-gpu 版本不匹配导致的。 77 | ```bash 78 | python check_env/check_onnx_cuda.py 79 | ``` 80 | 观察输出是否包括 successfully. 81 | 如果遇到问题,你可以尝试以下方法: 82 | 1. 建议根据自己 cuda 等环境尝试更换一些版本。 83 | 2. 如果难以解决,先卸载 onnxruntime-gpu 和 onnxruntime,然后使用 conda 安装 cudatoolkit 环境,然后再尝试 pip 安装 onnxruntime-gpu。 84 | 85 | 验证可行版本如下: 86 | | cudatoolkit | onnxruntime-gpu | 备注 | 87 | | --- | --- | --- | 88 | | 11.8.0 | 1.16.0 | | 89 | 90 | ### 3. ImportError: cannot import name check_argument_types 91 | 缺包 92 | ```bash 93 | pip install typeguard 94 | ``` 95 | 96 | ## Contributing 97 | 欢迎贡献! 98 | 99 | ## License 100 | 参考 heyGem.ai 的协议. 
101 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import gc
3 | import json
4 | import os
5 |
6 | os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
7 | import subprocess
8 | import threading
9 | import time
10 | import traceback
11 | import uuid
12 | from enum import Enum
13 | import queue
14 | import shutil
15 | from functools import partial
16 |
17 | import cv2
18 | import gradio as gr
19 | from flask import Flask, request
20 |
21 | import service.trans_dh_service
22 | from h_utils.custom import CustomError
23 | from y_utils.config import GlobalConfig
24 | from y_utils.logger import logger
25 |
26 |
27 | def write_video_gradio(
28 |     output_imgs_queue,
29 |     temp_dir,
30 |     result_dir,
31 |     work_id,
32 |     audio_path,
33 |     result_queue,
34 |     width,
35 |     height,
36 |     fps,
37 |     watermark_switch=0,
38 |     digital_auth=0,
39 |     temp_queue=None,
40 | ):
41 |     output_mp4 = os.path.join(temp_dir, "{}-t.mp4".format(work_id))
42 |     fourcc = cv2.VideoWriter_fourcc(*"mp4v")
43 |     result_path = os.path.join(result_dir, "{}-r.mp4".format(work_id))
44 |     video_write = cv2.VideoWriter(output_mp4, fourcc, fps, (width, height))
45 |     print("Custom VideoWriter init done")
46 |     try:
47 |         while True:
48 |             state, reason, value_ = output_imgs_queue.get()
49 |             if type(state) == bool and state == True:
50 |                 logger.info(
51 |                     "Custom VideoWriter [{}]视频帧队列处理已结束".format(work_id)
52 |                 )
53 |                 logger.info(
54 |                     "Custom VideoWriter Silence Video saved in {}".format(
55 |                         os.path.realpath(output_mp4)
56 |                     )
57 |                 )
58 |                 video_write.release()
59 |                 break
60 |             else:
61 |                 if type(state) == bool and state == False:
62 |                     logger.error(
63 |                         "Custom VideoWriter [{}]任务视频帧队列 -> 异常原因:[{}]".format(
64 |                             work_id, reason
65 |                         )
66 |                     )
67 |                     raise CustomError(reason)
68 |                 for result_img in value_:
69 |                     video_write.write(result_img)
70 |         if video_write is not None:
71 |             video_write.release()
72 |         if watermark_switch == 1 and digital_auth == 1:
73 |             logger.info(
74 |                 "Custom VideoWriter [{}]任务需要水印和数字人标识".format(work_id)
75 |             )
76 |             if width > height:
77 |                 command = 'ffmpeg -y -i {} -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10,overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
78 |                     audio_path,
79 |                     output_mp4,
80 |                     GlobalConfig.instance().watermark_path,
81 |                     GlobalConfig.instance().digital_auth_path,
82 |                     result_path,
83 |                 )
84 |                 logger.info("command:{}".format(command))
85 |             else:
86 |                 command = 'ffmpeg -y -i {} -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10,overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
87 |                     audio_path,
88 |                     output_mp4,
89 |                     GlobalConfig.instance().watermark_path,
90 |                     GlobalConfig.instance().digital_auth_path,
91 |                     result_path,
92 |                 )
93 |                 logger.info("command:{}".format(command))
94 |         elif watermark_switch == 1 and digital_auth == 0:
95 |             logger.info("Custom VideoWriter [{}]任务需要水印".format(work_id))
96 |             command = 'ffmpeg -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10" -c:a aac -crf 15 -strict -2 {}'.format(
97 |                 audio_path,
98 |                 output_mp4,
99 |                 GlobalConfig.instance().watermark_path,
100 |                 result_path,
101 |             )
102 |             logger.info("command:{}".format(command))
103 |         elif watermark_switch == 0 and digital_auth == 1:
104 |             logger.info("Custom VideoWriter [{}]任务需要数字人标识".format(work_id))
105 |             if width > height:
106 |                 command = 'ffmpeg -loglevel warning -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
107 |                     audio_path,
108 |                     output_mp4,
109 |                     GlobalConfig.instance().digital_auth_path,
110 |                     result_path,
111 |                 )
112 |                 logger.info("command:{}".format(command))
113 |             else:
114 |                 command = 'ffmpeg -loglevel warning -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
115 |                     audio_path,
116 |                     output_mp4,
117 |                     GlobalConfig.instance().digital_auth_path,
118 |                     result_path,
119 |                 )
120 |                 logger.info("command:{}".format(command))
121 |         else:
122 |             command = "ffmpeg -loglevel warning -y -i {} -i {} -c:a aac -c:v libx264 -crf 15 -strict -2 {}".format(
123 |                 audio_path, output_mp4, result_path
124 |             )
125 |         logger.info("Custom command:{}".format(command))
126 |         subprocess.call(command, shell=True)
127 |         print("###### Custom Video Writer write over")
128 |         print(f"###### Video result saved in {os.path.realpath(result_path)}")
129 |         result_queue.put([True, result_path])
130 |         # temp_queue.put([True, result_path])
131 |     except Exception as e:
132 |         logger.error(
133 |             "Custom VideoWriter [{}]视频帧队列处理异常结束,异常原因:[{}]".format(
134 |                 work_id, e.__str__()
135 |             )
136 |         )
137 |         result_queue.put(
138 |             [
139 |                 False,
140 |                 "[{}]视频帧队列处理异常结束,异常原因:[{}]".format(
141 |                     work_id, e.__str__()
142 |                 ),
143 |             ]
144 |         )
145 |     logger.info("Custom VideoWriter 后处理进程结束")
146 |
147 |
148 | service.trans_dh_service.write_video = write_video_gradio
149 |
150 |
151 | class VideoProcessor:
152 |     def __init__(self):
153 |         self.task = service.trans_dh_service.TransDhTask()
154 |         self.basedir = GlobalConfig.instance().result_dir
155 |         self.is_initialized = False
156 |         self._initialize_service()
157 |         print("VideoProcessor init done")
158 |
159 |     def _initialize_service(self):
160 |         logger.info("开始初始化 trans_dh_service...")
161 |         try:
162 |             time.sleep(5)
163 |             logger.info("trans_dh_service 初始化完成。")
164 |             self.is_initialized = True
165 |         except Exception as e:
166 |             logger.error(f"初始化 trans_dh_service 失败: {e}")
167 |
168 |     def process_video(
169 |         self, audio_file, video_file, watermark=False, digital_auth=False
170 |     ):
171 |         while not self.is_initialized:
172 |             logger.info("服务尚未完成初始化,等待 1 秒...")
173 |             time.sleep(1)
174 |         work_id = str(uuid.uuid1())
175 |         code = work_id
176 |         temp_dir = os.path.join(GlobalConfig.instance().temp_dir, work_id)
177 |         result_dir = GlobalConfig.instance().result_dir
178 |         video_writer_thread = None
179 |         final_result = None
180 |
181 |         try:
182 |             cap = cv2.VideoCapture(video_file)
183 |             width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
184 |             height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
185 |             fps = cap.get(cv2.CAP_PROP_FPS)
186 |             cap.release()
187 |
188 |             audio_path = audio_file
189 |             video_path = video_file
190 |
191 |             self.task.task_dic[code] = ""
192 |             self.task.work(audio_path, video_path, code, 0, 0, 0, 0)
193 |
194 |             result_path = self.task.task_dic[code][2]
195 |             final_result_dir = os.path.join("result", code)
196 |             os.makedirs(final_result_dir, exist_ok=True)
197 |             os.system(f"mv {result_path} {final_result_dir}")
198 |             os.system(
199 |                 f"rm -rf {os.path.join(os.path.dirname(result_path), code + '*.*')}"
200 |             )
201 |             result_path = os.path.realpath(
202 |                 os.path.join(final_result_dir, os.path.basename(result_path))
203 |             )
204 |             return result_path
205 |
206 |         except Exception as e:
207 |             logger.error(f"处理视频时发生错误: {e}")
208 |             raise gr.Error(str(e))
209 |
210 |
211 | if __name__ == "__main__":
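    # Note: write_video_gradio above replaces service.trans_dh_service.write_video
    # (see the assignment after the function definition), so frames produced by
    # TransDhTask.work() are written out by this module when the Gradio entry
    # point below runs.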
212 |     processor = VideoProcessor()
213 |
214 |     inputs = [
215 |         gr.File(label="上传音频文件/upload audio file"),
216 |         gr.File(label="上传视频文件/upload video file"),
217 |     ]
218 |     outputs = gr.Video(label="生成的视频/Generated video")
219 |
220 |     title = "数字人视频生成/Digital Human Video Generation"
221 |     description = "上传音频和视频文件,即可生成数字人视频。/Upload audio and video files to generate digital human videos."
222 |
223 |     demo = gr.Interface(
224 |         fn=processor.process_video,
225 |         inputs=inputs,
226 |         outputs=outputs,
227 |         title=title,
228 |         description=description,
229 |     )
230 |     demo.queue().launch()
--------------------------------------------------------------------------------
/check_env/check_onnx_cuda.py:
--------------------------------------------------------------------------------
1 | import onnxruntime
2 | import numpy as np
3 |
4 | def check_gpu_usage():
5 |     """
6 |     Checks if ONNX Runtime can use the GPU by attempting to create an InferenceSession
7 |     with the CUDAExecutionProvider.
8 |
9 |     Returns:
10 |         True if GPU is likely being used, False otherwise.
11 |     """
12 |     providers = ("CUDAExecutionProvider",
13 |                  {"device_id": 0})
14 |     session_options = onnxruntime.SessionOptions()
15 |     session_options.log_severity_level = 3
16 |     onnx_path = "./face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx"
17 |     onnx_session = onnxruntime.InferenceSession(onnx_path, session_options, providers=[providers])
18 |     print(onnx_session.get_providers())
19 |     return "CUDAExecutionProvider" in onnx_session.get_providers(), onnx_session
20 |
21 | if __name__ == "__main__":
22 |     is_cuda, onnx_session = check_gpu_usage()
23 |     if is_cuda:
24 |         print("ONNX Runtime is successfully using the GPU.")
25 |         inp = np.random.randn(1, 3, 640, 640).astype(np.float32)
26 |         ort_inputs = {onnx_session.get_inputs()[0].name: inp}
27 |         ort_outs = onnx_session.run(None, ort_inputs)
28 |         print(ort_outs[0].shape)
29 |     else:
30 |         print("ONNX Runtime is NOT using the GPU or there was an error initializing the CUDA provider.")
31 |         print("Please ensure that:")
32 |         print("- You have installed the 'onnxruntime-gpu' package.")
33 |         print("- You have a compatible NVIDIA GPU with appropriate drivers installed.")
34 |         print("- CUDA and cuDNN are installed and correctly configured in your system.")
35 |         print("- The versions of CUDA, cuDNN, and the NVIDIA drivers are compatible with the 'onnxruntime-gpu' version you have installed.")
36 |         print("- The ONNX Runtime build you are using supports CUDA.")
--------------------------------------------------------------------------------
/config/config.ini:
--------------------------------------------------------------------------------
1 | [log]
2 | log_dir = ./log
3 | log_file = dh.log
4 |
5 | [http_server]
6 | server_ip = 0.0.0.0
7 | server_port = 8383
8 |
9 | [temp]
10 | temp_dir = ./
11 | clean_switch = 1
12 |
13 | [result]
14 | result_dir = ./result
15 | clean_switch = 0
16 |
17 | [digital]
18 | batch_size = 4
19 |
20 | [register]
21 | url = http://172.16.160.51:12120
22 | report_interval = 10
23 | enable=0
--------------------------------------------------------------------------------
/download.sh:
--------------------------------------------------------------------------------
1 | set -e
2 | set -u
3 |
4 | # face attr
5 | mkdir -p face_attr_detect
6 | wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/face_attr_epoch_12_220318.onnx -O face_attr_detect/face_attr_epoch_12_220318.onnx
7 |
8 | # face detect
9 | mkdir -p face_detect_utils/resources
10 | wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/pfpld_robust_sim_bs1_8003.onnx -O face_detect_utils/resources/pfpld_robust_sim_bs1_8003.onnx
11 | wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/scrfd_500m_bnkps_shape640x640.onnx -O face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx
12 | wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/model_float32.onnx -O face_detect_utils/resources/model_float32.onnx
13 |
14 | # dh model
15 | mkdir -p landmark2face_wy/checkpoints/anylang
16 | wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/dinet_v1_20240131.pth -O landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth
17 |
18 | # face parsing
19 | mkdir -p pretrain_models/face_lib/face_parsing
20 | wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/79999_iter.onnx -O pretrain_models/face_lib/face_parsing/79999_iter.onnx
21 |
22 | # gfpgan
23 | mkdir -p pretrain_models/face_lib/face_restore/gfpgan
24 | wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/GFPGANv1.4.onnx -O pretrain_models/face_lib/face_restore/gfpgan/GFPGANv1.4.onnx
25 |
26 | # xseg
27 | mkdir -p xseg
28 | wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/xseg_211104_4790000.onnx -O xseg/xseg_211104_4790000.onnx
29 |
30 | # wenet
31 | mkdir -p wenet/examples/aishell/aidata/exp/conformer
32 | wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/wenetmodel.pt -O wenet/examples/aishell/aidata/exp/conformer/wenetmodel.pt
--------------------------------------------------------------------------------
/example/audio.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/example/audio.wav
--------------------------------------------------------------------------------
/example/video.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/example/video.mp4
--------------------------------------------------------------------------------
/face_attr_detect/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_attr_detect/.DS_Store
--------------------------------------------------------------------------------
/face_attr_detect/__init__.py:
--------------------------------------------------------------------------------
1 | from .face_attr import FaceAttr
2 |
--------------------------------------------------------------------------------
/face_attr_detect/face_attr.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_attr_detect/face_attr.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/face_detect_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_detect_utils/__init__.py -------------------------------------------------------------------------------- /face_detect_utils/face_detect.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_detect_utils/face_detect.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /face_detect_utils/head_pose.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_detect_utils/head_pose.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /face_detect_utils/scrfd.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_detect_utils/scrfd.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /face_lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_lib/__init__.py -------------------------------------------------------------------------------- /face_lib/face_detect_and_align/__init__.py: -------------------------------------------------------------------------------- 1 | from .face_align_5_landmarks import FaceDetect5Landmarks 2 | from .face_align_utils import estimate_norm 3 | 4 | -------------------------------------------------------------------------------- /face_lib/face_detect_and_align/face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_lib/face_detect_and_align/face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /face_lib/face_detect_and_align/face_align_utils.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_lib/face_detect_and_align/face_align_utils.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /face_lib/face_detect_and_align/scrfd_insightface/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2021/11/10 3 | 4 | 5 | from .scrfd import SCRFD -------------------------------------------------------------------------------- /face_lib/face_detect_and_align/scrfd_insightface/scrfd.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_lib/face_detect_and_align/scrfd_insightface/scrfd.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /face_lib/face_parsing/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | 4 | 5 | from .face_parsing_api import FaceParsing 6 | # from .dfl_xseg_net import XsegNet 7 | -------------------------------------------------------------------------------- /face_lib/face_parsing/face_parsing_api.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_lib/face_parsing/face_parsing_api.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /face_lib/face_restore/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .gfpgan_onnx.gfpgan_onnx_api import GFPGAN 3 | -------------------------------------------------------------------------------- /face_lib/face_restore/gfpgan_onnx/gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_lib/face_restore/gfpgan_onnx/gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /h_utils/__init__.py: -------------------------------------------------------------------------------- 1 | #!/user/bin/env python 2 | # coding=utf-8 3 | """ 4 | @project : dhp-service 5 | @author : huyi 6 | @file : __init__.py.py 7 | @ide : PyCharm 8 | @time : 2021-08-18 15:45:13 9 | """ -------------------------------------------------------------------------------- /h_utils/custom.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/h_utils/custom.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /h_utils/obs_client.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/h_utils/obs_client.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /h_utils/request_utils.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/h_utils/request_utils.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /h_utils/sweep_bot.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/h_utils/sweep_bot.cpython-38-x86_64-linux-gnu.so 
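
Every compiled module in this dump carries the `cpython-38-x86_64-linux-gnu` ABI tag, which is what pins the project to Python 3.8 on x86-64 Linux: CPython only imports extension modules whose file suffix matches the running interpreter. A standalone illustration (not part of the repo):

```python
# abi_probe.py -- why the bundled .so files need Python 3.8 (illustrative)
import importlib.machinery
import sys

print(sys.version_info[:2])                    # must be (3, 8) for these modules
print(importlib.machinery.EXTENSION_SUFFIXES)  # accepted extension-module suffixes
# e.g. `from h_utils.custom import CustomError` (used by app.py) resolves only
# when '.cpython-38-x86_64-linux-gnu.so' appears in EXTENSION_SUFFIXES.
```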
--------------------------------------------------------------------------------
/h_utils/zip_utils.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/h_utils/zip_utils.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/inference_from_text.sh:
--------------------------------------------------------------------------------
1 | set -e
2 | set -u
3 |
4 | ref_audio=$1
5 | text_path=$2
6 | ref_mp4=$3
7 |
8 | pwd=$(pwd)
9 | echo "ref_audio: ${ref_audio}"
10 | echo "text_path: ${text_path}"
11 | echo "ref_mp4: ${ref_mp4}"
12 | echo "pwd: ${pwd}"
13 |
14 | real_ref_audio=$(realpath ${ref_audio})
15 | real_text_path=$(realpath ${text_path})
16 | real_ref_mp4=$(realpath ${ref_mp4})
17 |
18 | echo "real_ref_audio: ${real_ref_audio}"
19 | echo "real_text_path: ${real_text_path}"
20 | echo "real_ref_mp4: ${real_ref_mp4}"
21 |
22 | # tts
23 | cd tts-fish-speech
24 | echo bash run.sh ${real_ref_audio} ${real_text_path}
25 | bash run.sh ${real_ref_audio} ${real_text_path}
26 |
27 | # f2f
28 | cd ${pwd}
29 | mv tts-fish-speech/fake.wav example/fake.wav
30 |
31 | python run.py --audio_path example/fake.wav --video_path ${ref_mp4}
--------------------------------------------------------------------------------
/landmark2face_wy/audio_handler.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/audio_handler.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/checkpoints/test/opt.txt:
--------------------------------------------------------------------------------
1 | ----------------- Options ---------------
2 | aspect_ratio: 1.0
3 | audio_feature: 3dmm
4 | batch_size: 16
5 | checkpoints_dir: ./landmark2face_wy/checkpoints
6 | crop_size: 256
7 | dataroot: ./data
8 | dataset_mode: Facereala3dmm
9 | direction: AtoB
10 | display_winsize: 256
11 | distributed: False
12 | epoch: latest
13 | eval: False
14 | feat_num: 3
15 | feature_path: ../AnnI_deep3dface_256_contains_id/
16 | fp16: False
17 | gpu_ids: 0
18 | img_size: 256
19 | init_gain: 0.02
20 | init_type: normal
21 | input_nc: 3
22 | instance_feat: False
23 | isTrain: False [default: None]
24 | label_feat: False
25 | lan_size: 1
26 | load_features: False
27 | load_iter: 0 [default: 0]
28 | load_size: 286
29 | local_rank: -1
30 | max_dataset_size: inf
31 | mfcc0_rate: 0.2
32 | model: pirender_3dmm_mouth_hd
33 | model_path: ./landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth
34 | n_blocks: 9
35 | n_blocks_global: 9
36 | n_blocks_local: 3
37 | n_clusters: 10
38 | n_downsample_E: 4
39 | n_downsample_global: 4
40 | n_layers_D: 3
41 | n_local_enhancers: 1
42 | name: test
43 | ndf: 64
44 | nef: 16
45 | netD: basic
46 | netG: pirender
47 | ngf: 64
48 | niter_fix_global: 0
49 | no_dropout: True
50 | no_flip: False
51 | no_ganFeat_loss: False
52 | no_instance: False
53 | norm: instance
54 | ntest: inf
55 | num_D: 2
56 | num_test: 50
57 | num_threads: 4
58 | output_nc: 3
59 | perceptual_layers: ['relu_1_1', 'relu_2_1', 'relu_3_1', 'relu_4_1', 'relu_5_1']
60 | perceptual_network: vgg19
61 | perceptual_num_scales: 4
62 | perceptual_use_style_loss: True
63 | perceptual_weights: [4, 4, 4, 4, 4]
64 | phase: test
65 | preprocess: resize_and_crop
66 | resize_size: 512
67 | results_dir: ./results/
68 | serial_batches: False
69 | suffix:
70 | test_audio_path: None
71 | test_muban: None
72 | verbose: False
73 | weight_style_to_perceptual: 250
74 | ----------------- End -------------------
75 |
--------------------------------------------------------------------------------
/landmark2face_wy/data/Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/data/Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/data/Facereala3dmmexp512_dataset.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | import random
3 | from data.base_dataset import BaseDataset, get_params, get_transform
4 | import torchvision.transforms as transforms
5 | from data.image_folder import make_dataset
6 | from PIL import Image, ImageEnhance
7 | import numpy as np
8 | import cv2
9 | import torch
10 | import time
11 |
12 | def get_idts(config_name):
13 |     idts = list()
14 |     with open(os.path.join('../config', config_name + '.txt')) as f:
15 |         for line in f:
16 |             line = line.strip()
17 |             video_name = line.split(':')[0]
18 |             idts.append(video_name)
19 |     return idts
20 |
21 |
22 | def obtain_seq_index(index, num_frames):
23 |     seq = list(range(index - 13, index + 13 + 1))
24 |     seq = [min(max(item, 0), num_frames - 1) for item in seq]
25 |     return seq
26 |
27 | def get_3dmm_feature(img_path, idx, new_dict):
28 |     id = img_path.split('/')[-3]
29 |     features = new_dict[id]
30 |     idx_list = obtain_seq_index(idx, features.shape[0])
31 |     feature = features[idx_list, 80:144]
32 |     # feature[:, -1] = 50
33 |     return np.transpose(feature, (1, 0))
34 |
35 |
36 |
37 | class Facereala3dmmexp512Dataset(BaseDataset):
38 |     def __init__(self, opt, mode=None):
39 |         BaseDataset.__init__(self, opt)
40 |         img_size = opt.img_size
41 |         idts = get_idts(opt.name.split('_')[0])
42 |         print("---------load data list--------: ", idts)
43 |         self.new_dict = {}
44 |         if mode == 'train':
45 |             self.labels = []
46 |             self.label_starts = []
47 |             self.label_ends = []
48 |             count = 0
49 |             for idt_name in idts:
50 |                 # root = '../AnnVI/feature/{}'.format(idt_name)
51 |                 root = os.path.join(opt.feature_path, idt_name)
52 |                 feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature))
53 |                 self.new_dict[idt_name] = feature
54 |                 if opt.audio_feature == "3dmm":
55 |                     training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
56 |                 else:
57 |                     training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
58 |                 training_data = torch.load(training_data_path)
59 |                 img_paths = training_data['img_paths']
60 |                 features_3dmm = training_data['features_3dmm']
61 |                 index = [i[0].split('/')[-1] for i in img_paths]
62 |
63 |                 image_dir = '{}/{}_dlib_crop'.format(root, img_size)
64 |                 self.label_starts.append(count)
65 |                 for img in range(len(index)):
66 |                     img_path = os.path.join(image_dir, index[img])
67 |                     # idx_list = obtain_seq_index(img, feature.shape[0])
68 |                     # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))])
69 |                     self.labels.append([img_path, features_3dmm[img]])
70 |                     count = count + 1
71 |                 self.label_ends.append(count)
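            # label_starts / label_ends record, for every identity, the half-open
            # range [start, end) of its frames inside self.labels; __getitem__
            # relies on these ranges to draw the reference frame real_A from the
            # same identity as the target frame.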
72 | 73 | self.label_starts = np.array(self.label_starts) 74 | self.label_ends = np.array(self.label_ends) 75 | self.transforms_image = transforms.Compose([transforms.ToTensor(), 76 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 77 | 78 | self.transforms_label = transforms.Compose([transforms.ToTensor(), 79 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 80 | self.shuffle() 81 | elif mode == 'test': 82 | self.labels = [] 83 | self.label_starts = [] 84 | self.label_ends = [] 85 | count = 0 86 | for idt_name in idts: 87 | # root = '../AnnVI/feature/{}'.format(idt_name) 88 | root = os.path.join(opt.feature_path, idt_name) 89 | feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature)) 90 | self.new_dict[idt_name] = feature 91 | if opt.audio_feature == "3dmm": 92 | training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode)) 93 | else: 94 | training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature)) 95 | training_data = torch.load(training_data_path) 96 | img_paths = training_data['img_paths'] 97 | features_3dmm = training_data['features_3dmm'] 98 | index = [i[0].split('/')[-1] for i in img_paths] 99 | 100 | image_dir = '{}/{}_dlib_crop'.format(root, img_size) 101 | self.label_starts.append(count) 102 | for img in range(len(index)): 103 | img_path = os.path.join(image_dir, index[img]) 104 | # idx_list = obtain_seq_index(img, feature.shape[0]) 105 | # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))]) 106 | self.labels.append([img_path, features_3dmm[img]]) 107 | count = count + 1 108 | self.label_ends.append(count) 109 | 110 | self.label_starts = np.array(self.label_starts) 111 | self.label_ends = np.array(self.label_ends) 112 | self.transforms_image = transforms.Compose([transforms.ToTensor(), 113 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 114 | 115 | self.transforms_label = transforms.Compose([transforms.ToTensor(), 116 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 117 | self.shuffle() 118 | 119 | def shuffle(self): 120 | self.labels_index = list(range(len(self.labels))) 121 | random.shuffle(self.labels_index) 122 | 123 | def add_mouth_mask2(self, img): 124 | mask = np.ones_like(img) 125 | rect_area = [img.shape[1] // 2 - 60, np.random.randint(226, 246), 30, 256 - 30] 126 | mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]] 127 | x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2])) 128 | x = np.flip(x, 0) 129 | y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose() 130 | zz1 = -y - x + 88 > 0 131 | zz2 = np.flip(zz1, 1) 132 | zz = (zz1 + zz2) > 0 133 | mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1 134 | imgm = img * mask 135 | return imgm 136 | 137 | def __getitem__(self, index): 138 | # s1= time.time() 139 | idx = self.labels_index[index] 140 | img_path, feature_3dmm_idx= self.labels[idx] 141 | # print(img_path, feature_3dmm_idx) 142 | feature_3dmm = get_3dmm_feature(img_path, feature_3dmm_idx, self.new_dict) 143 | #print(img_path, feature_3dmm_idx, feature_3dmm.shape) 144 | 145 | img = np.array(Image.open(img_path).convert('RGB')) 146 | img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8') 147 | cut_pad1 = np.random.randint(0, 20) 148 | cut_pad2 = np.random.randint(0, 20) 149 | img = img[cut_pad1:512 + cut_pad1, 
cut_pad2:512 + cut_pad2] 150 | # s2 =time.time() 151 | # print('get data and read data ', s2-s1) 152 | mask_B = img.copy() 153 | # mask_end = np.random.randint(236*2, 250*2) 154 | # index = np.random.randint(80, 90) 155 | # mask_B[mask_B.shape[1] // 2 - index:mask_end, 30:-30] = 0 156 | mask_end = np.random.randint(480, 500) 157 | index = np.random.randint(15, 30) 158 | mask_B[index:mask_end, 70:-70] = 0 159 | img = Image.fromarray(img) 160 | 161 | mask_B = Image.fromarray(mask_B) 162 | img = self.transforms_image(img) 163 | mask_B = self.transforms_image(mask_B) 164 | 165 | x = np.where((idx >= self.label_starts) * (idx < self.label_ends))[0] 166 | 167 | audio = torch.tensor(feature_3dmm) 168 | # s3 = time.time() 169 | # print('get 3dmm and mask ', s3 - s2) 170 | # 保证real_A_index不是idx 171 | max_i = 0 172 | real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1) 173 | while real_A_index == idx: 174 | max_i += 1 175 | real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1) 176 | if max_i > 5: 177 | break 178 | 179 | imgA_path, _ = self.labels[real_A_index] 180 | imgA = np.array(Image.open(imgA_path).convert('RGB')) 181 | cut_pad1 = np.random.randint(0, 20) 182 | cut_pad2 = np.random.randint(0, 20) 183 | imgA = imgA[cut_pad1:256*2 + cut_pad1, cut_pad2:256*2 + cut_pad2] 184 | 185 | ########椭圆########## 186 | # mask = np.zeros(imgA.shape, dtype=np.uint8) 187 | # cv2.ellipse(mask, (imgA.shape[1] // 2, imgA.shape[0] // 2 - 165 - cut_pad1), 188 | # (imgA.shape[1] // 2 + 25, imgA.shape[0]), 0, 0, 360, (255, 255, 255), -1) 189 | # ROI = cv2.bitwise_and(imgA, mask) 190 | # imgA = Image.fromarray(ROI) 191 | ############################# 192 | # imgA[:imgA.shape[1] // 2 - 40 - index2, :] = 0 193 | imgA = Image.fromarray(imgA) 194 | imgA = self.transforms_image(imgA) 195 | # s4 = time.time() 196 | # print('end time reala ', s4 - s3) 197 | return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B} 198 | 199 | def __len__(self): 200 | """Return the total number of images in the dataset.""" 201 | return len(self.labels) 202 | 203 | 204 | if __name__ == '__main__': 205 | from options.train_options import TrainOptions 206 | 207 | opt = TrainOptions().parse() 208 | dataset = Facereala3dmmDataset(opt) 209 | dataset_size = len(dataset) 210 | print(dataset_size) 211 | for i, data in enumerate(dataset): 212 | print(data) 213 | -------------------------------------------------------------------------------- /landmark2face_wy/data/Facereala3dmmexpwenet512_dataset.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import random 3 | from data.base_dataset import BaseDataset, get_params, get_transform 4 | import torchvision.transforms as transforms 5 | from data.image_folder import make_dataset 6 | from PIL import Image, ImageEnhance 7 | import numpy as np 8 | import cv2 9 | import torch 10 | import time 11 | 12 | def get_idts(config_name): 13 | idts = list() 14 | with open(os.path.join('../config', config_name + '.txt')) as f: 15 | for line in f: 16 | line = line.strip() 17 | video_name = line.split(':')[0] 18 | idts.append(video_name) 19 | return idts 20 | 21 | 22 | def obtain_seq_index(index, num_frames): 23 | seq = list(range(index - 10, index + 9 + 1)) 24 | seq = [min(max(item, 0), num_frames - 1) for item in seq] 25 | return seq 26 | 27 | def get_3dmm_feature(img_path, idx, audio_feature, new_dict): 28 | id = img_path.split('/')[-3] 29 | features, features1, features1 = new_dict[id] 30 | 
idx_list = obtain_seq_index(idx, features.shape[0]) 31 | feature = features[idx_list, 80:144] 32 | feature1 = features1[:,audio_feature[0]:audio_feature[1]] 33 | feature = np.concatenate([feature, features[idx_list, -3:], np.transpose(feature1, (1, 0))], 1) 34 | # print(feature.shape) 35 | return np.transpose(feature, (1, 0)) 36 | # return feature 37 | 38 | 39 | 40 | class Facereala3dmmexpwenet512Dataset(BaseDataset): 41 | def __init__(self, opt, mode=None): 42 | BaseDataset.__init__(self, opt) 43 | img_size = opt.img_size 44 | idts = get_idts(opt.name.split('_')[0]) 45 | print("---------load data list--------: ", idts) 46 | self.new_dict = {} 47 | if mode == 'train': 48 | self.labels = [] 49 | self.label_starts = [] 50 | self.label_ends = [] 51 | count = 0 52 | for idt_name in idts: 53 | # root = '../AnnVI/feature/{}'.format(idt_name) 54 | root = os.path.join(opt.feature_path, idt_name) 55 | feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature)) 56 | feature1 = np.load(os.path.join(root,'audio_wenet_feature.npy')) 57 | self.new_dict[idt_name] = [feature, feature1, feature1] 58 | if opt.audio_feature == "3dmm": 59 | training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode)) 60 | else: 61 | training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature)) 62 | training_data = torch.load(training_data_path) 63 | img_paths = training_data['img_paths'] 64 | features_3dmm = training_data['features_3dmm'] 65 | audio_features = np.load(os.path.join(root, 'audio_data.npy'), allow_pickle=True) 66 | audio_features = audio_features.tolist() 67 | index = [i[0].split('/')[-1] for i in img_paths] 68 | 69 | image_dir = '{}/{}_dlib_crop'.format(root, img_size) 70 | self.label_starts.append(count) 71 | for img in range(len(index)): 72 | img_path = os.path.join(image_dir, index[img]) 73 | # idx_list = obtain_seq_index(img, feature.shape[0]) 74 | # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))]) 75 | if type(features_3dmm[img]) != int: 76 | print(img_path) 77 | audio_feature = audio_features[img] 78 | self.labels.append([img_path, features_3dmm[img], audio_feature]) 79 | count = count + 1 80 | self.label_ends.append(count) 81 | 82 | self.label_starts = np.array(self.label_starts) 83 | self.label_ends = np.array(self.label_ends) 84 | self.transforms_image = transforms.Compose([transforms.ToTensor(), 85 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 86 | 87 | self.transforms_label = transforms.Compose([transforms.ToTensor(), 88 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 89 | self.shuffle() 90 | elif mode == 'test': 91 | self.labels = [] 92 | self.label_starts = [] 93 | self.label_ends = [] 94 | count = 0 95 | for idt_name in idts: 96 | # root = '../AnnVI/feature/{}'.format(idt_name) 97 | root = os.path.join(opt.feature_path, idt_name) 98 | feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature)) 99 | self.new_dict[idt_name] = feature 100 | if opt.audio_feature == "3dmm": 101 | training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode)) 102 | else: 103 | training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature)) 104 | training_data = torch.load(training_data_path) 105 | img_paths = training_data['img_paths'] 106 | features_3dmm = training_data['features_3dmm'] 107 | index = [i[0].split('/')[-1] for i in img_paths] 108 | 109 | image_dir = '{}/{}_dlib_crop'.format(root, img_size) 110 | self.label_starts.append(count) 111 | for img in 
range(len(index)):
112 |                     img_path = os.path.join(image_dir, index[img])
113 |                     # idx_list = obtain_seq_index(img, feature.shape[0])
114 |                     # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))])
115 |                     self.labels.append([img_path, features_3dmm[img]])
116 |                     count = count + 1
117 |                 self.label_ends.append(count)
118 | 
119 |             self.label_starts = np.array(self.label_starts)
120 |             self.label_ends = np.array(self.label_ends)
121 |             self.transforms_image = transforms.Compose([transforms.ToTensor(),
122 |                                                         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
123 | 
124 |             self.transforms_label = transforms.Compose([transforms.ToTensor(),
125 |                                                         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
126 |             self.shuffle()
127 | 
128 |     def shuffle(self):
129 |         self.labels_index = list(range(len(self.labels)))
130 |         random.shuffle(self.labels_index)
131 | 
132 |     def add_mouth_mask2(self, img):
133 |         mask = np.ones_like(img)
134 |         rect_area = [img.shape[1] // 2 - 60, np.random.randint(226, 246), 30, 256 - 30]
135 |         mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]]
136 |         x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2]))
137 |         x = np.flip(x, 0)
138 |         y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose()
139 |         zz1 = -y - x + 88 > 0
140 |         zz2 = np.flip(zz1, 1)
141 |         zz = (zz1 + zz2) > 0
142 |         mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1
143 |         imgm = img * mask
144 |         return imgm
145 | 
146 |     def __getitem__(self, index):
147 |         # s1 = time.time()
148 |         idx = self.labels_index[index]
149 |         img_path, feature_3dmm_idx, audio_feature = self.labels[idx]
150 |         # print(img_path, feature_3dmm_idx)
151 |         feature_3dmm = get_3dmm_feature(img_path, feature_3dmm_idx, audio_feature, self.new_dict)
152 |         # print(img_path, feature_3dmm_idx, feature_3dmm.shape)
153 | 
154 |         img = np.array(Image.open(img_path).convert('RGB'))
155 |         img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
156 |         cut_pad1 = np.random.randint(0, 20)
157 |         cut_pad2 = np.random.randint(0, 20)
158 |         img = img[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]
159 |         # s2 = time.time()
160 |         # print('get data and read data ', s2 - s1)
161 |         mask_B = img.copy()
162 |         # mask_end = np.random.randint(236*2, 250*2)
163 |         # index = np.random.randint(80, 90)
164 |         # mask_B[mask_B.shape[1] // 2 - index:mask_end, 30:-30] = 0
165 |         mask_end = np.random.randint(480, 500)
166 |         index = np.random.randint(15, 30)
167 |         # index = np.random.randint(90, 100)
168 |         mask_B[index:mask_end, 70:-70] = 0
169 |         img = Image.fromarray(img)
170 | 
171 |         mask_B = Image.fromarray(mask_B)
172 |         img = self.transforms_image(img)
173 |         mask_B = self.transforms_image(mask_B)
174 | 
175 |         x = np.where((idx >= self.label_starts) * (idx < self.label_ends))[0]
176 | 
177 |         audio = torch.tensor(feature_3dmm)
178 |         # s3 = time.time()
179 |         # print('get 3dmm and mask ', s3 - s2)
180 |         # make sure real_A_index is not idx
181 |         max_i = 0
182 |         real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1)
183 |         while real_A_index == idx:
184 |             max_i += 1
185 |             real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1)
186 |             if max_i > 5:
187 |                 break
188 | 
189 |         imgA_path, _, _ = self.labels[real_A_index]
190 |         imgA = np.array(Image.open(imgA_path).convert('RGB'))
191 |         cut_pad1 = np.random.randint(0, 20)
192 |         cut_pad2 = np.random.randint(0, 20)
193 |         imgA = imgA[cut_pad1:256*2 + cut_pad1, cut_pad2:256*2 + cut_pad2]
194 | 
195 |         ######## ellipse mask ##########
196 |         # mask = np.zeros(imgA.shape, dtype=np.uint8)
197 |         # cv2.ellipse(mask, (imgA.shape[1] // 2, imgA.shape[0] // 2 - 165 - cut_pad1),
198 |         #             (imgA.shape[1] // 2 + 25, imgA.shape[0]), 0, 0, 360, (255, 255, 255), -1)
199 |         # ROI = cv2.bitwise_and(imgA, mask)
200 |         # imgA = Image.fromarray(ROI)
201 |         #############################
202 |         # imgA[:imgA.shape[1] // 2 - 40 - index2, :] = 0
203 |         imgA = Image.fromarray(imgA)
204 |         imgA = self.transforms_image(imgA)
205 |         # s4 = time.time()
206 |         # print('end time reala ', s4 - s3)
207 |         return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}
208 | 
209 |     def __len__(self):
210 |         """Return the total number of images in the dataset."""
211 |         return len(self.labels)
212 | 
213 | 
214 | if __name__ == '__main__':
215 |     from options.train_options import TrainOptions
216 | 
217 |     opt = TrainOptions().parse()
218 |     dataset = Facereala3dmmexpwenet512Dataset(opt, mode='train')
219 |     dataset_size = len(dataset)
220 |     print(dataset_size)
221 |     for i, data in enumerate(dataset):
222 |         print(data)
223 | 
--------------------------------------------------------------------------------
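The resolver above is what `--dataset_mode` values go through: it imports `landmark2face_wy.data.<name>_dataset` and picks the class whose lowercased name equals the flag with underscores removed plus 'dataset'. A minimal sketch of how the two dataset files that follow are resolved (the flag value is illustrative, assuming the package root is on sys.path):

# Illustrative resolution of --dataset_mode values to the classes below:
#   'l2faceaudio512' -> L2FaceAudio512Dataset (l2faceaudio512_dataset.py)
#   'l2faceaudio'    -> L2FaceAudioDataset    (l2faceaudio_dataset.py)
from landmark2face_wy.data import find_dataset_using_name

dataset_cls = find_dataset_using_name('l2faceaudio512')
assert dataset_cls.__name__ == 'L2FaceAudio512Dataset'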
/landmark2face_wy/data/l2faceaudio512_dataset.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | import random
3 | from data.base_dataset import BaseDataset, get_params, get_transform
4 | import torchvision.transforms as transforms
5 | from data.image_folder import make_dataset
6 | from PIL import Image, ImageEnhance
7 | import numpy as np
8 | import cv2
9 | import torch
10 | 
11 | 
12 | def get_idts(config_name):
13 |     idts = list()
14 |     with open(os.path.join('../config', config_name + '.txt')) as f:
15 |         for line in f:
16 |             line = line.strip()
17 |             idts.append(line)
18 |     return idts
19 | 
20 | 
21 | class L2FaceAudio512Dataset(BaseDataset):
22 |     def __init__(self, opt, mode=None):
23 |         BaseDataset.__init__(self, opt)
24 |         img_size = opt.img_size
25 |         idts =
get_idts(opt.name.split('_')[0]) 26 | print("---------load data list--------: ", idts) 27 | if mode == 'train': 28 | self.labels = [] 29 | for idt_name in idts: 30 | # root = '../AnnVI/feature/{}'.format(idt_name) 31 | root = os.path.join(opt.feature_path, idt_name) 32 | if opt.audio_feature == "mfcc": 33 | training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode)) 34 | else: 35 | training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature)) 36 | training_data = torch.load(training_data_path) 37 | img_paths = training_data['img_paths'] 38 | audio_features = training_data['audio_features'] 39 | index = [i[0].split('/')[-1] for i in img_paths] 40 | 41 | image_dir = '{}/{}_dlib_crop'.format(root, img_size) 42 | # label_dir = '{}/512_landmark_crop'.format(root) 43 | 44 | # if 'man' in opt.name: 45 | # imgs.sort(key=lambda x:int(x.split('.')[0])) 46 | # else: 47 | # imgs.sort(key=lambda x: (int(x.split('.')[0].split('-')[0]), int(x.split('.')[0].split('-')[1]))) 48 | for img in range(len(index)): 49 | img_path = os.path.join(image_dir, index[img]) 50 | audio_feature = audio_features[img] 51 | self.labels.append([img_path, audio_feature]) 52 | # transforms.Resize([img_size, img_size], Image.BICUBIC), 53 | self.transforms_image = transforms.Compose([transforms.ToTensor(), 54 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 55 | # transforms.Resize([img_size, img_size], Image.BICUBIC), 56 | self.transforms_label = transforms.Compose([transforms.ToTensor(), 57 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 58 | self.shuffle() 59 | elif mode == 'test': 60 | self.labels = [] 61 | for idt_name in idts: 62 | # root = '../AnnVI/feature/{}'.format(idt_name) 63 | root = os.path.join(opt.feature_path, idt_name) 64 | if opt.audio_feature == "mfcc": 65 | training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode)) 66 | else: 67 | training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature)) 68 | training_data = torch.load(training_data_path) 69 | img_paths = training_data['img_paths'] 70 | audio_features = training_data['audio_features'] 71 | index = [i[0].split('/')[-1] for i in img_paths] 72 | 73 | image_dir = '{}/{}_dlib_crop'.format(root, img_size) 74 | # label_dir = '{}/512_landmark_crop'.format(root) 75 | 76 | # if 'man' in opt.name: 77 | # imgs.sort(key=lambda x:int(x.split('.')[0])) 78 | # else: 79 | # imgs.sort(key=lambda x: (int(x.split('.')[0].split('-')[0]), int(x.split('.')[0].split('-')[1]))) 80 | for img in range(len(index)): 81 | img_path = os.path.join(image_dir, index[img]) 82 | audio_feature = audio_features[img] 83 | self.labels.append([img_path, audio_feature]) 84 | # transforms.Resize([img_size, img_size], Image.BICUBIC), 85 | self.transforms_image = transforms.Compose([transforms.ToTensor(), 86 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 87 | # transforms.Resize([img_size, img_size], Image.BICUBIC), 88 | self.transforms_label = transforms.Compose([transforms.ToTensor(), 89 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 90 | self.shuffle() 91 | 92 | def shuffle(self): 93 | random.shuffle(self.labels) 94 | 95 | def add_mouth_mask2(self, img): 96 | mask = np.ones_like(img) 97 | rect_area = [img.shape[1] // 2 - np.random.randint(50, 60), np.random.randint(226, 246), 30, 256 - 30] 98 | mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]] 99 | x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, 
rect_area[3] - rect_area[2]))
100 |         x = np.flip(x, 0)
101 |         y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose()
102 |         zz1 = -y - x + 88 > 0
103 |         zz2 = np.flip(zz1, 1)
104 |         zz = (zz1 + zz2) > 0
105 |         mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1
106 |         imgm = img * mask
107 |         return imgm
108 | 
109 |     def __getitem__(self, index):
110 |         cv2.setNumThreads(0)
111 |         img_path, audio_feature = self.labels[index]
112 |         img = np.array(Image.open(img_path).convert('RGB'))
113 |         img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
114 |         cut_pad1 = np.random.randint(0, 20)
115 |         cut_pad2 = np.random.randint(0, 20)
116 |         img = img[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]
117 | 
118 |         #### elliptical mask to cover the collar ####
119 |         '''
120 |         mask = np.zeros(img.shape, dtype=np.uint8)
121 |         cv2.ellipse(mask, (img.shape[1] // 2, img.shape[0] // 2 - 160 - cut_pad1), (img.shape[1] // 2 + 10, img.shape[0]), 0, 0, 360, (255, 255, 255), -1)
122 |         '''
123 |         #### mask over the eyes ####
124 |         mask = np.ones(img.shape, dtype=np.uint8) * 255
125 |         mask[40 - cut_pad1:140 - cut_pad1, 110 - cut_pad2:-110 - cut_pad2] = 0
126 |         img = cv2.bitwise_and(img, mask)
127 | 
128 |         mask_B = img.copy()
129 |         mask_B = cv2.resize(mask_B, (256, 256))
130 |         ########## neck-segmentation mask ##########
131 |         # img_edge = cv2.imread(img_path.replace("dlib_crop", "dlib_crop_neck"))
132 |         # img_edge = img_edge[cut_pad1:256 + cut_pad1, cut_pad2:256 + cut_pad2]
133 |         # mask_B = cv2.bitwise_and(img, 255 - img_edge)
134 |         # img_edge[:128, :, :] = img[:128, :, :]
135 | 
136 |         ########## extra elliptical neck mask ##########
137 |         '''
138 |         maske = np.zeros(img.shape, dtype=np.uint8)
139 |         cv2.ellipse(maske, (img.shape[1] // 2, img.shape[0] // 2 + 50),
140 |                     (img.shape[1] // 4 + np.random.randint(-5, 5), img.shape[0] // 3 + np.random.randint(-10, 10)),
141 |                     0, 0, 360, (255, 255, 255), -1)
142 |         maske[:img.shape[0] // 2, :, :] = 0
143 |         mask_B = cv2.bitwise_and(mask_B, 255-maske)
144 |         '''
145 |         ########## old rectangular mask ##########
146 |         mask_end = np.random.randint(236, 256)
147 |         mask_B[mask_B.shape[1] // 2 - np.random.randint(40, 50):mask_end, 30:-30] = 0
148 |         ########## old rectangular mask ##########
149 |         ########## Cai Xingyu's triangular mask ##########
150 |         # mask_B = self.add_mouth_mask2(mask_B)
151 |         ########## Cai Xingyu's triangular mask ##########
152 |         # mask_B[mask_B.shape[1] // 2 - 50:, 30:-30] = 0
153 |         img = Image.fromarray(img)
154 |         mask_B = Image.fromarray(mask_B)
155 |         img = self.transforms_image(img)
156 |         mask_B = self.transforms_image(mask_B)
157 |         # lab = Image.open(lab_path).convert('RGB')
158 |         # lab = self.transforms_label(lab)
159 |         audio = np.zeros((256, 256), dtype=np.float32)
160 |         audio_feature = np.array(audio_feature)
161 |         audio[:audio_feature.shape[0], :audio_feature.shape[1]] = audio_feature
162 |         audio = torch.tensor([audio])
163 | 
164 |         imgA_path, _ = random.sample(self.labels, 1)[0]
165 |         imgA = np.array(Image.open(imgA_path).convert('RGB'))
166 |         cut_pad1 = np.random.randint(0, 20)
167 |         cut_pad2 = np.random.randint(0, 20)
168 |         imgA = imgA[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]
169 |         # mask = np.ones(imgA.shape, dtype=np.uint8) * 255
170 |         # mask[40 - cut_pad1:140 - cut_pad1, 110 - cut_pad2:-110 - cut_pad2] = 0
171 |         imgA = cv2.bitwise_and(imgA, mask)
172 |         imgA = Image.fromarray(imgA)
173 |         imgA = self.transforms_image(imgA)
174 |         return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}
175 | 
176 |     def __len__(self):
177 |         """Return the total number of images in the dataset."""
178 |         return len(self.labels)
179 | 
180 | 
181 | if __name__ == '__main__':
182 |     from options.train_options import TrainOptions
183 | 
184 |     opt = TrainOptions().parse()
185 |     dataset = L2FaceAudio512Dataset(opt, mode='train')
186 |     dataset_size = len(dataset)
187 |     print(dataset_size)
188 |     for i, data in enumerate(dataset):
189 |         print(data)
--------------------------------------------------------------------------------
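One detail worth calling out from `__getitem__` above: the audio label handed to the network is always a fixed 1 x 256 x 256 tensor, produced by zero-padding the per-frame feature into a square canvas. The padding step in isolation (the feature shape here is synthetic, for illustration only):

import numpy as np
import torch

# Zero-pad a variable-size per-frame audio feature into the fixed
# 256 x 256 canvas used as 'A_label'/'B_label' above.
audio_feature = np.random.randn(32, 128).astype(np.float32)  # synthetic feature
audio = np.zeros((256, 256), dtype=np.float32)
audio[:audio_feature.shape[0], :audio_feature.shape[1]] = audio_feature
audio = torch.tensor([audio])  # shape: (1, 256, 256)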
image_dir = '{}/{}_dlib_crop'.format(root, img_size)
74 |                 # label_dir = '{}/512_landmark_crop'.format(root)
75 | 
76 |                 # if 'man' in opt.name:
77 |                 #     imgs.sort(key=lambda x:int(x.split('.')[0]))
78 |                 # else:
79 |                 #     imgs.sort(key=lambda x: (int(x.split('.')[0].split('-')[0]), int(x.split('.')[0].split('-')[1])))
80 |                 for img in range(len(index)):
81 |                     img_path = os.path.join(image_dir, index[img])
82 |                     audio_feature = audio_features[img]
83 |                     self.labels.append([img_path, audio_feature])
84 |             # transforms.Resize([img_size, img_size], Image.BICUBIC),
85 |             self.transforms_image = transforms.Compose([transforms.ToTensor(),
86 |                                                         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
87 |             # transforms.Resize([img_size, img_size], Image.BICUBIC),
88 |             self.transforms_label = transforms.Compose([transforms.ToTensor(),
89 |                                                         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
90 |             self.shuffle()
91 | 
92 |     def shuffle(self):
93 |         random.shuffle(self.labels)
94 | 
95 |     def add_mouth_mask2(self, img):
96 |         mask = np.ones_like(img)
97 |         rect_area = [img.shape[1] // 2 - 60, np.random.randint(226, 246), 30, 256 - 30]
98 |         mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]]
99 |         x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2]))
100 |         x = np.flip(x, 0)
101 |         y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose()
102 |         zz1 = -y - x + 88 > 0
103 |         zz2 = np.flip(zz1, 1)
104 |         zz = (zz1 + zz2) > 0
105 |         mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1
106 |         imgm = img * mask
107 |         return imgm
108 | 
109 |     def __getitem__(self, index):
110 |         cv2.setNumThreads(0)
111 |         img_path, audio_feature = self.labels[index]
112 |         img = np.array(Image.open(img_path).convert('RGB'))
113 |         img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
114 |         cut_pad1 = np.random.randint(0, 10)
115 |         cut_pad2 = np.random.randint(0, 10)
116 |         img = img[cut_pad1:256 + cut_pad1, cut_pad2:256 + cut_pad2]
117 | 
118 |         #### mask over the eyes ####
119 |         mask = np.ones(img.shape, dtype=np.uint8) * 255
120 |         mask[20 - cut_pad1:70 - cut_pad1, 55 - cut_pad2:-55 - cut_pad2] = 0
121 |         img = cv2.bitwise_and(img, mask)
122 | 
123 |         mask_B = img.copy()
124 |         mask_end = np.random.randint(236, 256)
125 |         ########## old rectangular mask ##########
126 |         mask_B[mask_B.shape[1] // 2 - np.random.randint(40, 50):mask_end, 30:-30] = 0
127 |         ########## old rectangular mask ##########
128 |         ########## Cai Xingyu's triangular mask ##########
129 |         # mask_B = self.add_mouth_mask2(mask_B)
130 |         ########## Cai Xingyu's triangular mask ##########
131 |         # mask_B[mask_B.shape[1] // 2 - 50:, 30:-30] = 0
132 |         img = Image.fromarray(img)
133 |         mask_B = Image.fromarray(mask_B)
134 |         img = self.transforms_image(img)
135 |         mask_B = self.transforms_image(mask_B)
136 |         # lab = Image.open(lab_path).convert('RGB')
137 |         # lab = self.transforms_label(lab)
138 |         audio = np.zeros((256, 256), dtype=np.float32)
139 |         audio_feature = np.array(audio_feature)
140 |         audio[:audio_feature.shape[0], :audio_feature.shape[1]] = audio_feature
141 |         audio = torch.tensor([audio])
142 | 
143 |         imgA_path, _ = random.sample(self.labels, 1)[0]
144 |         imgA = np.array(Image.open(imgA_path).convert('RGB'))
145 |         cut_pad1 = np.random.randint(0, 10)
146 |         cut_pad2 = np.random.randint(0, 10)
147 |         imgA = imgA[cut_pad1:256 + cut_pad1, cut_pad2:256 + cut_pad2]
148 |         imgA = cv2.bitwise_and(imgA, mask)
149 |         imgA = Image.fromarray(imgA)
150 |         imgA = self.transforms_image(imgA)
151 |         return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}
152 | 
153 |     def __len__(self):
154 |         """Return the total number of images in the dataset."""
155 |         return len(self.labels)
156 | 
157 | 
158 | if __name__ == '__main__':
159 |     from options.train_options import TrainOptions
160 | 
161 |     opt = TrainOptions().parse()
162 |     dataset = L2FaceAudioDataset(opt, mode='train')
163 |     dataset_size = len(dataset)
164 |     print(dataset_size)
165 |     for i, data in enumerate(dataset):
166 |         print(data)
--------------------------------------------------------------------------------
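Both audio datasets train the generator to inpaint the lower face: a randomized rectangle over the mouth/neck region of `mask_B` is zeroed out, so only the audio label carries that information. The augmentation in isolation (the input image is synthetic, for illustration only):

import numpy as np

# Randomized rectangular mouth mask, as applied to the 256x256 variant above.
img = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)  # synthetic crop
mask_B = img.copy()
mask_end = np.random.randint(236, 256)
mask_B[mask_B.shape[1] // 2 - np.random.randint(40, 50):mask_end, 30:-30] = 0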
16 | 
17 | Now you can use the model class by specifying flag '--model dummy'.
18 | See our template model class 'template_model.py' for more details.
19 | """
20 | 
21 | import importlib
22 | from landmark2face_wy.models.base_model import BaseModel
23 | 
24 | 
25 | def find_model_using_name(model_name):
26 |     """Import the module "models/[model_name]_model.py".
27 | 
28 |     In the file, the class called DatasetNameModel() will
29 |     be instantiated. It has to be a subclass of BaseModel,
30 |     and it is case-insensitive.
31 |     """
32 |     model_filename = "landmark2face_wy.models." + model_name + "_model"
33 |     modellib = importlib.import_module(model_filename)
34 |     model = None
35 |     target_model_name = model_name.replace('_', '') + 'model'
36 |     for name, cls in modellib.__dict__.items():
37 |         if name.lower() == target_model_name.lower() \
38 |            and issubclass(cls, BaseModel):
39 |             model = cls
40 | 
41 |     if model is None:
42 |         print("In %s.py, there should be a subclass of BaseModel with class name that matches %s in lowercase." % (model_filename, target_model_name))
43 |         exit(0)
44 | 
45 |     return model
46 | 
47 | 
48 | def get_option_setter(model_name):
49 |     """Return the static method <modify_commandline_options> of the model class."""
50 |     model_class = find_model_using_name(model_name)
51 |     return model_class.modify_commandline_options
52 | 
53 | 
54 | def create_model(opt):
55 |     """Create a model given the option.
56 | 
57 |     This function wraps the model class.
58 |     This is the main interface between this package and 'train.py'/'test.py'
59 | 
60 |     Example:
61 |         >>> from landmark2face_wy.models import create_model
62 |         >>> model = create_model(opt)
63 |     """
64 |     model = find_model_using_name(opt.model)
65 |     instance = model(opt)
66 |     print("model [%s] was created" % type(instance).__name__)
67 |     return instance
68 | 
--------------------------------------------------------------------------------
/landmark2face_wy/models/base_function.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/base_function.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/models/base_model.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/base_model.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/models/face3d2face_model.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/face3d2face_model.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/models/l2faceaudio_model.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/l2faceaudio_model.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/models/networks.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/networks.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/models/networks_HD.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/networks_HD.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/models/networks_pix2pixHD.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/networks_pix2pixHD.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/models/pirender_3dmm_mouth_hd_model.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/pirender_3dmm_mouth_hd_model.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/models/pirender_3dmm_mouth_hdv2_model.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/pirender_3dmm_mouth_hdv2_model.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/options/__init__.py: -------------------------------------------------------------------------------- 1 | """This package options includes option modules: training options, test options, and basic options (used in both training and test).""" 2 | -------------------------------------------------------------------------------- /landmark2face_wy/options/base_options.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/options/base_options.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/options/test_options.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/options/test_options.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/options/train_options.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/options/train_options.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/sync_batchnorm/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : __init__.py 3 | # Author : Jiayuan Mao 4 | # Email : maojiayuan@gmail.com 5 | # Date : 27/01/2018 6 | # 7 | # This file is part of Synchronized-BatchNorm-PyTorch. 8 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch 9 | # Distributed under MIT License. 10 | 11 | from .batchnorm import set_sbn_eps_mode 12 | from .batchnorm import SynchronizedBatchNorm1d, SynchronizedBatchNorm2d, SynchronizedBatchNorm3d 13 | from .batchnorm import patch_sync_batchnorm, convert_model 14 | from .replicate import DataParallelWithCallback, patch_replication_callback 15 | -------------------------------------------------------------------------------- /landmark2face_wy/sync_batchnorm/batchnorm.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/sync_batchnorm/batchnorm.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/sync_batchnorm/batchnorm_reimpl.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/sync_batchnorm/batchnorm_reimpl.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/sync_batchnorm/comm.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/sync_batchnorm/comm.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/sync_batchnorm/replicate.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/sync_batchnorm/replicate.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/sync_batchnorm/unittest.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/sync_batchnorm/unittest.cpython-38-x86_64-linux-gnu.so 
-------------------------------------------------------------------------------- /landmark2face_wy/test_3dmm_multi_exp_wenet.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/test_3dmm_multi_exp_wenet.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/test_3dmm_multi_exp_wenet0.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/test_3dmm_multi_exp_wenet0.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/util/__init__.py: -------------------------------------------------------------------------------- 1 | """This package includes a miscellaneous collection of useful helper functions.""" 2 | -------------------------------------------------------------------------------- /landmark2face_wy/util/flow_util.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/util/flow_util.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/util/get_data.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/util/get_data.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/util/html.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/util/html.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/util/image_pool.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/util/image_pool.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/util/util.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/util/util.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/util/visualizer.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/util/visualizer.cpython-38-x86_64-linux-gnu.so 
-------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/license.txt -------------------------------------------------------------------------------- /log/dh.log: -------------------------------------------------------------------------------- 1 | [2025-03-18 12:50:40,644] [run.py[line:153]] [INFO] [TransDhTask init] 2 | [2025-03-18 12:50:41,729] [run.py[line:158]] [INFO] [任务:1002 -> audio_url:./temp/example/audio.wav video_url:./temp/example/video.mp4] 3 | [2025-03-18 12:50:41,732] [run.py[line:158]] [INFO] [[1002] -> ffmpeg video: ffmpeg -loglevel warning -i ./temp/example/video.mp4 -c:v libx264 -crf 15 -an -y ./temp/1002_format.mp4] 4 | [2025-03-18 12:50:41,790] [run.py[line:158]] [ERROR] [[1002]预处理失败,异常信息:[format video error]] 5 | [2025-03-18 12:50:41,790] [run.py[line:158]] [ERROR] [[1002]任务执行失败,异常信息:[[1002]预处理失败,异常信息:[format video error]]] 6 | [2025-03-18 12:50:41,791] [run.py[line:158]] [INFO] [>>> 任务:1002 耗时:0.06167912483215332 ] 7 | [2025-03-18 12:50:57,817] [run.py[line:143]] [INFO] [TransDhTask init] 8 | [2025-03-18 12:50:58,906] [run.py[line:147]] [INFO] [任务:1002 -> audio_url:./temp/example/audio.wav video_url:./temp/example/video.mp4] 9 | [2025-03-18 12:50:58,908] [run.py[line:147]] [INFO] [[1002] -> ffmpeg video: ffmpeg -loglevel warning -i ./temp/example/video.mp4 -c:v libx264 -crf 15 -an -y ./temp/1002_format.mp4] 10 | [2025-03-18 12:50:58,964] [run.py[line:147]] [ERROR] [[1002]预处理失败,异常信息:[format video error]] 11 | [2025-03-18 12:50:58,965] [run.py[line:147]] [ERROR] [[1002]任务执行失败,异常信息:[[1002]预处理失败,异常信息:[format video error]]] 12 | [2025-03-18 12:50:58,966] [run.py[line:147]] [INFO] [>>> 任务:1002 耗时:0.059505462646484375 ] 13 | [2025-03-18 12:52:06,385] [run.py[line:143]] [INFO] [TransDhTask init] 14 | [2025-03-18 12:52:07,560] [run.py[line:147]] [INFO] [任务:1002 -> audio_url:./example/audio.wav video_url:./example/video.mp4] 15 | [2025-03-18 12:52:07,646] [run.py[line:147]] [INFO] [[1002] -> ffmpeg video: ffmpeg -loglevel warning -i ./example/video.mp4 -crf 15 -vcodec copy -an -y ./1002_format.mp4] 16 | [2025-03-18 12:52:07,801] [run.py[line:147]] [INFO] [[1002] -> ffmpeg audio: ffmpeg -loglevel warning -i ./example/audio.wav -ac 1 -ar 16000 -acodec pcm_s16le -y ./1002_format.wav] 17 | [2025-03-18 12:52:07,922] [run.py[line:147]] [INFO] [[1002] -> 预处理耗时:0.35927414894104004s] 18 | [2025-03-18 12:52:10,169] [run.py[line:147]] [INFO] [[1002] -> get_aud_feat1 cost:2.245649576187134s] 19 | [2025-03-18 12:52:11,702] [process.py[line:108]] [INFO] [>>> init_wh_process进程启动] 20 | [2025-03-18 12:52:20,087] [process.py[line:108]] [INFO] [[1002]init_wh result :[0.8809176216714891], cost: 8.382684469223022 s] 21 | [2025-03-18 12:52:20,090] [run.py[line:147]] [INFO] [[1002] -> wh: [0.8809176216714891]] 22 | [2025-03-18 12:52:21,453] [process.py[line:108]] [INFO] [>>> 数字人图片处理进程启动] 23 | [2025-03-18 12:52:24,015] [process.py[line:108]] [INFO] [[1002]任务视频驱动队列启动 batch_size:4, len:150] 24 | [2025-03-18 12:52:24,050] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 开始循环] 25 | [2025-03-18 12:52:24,085] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:4] 26 | [2025-03-18 12:52:24,112] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:8] 27 | [2025-03-18 12:52:24,122] 
[process.py[line:108]] [INFO] [>>> audio_transfer get message:4] 28 | [2025-03-18 12:52:24,139] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:12] 29 | [2025-03-18 12:52:24,148] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:16] 30 | [2025-03-18 12:52:24,161] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:20] 31 | [2025-03-18 12:52:24,173] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:24] 32 | [2025-03-18 12:52:24,185] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:28] 33 | [2025-03-18 12:52:24,197] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:32] 34 | [2025-03-18 12:52:24,208] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:36] 35 | [2025-03-18 12:52:24,222] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:40] 36 | [2025-03-18 12:52:24,232] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:44] 37 | [2025-03-18 12:52:25,722] [process.py[line:108]] [INFO] [[1002] -> frame_id:[4] 模糊置信度:[0.969]] 38 | [2025-03-18 12:52:25,723] [process.py[line:108]] [INFO] [[1002] -> need chaofen .] 39 | [2025-03-18 12:52:25,905] [utils.py[line:145]] [INFO] [Note: detected 72 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable.] 40 | [2025-03-18 12:52:25,906] [utils.py[line:148]] [INFO] [Note: NumExpr detected 72 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.] 41 | [2025-03-18 12:52:25,907] [utils.py[line:160]] [INFO] [NumExpr defaulting to 8 threads.] 
42 | [2025-03-18 12:52:26,083] [process.py[line:108]] [INFO] [[4] -> chaofen cost:1.9595112800598145s] 43 | [2025-03-18 12:52:31,071] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:4, cost:6.948575258255005s] 44 | [2025-03-18 12:52:31,116] [process.py[line:108]] [INFO] [>>> audio_transfer get message:8] 45 | [2025-03-18 12:52:31,126] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:48] 46 | [2025-03-18 12:52:31,347] [process.py[line:108]] [INFO] [[8] -> chaofen cost:0.2294461727142334s] 47 | [2025-03-18 12:52:31,576] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:8, cost:0.45979762077331543s] 48 | [2025-03-18 12:52:31,605] [process.py[line:108]] [INFO] [>>> audio_transfer get message:12] 49 | [2025-03-18 12:52:31,615] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:52] 50 | [2025-03-18 12:52:31,818] [process.py[line:108]] [INFO] [[12] -> chaofen cost:0.21271824836730957s] 51 | [2025-03-18 12:52:32,036] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:12, cost:0.43187427520751953s] 52 | [2025-03-18 12:52:32,060] [process.py[line:108]] [INFO] [>>> audio_transfer get message:16] 53 | [2025-03-18 12:52:32,072] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:56] 54 | [2025-03-18 12:52:32,279] [process.py[line:108]] [INFO] [[16] -> chaofen cost:0.21899199485778809s] 55 | [2025-03-18 12:52:32,530] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:16, cost:0.47049522399902344s] 56 | [2025-03-18 12:52:32,552] [process.py[line:108]] [INFO] [>>> audio_transfer get message:20] 57 | [2025-03-18 12:52:32,567] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:60] 58 | [2025-03-18 12:52:32,766] [process.py[line:108]] [INFO] [[20] -> chaofen cost:0.21334147453308105s] 59 | [2025-03-18 12:52:32,993] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:20, cost:0.4411466121673584s] 60 | [2025-03-18 12:52:33,015] [process.py[line:108]] [INFO] [>>> audio_transfer get message:24] 61 | [2025-03-18 12:52:33,028] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:64] 62 | [2025-03-18 12:52:33,229] [process.py[line:108]] [INFO] [[24] -> chaofen cost:0.21344351768493652s] 63 | [2025-03-18 12:52:33,457] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:24, cost:0.44205546379089355s] 64 | [2025-03-18 12:52:33,479] [process.py[line:108]] [INFO] [>>> audio_transfer get message:28] 65 | [2025-03-18 12:52:33,493] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:68] 66 | [2025-03-18 12:52:33,697] [process.py[line:108]] [INFO] [[28] -> chaofen cost:0.21679949760437012s] 67 | [2025-03-18 12:52:33,924] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:28, cost:0.4448537826538086s] 68 | [2025-03-18 12:52:33,946] [process.py[line:108]] [INFO] [>>> audio_transfer get message:32] 69 | [2025-03-18 12:52:33,960] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:72] 70 | [2025-03-18 12:52:34,159] [process.py[line:108]] [INFO] [[32] -> chaofen cost:0.21156740188598633s] 71 | [2025-03-18 12:52:34,381] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:32, cost:0.43474769592285156s] 72 | [2025-03-18 12:52:34,403] 
[process.py[line:108]] [INFO] [>>> audio_transfer get message:36] 73 | [2025-03-18 12:52:34,417] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:76] 74 | [2025-03-18 12:52:34,618] [process.py[line:108]] [INFO] [[36] -> chaofen cost:0.21408891677856445s] 75 | [2025-03-18 12:52:34,844] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:36, cost:0.4406392574310303s] 76 | [2025-03-18 12:52:34,867] [process.py[line:108]] [INFO] [>>> audio_transfer get message:40] 77 | [2025-03-18 12:52:34,881] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:80] 78 | [2025-03-18 12:52:35,099] [process.py[line:108]] [INFO] [[40] -> chaofen cost:0.23105645179748535s] 79 | [2025-03-18 12:52:35,328] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:40, cost:0.46161866188049316s] 80 | [2025-03-18 12:52:35,350] [process.py[line:108]] [INFO] [>>> audio_transfer get message:44] 81 | [2025-03-18 12:52:35,363] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:84] 82 | [2025-03-18 12:52:35,577] [process.py[line:108]] [INFO] [[44] -> chaofen cost:0.22576594352722168s] 83 | [2025-03-18 12:52:35,808] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:44, cost:0.4577639102935791s] 84 | [2025-03-18 12:52:35,832] [process.py[line:108]] [INFO] [>>> audio_transfer get message:48] 85 | [2025-03-18 12:52:35,846] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:88] 86 | [2025-03-18 12:52:36,047] [process.py[line:108]] [INFO] [[48] -> chaofen cost:0.21441864967346191s] 87 | [2025-03-18 12:52:36,278] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:48, cost:0.4459846019744873s] 88 | [2025-03-18 12:52:36,301] [process.py[line:108]] [INFO] [>>> audio_transfer get message:52] 89 | [2025-03-18 12:52:36,315] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:92] 90 | [2025-03-18 12:52:36,521] [process.py[line:108]] [INFO] [[52] -> chaofen cost:0.2181704044342041s] 91 | [2025-03-18 12:52:36,777] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:52, cost:0.47586750984191895s] 92 | [2025-03-18 12:52:36,798] [process.py[line:108]] [INFO] [>>> audio_transfer get message:56] 93 | [2025-03-18 12:52:36,817] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:96] 94 | [2025-03-18 12:52:37,014] [process.py[line:108]] [INFO] [[56] -> chaofen cost:0.2147221565246582s] 95 | [2025-03-18 12:52:37,247] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:56, cost:0.4486660957336426s] 96 | [2025-03-18 12:52:37,266] [process.py[line:108]] [INFO] [>>> audio_transfer get message:60] 97 | [2025-03-18 12:52:37,281] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:100] 98 | [2025-03-18 12:52:37,483] [process.py[line:108]] [INFO] [[60] -> chaofen cost:0.21598410606384277s] 99 | [2025-03-18 12:52:37,703] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:60, cost:0.43683695793151855s] 100 | [2025-03-18 12:52:37,722] [process.py[line:108]] [INFO] [>>> audio_transfer get message:64] 101 | [2025-03-18 12:52:37,736] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:104] 102 | [2025-03-18 12:52:37,941] [process.py[line:108]] [INFO] [[64] -> chaofen 
cost:0.2180624008178711s] 103 | [2025-03-18 12:52:38,163] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:64, cost:0.4412345886230469s] 104 | [2025-03-18 12:52:38,183] [process.py[line:108]] [INFO] [>>> audio_transfer get message:68] 105 | [2025-03-18 12:52:38,197] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:108] 106 | [2025-03-18 12:52:38,397] [process.py[line:108]] [INFO] [[68] -> chaofen cost:0.21321654319763184s] 107 | [2025-03-18 12:52:38,637] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:68, cost:0.45404863357543945s] 108 | [2025-03-18 12:52:38,656] [process.py[line:108]] [INFO] [>>> audio_transfer get message:72] 109 | [2025-03-18 12:52:38,670] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:112] 110 | [2025-03-18 12:52:38,877] [process.py[line:108]] [INFO] [[72] -> chaofen cost:0.21999263763427734s] 111 | [2025-03-18 12:52:39,100] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:72, cost:0.4440436363220215s] 112 | [2025-03-18 12:52:39,119] [process.py[line:108]] [INFO] [>>> audio_transfer get message:76] 113 | [2025-03-18 12:52:39,133] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:116] 114 | [2025-03-18 12:52:39,347] [process.py[line:108]] [INFO] [[76] -> chaofen cost:0.22693967819213867s] 115 | [2025-03-18 12:52:39,568] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:76, cost:0.4492220878601074s] 116 | [2025-03-18 12:52:39,586] [process.py[line:108]] [INFO] [>>> audio_transfer get message:80] 117 | [2025-03-18 12:52:39,601] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:120] 118 | [2025-03-18 12:52:39,801] [process.py[line:108]] [INFO] [[80] -> chaofen cost:0.21407222747802734s] 119 | [2025-03-18 12:52:40,024] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:80, cost:0.4377562999725342s] 120 | [2025-03-18 12:52:40,052] [process.py[line:108]] [INFO] [>>> audio_transfer get message:84] 121 | [2025-03-18 12:52:40,068] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:124] 122 | [2025-03-18 12:52:40,270] [process.py[line:108]] [INFO] [[84] -> chaofen cost:0.21637320518493652s] 123 | [2025-03-18 12:52:40,494] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:84, cost:0.44118523597717285s] 124 | [2025-03-18 12:52:40,513] [process.py[line:108]] [INFO] [>>> audio_transfer get message:88] 125 | [2025-03-18 12:52:40,527] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:128] 126 | [2025-03-18 12:52:40,731] [process.py[line:108]] [INFO] [[88] -> chaofen cost:0.2170412540435791s] 127 | [2025-03-18 12:52:40,951] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:88, cost:0.4383111000061035s] 128 | [2025-03-18 12:52:40,971] [process.py[line:108]] [INFO] [>>> audio_transfer get message:92] 129 | [2025-03-18 12:52:40,984] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:132] 130 | [2025-03-18 12:52:41,187] [process.py[line:108]] [INFO] [[92] -> chaofen cost:0.2148122787475586s] 131 | [2025-03-18 12:52:41,416] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:92, cost:0.4454326629638672s] 132 | [2025-03-18 12:52:41,439] [process.py[line:108]] [INFO] [>>> audio_transfer 
get message:96] 133 | [2025-03-18 12:52:41,451] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:136] 134 | [2025-03-18 12:52:41,663] [process.py[line:108]] [INFO] [[96] -> chaofen cost:0.222761869430542s] 135 | [2025-03-18 12:52:41,887] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:96, cost:0.4477369785308838s] 136 | [2025-03-18 12:52:41,906] [process.py[line:108]] [INFO] [>>> audio_transfer get message:100] 137 | [2025-03-18 12:52:41,920] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:140] 138 | [2025-03-18 12:52:42,123] [process.py[line:108]] [INFO] [[100] -> chaofen cost:0.21576929092407227s] 139 | [2025-03-18 12:52:42,359] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:100, cost:0.4525878429412842s] 140 | [2025-03-18 12:52:42,379] [process.py[line:108]] [INFO] [>>> audio_transfer get message:104] 141 | [2025-03-18 12:52:42,394] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:144] 142 | [2025-03-18 12:52:42,596] [process.py[line:108]] [INFO] [[104] -> chaofen cost:0.21553897857666016s] 143 | [2025-03-18 12:52:42,836] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:104, cost:0.45633435249328613s] 144 | [2025-03-18 12:52:42,855] [process.py[line:108]] [INFO] [>>> audio_transfer get message:108] 145 | [2025-03-18 12:52:42,870] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:148] 146 | [2025-03-18 12:52:42,873] [process.py[line:108]] [INFO] [append imgs over] 147 | [2025-03-18 12:52:42,879] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据结束] 148 | [2025-03-18 12:52:43,073] [process.py[line:108]] [INFO] [[108] -> chaofen cost:0.21662592887878418s] 149 | [2025-03-18 12:52:43,297] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:108, cost:0.4421381950378418s] 150 | [2025-03-18 12:52:43,318] [process.py[line:108]] [INFO] [>>> audio_transfer get message:112] 151 | [2025-03-18 12:52:43,332] [process.py[line:108]] [INFO] [[1002]任务预处理进程结束] 152 | [2025-03-18 12:52:43,531] [process.py[line:108]] [INFO] [[112] -> chaofen cost:0.21228814125061035s] 153 | [2025-03-18 12:52:43,791] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:112, cost:0.47336626052856445s] 154 | [2025-03-18 12:52:43,811] [process.py[line:108]] [INFO] [>>> audio_transfer get message:116] 155 | [2025-03-18 12:52:44,034] [process.py[line:108]] [INFO] [[116] -> chaofen cost:0.2223985195159912s] 156 | [2025-03-18 12:52:44,262] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:116, cost:0.4509873390197754s] 157 | [2025-03-18 12:52:44,281] [process.py[line:108]] [INFO] [>>> audio_transfer get message:120] 158 | [2025-03-18 12:52:44,499] [process.py[line:108]] [INFO] [[120] -> chaofen cost:0.21637916564941406s] 159 | [2025-03-18 12:52:44,742] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:120, cost:0.46120476722717285s] 160 | [2025-03-18 12:52:44,762] [process.py[line:108]] [INFO] [>>> audio_transfer get message:124] 161 | [2025-03-18 12:52:44,981] [process.py[line:108]] [INFO] [[124] -> chaofen cost:0.21886157989501953s] 162 | [2025-03-18 12:52:45,240] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:124, cost:0.4781684875488281s] 163 | [2025-03-18 12:52:45,258] [process.py[line:108]] [INFO] [>>> audio_transfer 
get message:128] 164 | [2025-03-18 12:52:45,474] [process.py[line:108]] [INFO] [[128] -> chaofen cost:0.21480226516723633s] 165 | [2025-03-18 12:52:45,708] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:128, cost:0.44920992851257324s] 166 | [2025-03-18 12:52:45,726] [process.py[line:108]] [INFO] [>>> audio_transfer get message:132] 167 | [2025-03-18 12:52:45,943] [process.py[line:108]] [INFO] [[132] -> chaofen cost:0.21567535400390625s] 168 | [2025-03-18 12:52:46,181] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:132, cost:0.45519399642944336s] 169 | [2025-03-18 12:52:46,200] [process.py[line:108]] [INFO] [>>> audio_transfer get message:136] 170 | [2025-03-18 12:52:46,418] [process.py[line:108]] [INFO] [[136] -> chaofen cost:0.21763992309570312s] 171 | [2025-03-18 12:52:46,662] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:136, cost:0.4619452953338623s] 172 | [2025-03-18 12:52:46,681] [process.py[line:108]] [INFO] [>>> audio_transfer get message:140] 173 | [2025-03-18 12:52:46,900] [process.py[line:108]] [INFO] [[140] -> chaofen cost:0.21794748306274414s] 174 | [2025-03-18 12:52:47,146] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:140, cost:0.4646177291870117s] 175 | [2025-03-18 12:52:47,166] [process.py[line:108]] [INFO] [>>> audio_transfer get message:144] 176 | [2025-03-18 12:52:47,382] [process.py[line:108]] [INFO] [[144] -> chaofen cost:0.21491503715515137s] 177 | [2025-03-18 12:52:47,619] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:144, cost:0.4536001682281494s] 178 | [2025-03-18 12:52:47,639] [process.py[line:108]] [INFO] [>>> audio_transfer get message:148] 179 | [2025-03-18 12:52:47,857] [process.py[line:108]] [INFO] [[148] -> chaofen cost:0.21780657768249512s] 180 | [2025-03-18 12:52:48,098] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:148, cost:0.459348201751709s] 181 | [2025-03-18 12:52:48,104] [process.py[line:108]] [INFO] [>>> audio_transfer get exception msg:-1] 182 | [2025-03-18 12:52:48,105] [process.py[line:108]] [INFO] [[1002]任务数字人图片处理已完成] 183 | [2025-03-18 12:52:48,146] [run.py[line:43]] [INFO] [Custom VideoWriter [1002]视频帧队列处理已结束] 184 | [2025-03-18 12:52:48,151] [run.py[line:46]] [INFO] [Custom VideoWriter Silence Video saved in /mnt/nfs/bj4-v100-23/data1/yubosun/git_proj/heygem/heygem_ori_so/1002-t.mp4] 185 | [2025-03-18 12:52:48,155] [run.py[line:118]] [INFO] [Custom command:ffmpeg -loglevel warning -y -i ./example/audio.wav -i ./1002-t.mp4 -c:a aac -c:v libx264 -crf 15 -strict -2 ./1002-r.mp4] 186 | [2025-03-18 12:53:06,908] [run.py[line:147]] [INFO] [>>> 任务:1002 耗时:59.3451771736145 ] 187 | -------------------------------------------------------------------------------- /model_lib/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_wrapper import ONNXModel 2 | from .model_base import ModelBase 3 | 4 | 5 | -------------------------------------------------------------------------------- /model_lib/base_wrapper/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/8/26 3 | 4 | 5 | from .onnx_model import ONNXModel 6 | 7 | -------------------------------------------------------------------------------- /model_lib/base_wrapper/onnx_model.cpython-38-x86_64-linux-gnu.so: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/model_lib/base_wrapper/onnx_model.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/model_lib/model_base.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 | # @Time : 2022/7/29
3 |
4 |
5 |
6 | from .base_wrapper import ONNXModel
7 | from pathlib import Path
8 |
9 |
10 | try:
11 |     from .base_wrapper import TRTWrapper, TRTWrapperSelf
12 | except ImportError:  # the TensorRT wrappers are optional; ONNX models still load without them
13 |     pass
14 |
15 |
16 | # from cv2box.utils import try_import
17 |
18 | class ModelBase:
19 |     def __init__(self, model_info, provider):
20 |         self.model_path = model_info['model_path']
21 |
22 |         if 'input_dynamic_shape' in model_info:
23 |             self.input_dynamic_shape = model_info['input_dynamic_shape']
24 |         else:
25 |             self.input_dynamic_shape = None
26 |
27 |         if 'picklable' in model_info:
28 |             picklable = model_info['picklable']
29 |         else:
30 |             picklable = False
31 |
32 |         use_trt_wrapper_self = 'trt_wrapper_self' in model_info
33 |         # (TRTWrapper / TRTWrapperSelf only exist when the optional import above succeeded)
34 |
35 |         # init model by file suffix; TJMWrapper, load_encrypt_model and OnnxModelPickable below are expected from other base_wrapper builds (only the ONNX path is complete in this repo)
36 |         if Path(self.model_path).suffix == '.engine':
37 |             self.model_type = 'trt'
38 |             self.model = (TRTWrapperSelf if use_trt_wrapper_self else TRTWrapper)(self.model_path)
39 |         elif Path(self.model_path).suffix == '.tjm':
40 |             self.model_type = 'tjm'
41 |             self.model = TJMWrapper(self.model_path, provider=provider)
42 |         elif Path(self.model_path).suffix in ['.onnx', '.bin']:
43 |             self.model_type = 'onnx'
44 |             if not picklable:
45 |                 if 'encrypt' in model_info:
46 |                     self.model_path = load_encrypt_model(self.model_path, key=model_info['encrypt'])
47 |                 self.model = ONNXModel(self.model_path, provider=provider, input_dynamic_shape=self.input_dynamic_shape)
48 |             else:
49 |                 self.model = OnnxModelPickable(self.model_path, provider=provider)
50 |         else:
51 |             raise ValueError('check model suffix, support engine/tjm/onnx now.')
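# A hedged usage sketch (not part of the original file): ModelBase dispatches on the
# model file's suffix, so an ONNX model would be wrapped roughly as below. The path is
# hypothetical, and the accepted `provider` values are defined inside the compiled
# base_wrapper ONNXModel, so treat both as assumptions:
#
#     info = {'model_path': 'weights/example_face.onnx', 'input_dynamic_shape': None}
#     wrapper = ModelBase(info, provider='gpu')
#     assert wrapper.model_type == 'onnx'   # '.engine' -> 'trt', '.tjm' -> 'tjm'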
52 | -------------------------------------------------------------------------------- /preprocess_audio_and_3dmm.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/preprocess_audio_and_3dmm.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cv2box==0.5.9 2 | apstone==0.0.8 3 | appdirs==1.4.4 4 | audioread==2.1.9 5 | typeguard==2.13.3 6 | cffi==1.15.0 7 | charset-normalizer==2.0.12 8 | click==8.1.3 9 | colorama==0.4.4 10 | cycler==0.11.0 11 | decorator==5.1.1 12 | filelock==3.7.1 13 | flatbuffers==2.0 14 | fonttools==4.36.0 15 | freetype-py==2.3.0 16 | huggingface-hub==0.0.8 17 | idna==3.3 18 | imageio==2.19.3 19 | importlib-metadata==4.11.4 20 | joblib==1.1.0 21 | kiwisolver==1.4.4 22 | kornia==0.6.6 23 | librosa==0.8.1 24 | matplotlib==3.5.3 25 | networkx==2.6.3 26 | numba==0.55.2 27 | numexpr==2.8.6 28 | numpy==1.21.6 29 | onnxruntime-gpu==1.9.0 30 | opencv-python==4.7.0.72 31 | packaging==21.3 32 | pillow==9.1.1 33 | pooch==1.6.0 34 | protobuf==4.21.5 35 | psutil==5.9.1 36 | pycparser==2.21 37 | pyglet==1.5.26 38 | pyopengl==3.1.0 39 | pyparsing==3.0.9 40 | pyrender==0.1.45 41 | python-dateutil==2.8.2 42 | pywavelets==1.3.0 43 | pyyaml==6.0 44 | regex==2022.6.2 45 | requests==2.27.1 46 | resampy==0.2.2 47 | sacremoses==0.0.53 48 | scikit-image==0.19.3 49 | scikit-learn==1.0.2 50 | scipy==1.7.1 51 | six==1.16.0 52 | soundfile==0.10.3.post1 53 | threadpoolctl==3.1.0 54 | tifffile==2021.11.2 55 | tokenizers==0.10.3 56 | torch==1.11.0+cu113 57 | torchaudio==0.11.0+cu113 58 | torchvision==0.12.0+cu113 59 | tqdm==4.64.0 60 | transformers==4.6.1 61 | trimesh==3.12.7 62 | typeguard==2.13.3 63 | typing-extensions==4.2.0 64 | urllib3==1.26.9 65 | zipp==3.8.0 66 | -------------------------------------------------------------------------------- /requirements_0.txt: -------------------------------------------------------------------------------- 1 | aiofiles==23.2.1 2 | annotated-types==0.7.0 3 | anyio==4.5.2 4 | apstone==0.0.8 5 | audioread==3.0.1 6 | blinker==1.8.2 7 | certifi==2025.1.31 8 | cffi==1.17.1 9 | charset-normalizer==3.4.1 10 | click==8.1.8 11 | coloredlogs==15.0.1 12 | contourpy==1.1.1 13 | cv2box==0.5.9 14 | cycler==0.12.1 15 | decorator==5.2.1 16 | einops==0.8.1 17 | exceptiongroup==1.2.2 18 | fastapi==0.115.11 19 | ffmpy==0.5.0 20 | filelock==3.16.1 21 | Flask==3.0.3 22 | flatbuffers==25.2.10 23 | fonttools==4.56.0 24 | fsspec==2025.3.0 25 | gradio==4.44.1 26 | gradio_client==1.3.0 27 | h11==0.14.0 28 | httpcore==1.0.7 29 | httpx==0.28.1 30 | huggingface-hub==0.29.3 31 | humanfriendly==10.0 32 | idna==3.10 33 | imageio==2.35.1 34 | importlib_metadata==8.5.0 35 | importlib_resources==6.4.5 36 | itsdangerous==2.2.0 37 | Jinja2==3.1.6 38 | joblib==1.4.2 39 | kiwisolver==1.4.7 40 | lazy_loader==0.4 41 | librosa==0.11.0 42 | llvmlite==0.41.1 43 | markdown-it-py==3.0.0 44 | MarkupSafe==2.1.5 45 | matplotlib==3.7.5 46 | mdurl==0.1.2 47 | mpmath==1.3.0 48 | msgpack==1.1.0 49 | networkx==3.1 50 | numba==0.58.1 51 | numexpr==2.8.6 52 | numpy==1.24.4 53 | onnxruntime-gpu==1.16.0 54 | opencv-python==4.11.0.86 55 | orjson==3.10.15 56 | packaging==24.2 57 | pandas==2.0.3 58 | pillow==10.4.0 59 | platformdirs==4.3.6 60 | pooch==1.8.2 61 | protobuf==5.29.4 62 | 
pycparser==2.22 63 | pydantic==2.10.6 64 | pydantic_core==2.27.2 65 | pydub==0.25.1 66 | Pygments==2.19.1 67 | pyparsing==3.1.4 68 | python-dateutil==2.9.0.post0 69 | python-multipart==0.0.20 70 | pytz==2025.1 71 | PyWavelets==1.4.1 72 | PyYAML==6.0.2 73 | requests==2.32.3 74 | rich==13.9.4 75 | ruff==0.11.1 76 | scikit-image==0.21.0 77 | scikit-learn==1.3.2 78 | scipy==1.10.1 79 | semantic-version==2.10.0 80 | shellingham==1.5.4 81 | six==1.17.0 82 | sniffio==1.3.1 83 | soundfile==0.13.1 84 | soxr==0.3.7 85 | spark-parser==1.8.9 86 | starlette==0.44.0 87 | sympy==1.13.3 88 | threadpoolctl==3.5.0 89 | tifffile==2023.7.10 90 | tomlkit==0.12.0 91 | torch==1.11.0+cu113 92 | torchaudio==0.11.0+cu113 93 | torchvision==0.12.0+cu113 94 | tqdm==4.67.1 95 | typeguard==2.13.3 96 | typer==0.15.2 97 | typing_extensions==4.12.2 98 | tzdata==2025.1 99 | urllib3==2.2.3 100 | uvicorn==0.33.0 101 | websockets==12.0 102 | Werkzeug==3.0.6 103 | xdis==6.1.3 104 | zipp==3.20.2 105 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import gc 3 | import json 4 | import os 5 | import subprocess 6 | import sys 7 | import threading 8 | import time 9 | import traceback 10 | import uuid 11 | from enum import Enum 12 | 13 | import queue 14 | import cv2 15 | from flask import Flask, request 16 | 17 | if sys.version_info.major != 3 or sys.version_info.minor != 8: 18 | print("请使用 Python 3.8 版本运行此脚本") 19 | sys.exit(1) 20 | 21 | import service.trans_dh_service 22 | 23 | from h_utils.custom import CustomError 24 | from y_utils.config import GlobalConfig 25 | from y_utils.logger import logger 26 | 27 | 28 | def get_args(): 29 | parser = argparse.ArgumentParser( 30 | formatter_class=(argparse.ArgumentDefaultsHelpFormatter) 31 | ) 32 | 33 | parser.add_argument( 34 | "--audio_path", 35 | type=str, 36 | default="example/audio.wav", 37 | help="path to local audio file", 38 | ) 39 | parser.add_argument( 40 | "--video_path", 41 | type=str, 42 | default="example/video.mp4", 43 | help="path to local video file", 44 | ) 45 | opt = parser.parse_args() 46 | return opt 47 | 48 | 49 | def write_video( 50 | output_imgs_queue, 51 | temp_dir, 52 | result_dir, 53 | work_id, 54 | audio_path, 55 | result_queue, 56 | width, 57 | height, 58 | fps, 59 | watermark_switch=0, 60 | digital_auth=0, 61 | ): 62 | output_mp4 = os.path.join(temp_dir, "{}-t.mp4".format(work_id)) 63 | fourcc = cv2.VideoWriter_fourcc(*"mp4v") 64 | result_path = os.path.join(result_dir, "{}-r.mp4".format(work_id)) 65 | video_write = cv2.VideoWriter(output_mp4, fourcc, fps, (width, height)) 66 | print("Custom VideoWriter init done") 67 | try: 68 | while True: 69 | state, reason, value_ = output_imgs_queue.get() 70 | if type(state) == bool and state == True: 71 | logger.info( 72 | "Custom VideoWriter [{}]视频帧队列处理已结束".format(work_id) 73 | ) 74 | logger.info( 75 | "Custom VideoWriter Silence Video saved in {}".format( 76 | os.path.realpath(output_mp4) 77 | ) 78 | ) 79 | video_write.release() 80 | break 81 | else: 82 | if type(state) == bool and state == False: 83 | logger.error( 84 | "Custom VideoWriter [{}]任务视频帧队列 -> 异常原因:[{}]".format( 85 | work_id, reason 86 | ) 87 | ) 88 | raise CustomError(reason) 89 | for result_img in value_: 90 | video_write.write(result_img) 91 | if video_write is not None: 92 | video_write.release() 93 | if watermark_switch == 1 and digital_auth == 1: 94 | logger.info( 95 | "Custom VideoWriter 
[{}]任务需要水印和数字人标识".format(work_id)
96 |             )
97 |             if width > height:  # NOTE: the landscape and portrait branches below build identical commands
98 |                 command = 'ffmpeg -y -i {} -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10,overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
99 |                     audio_path,
100 |                     output_mp4,
101 |                     GlobalConfig.instance().watermark_path,
102 |                     GlobalConfig.instance().digital_auth_path,
103 |                     result_path,
104 |                 )
105 |                 logger.info("command:{}".format(command))
106 |             else:
107 |                 command = 'ffmpeg -y -i {} -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10,overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
108 |                     audio_path,
109 |                     output_mp4,
110 |                     GlobalConfig.instance().watermark_path,
111 |                     GlobalConfig.instance().digital_auth_path,
112 |                     result_path,
113 |                 )
114 |                 logger.info("command:{}".format(command))
115 |         elif watermark_switch == 1 and digital_auth == 0:
116 |             logger.info("Custom VideoWriter [{}]任务需要水印".format(work_id))
117 |             command = 'ffmpeg -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10" -c:a aac -crf 15 -strict -2 {}'.format(
118 |                 audio_path,
119 |                 output_mp4,
120 |                 GlobalConfig.instance().watermark_path,
121 |                 result_path,
122 |             )
123 |             logger.info("command:{}".format(command))
124 |         elif watermark_switch == 0 and digital_auth == 1:
125 |             logger.info("Custom VideoWriter [{}]任务需要数字人标识".format(work_id))
126 |             if width > height:  # NOTE: identical commands in both orientation branches here as well
127 |                 command = 'ffmpeg -loglevel warning -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
128 |                     audio_path,
129 |                     output_mp4,
130 |                     GlobalConfig.instance().digital_auth_path,
131 |                     result_path,
132 |                 )
133 |                 logger.info("command:{}".format(command))
134 |             else:
135 |                 command = 'ffmpeg -loglevel warning -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
136 |                     audio_path,
137 |                     output_mp4,
138 |                     GlobalConfig.instance().digital_auth_path,
139 |                     result_path,
140 |                 )
141 |                 logger.info("command:{}".format(command))
142 |         else:
143 |             command = "ffmpeg -loglevel warning -y -i {} -i {} -c:a aac -c:v libx264 -crf 15 -strict -2 {}".format(
144 |                 audio_path, output_mp4, result_path
145 |             )
146 |             logger.info("Custom command:{}".format(command))
147 |         subprocess.call(command, shell=True)
148 |         print("###### Custom Video Writer write over")
149 |         print(f"###### Video result saved in {os.path.realpath(result_path)}")
150 |         result_queue.put([True, result_path])  # hand the result back before terminating the writer
151 |         exit(0)
152 |     except Exception as e:
153 |         logger.error(
154 |             "Custom VideoWriter [{}]视频帧队列处理异常结束,异常原因:[{}]".format(
155 |                 work_id, e.__str__()
156 |             )
157 |         )
158 |         result_queue.put(
159 |             [
160 |                 False,
161 |                 "[{}]视频帧队列处理异常结束,异常原因:[{}]".format(
162 |                     work_id, e.__str__()
163 |                 ),
164 |             ]
165 |         )
166 |     logger.info("Custom VideoWriter 后处理进程结束")
167 |
168 |
169 | service.trans_dh_service.write_video = write_video
170 |
171 |
172 | def main():
173 |     opt = get_args()
174 |     if not os.path.exists(opt.audio_path):
175 |         audio_url = "example/audio.wav"
176 |     else:
177 |         audio_url = opt.audio_path
178 |
179 |     if not os.path.exists(opt.video_path):
180 |         video_url = "example/video.mp4"
181 |     else:
182 |         video_url = opt.video_path
183 |     sys.argv = [sys.argv[0]]
184 |     task = service.trans_dh_service.TransDhTask()
185 |     time.sleep(10)  # somehow, this works...
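# A hedged note on the call below: TransDhTask.work() lives in the compiled
# trans_dh_service .so, so its signature is not visible in this repo. Judging from
# the write_video() wrapper above, the trailing zeros most plausibly carry flags
# such as watermark_switch and digital_auth (an assumption, not confirmed by source).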
186 |
187 |     code = "1004"
188 |     task.work(audio_url, video_url, code, 0, 0, 0, 0)
189 |
190 |
191 | if __name__ == "__main__":
192 |     main()
193 |
194 | # python run.py
195 | # python run.py --audio_path example/audio.wav --video_path example/video.mp4
196 |
--------------------------------------------------------------------------------
/service/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 | """
4 | @project : face2face_train
5 | @author : huyi
6 | @file : __init__.py.py
7 | @ide : PyCharm
8 | @time : 2023-12-06 14:46:40
9 | """
10 |
--------------------------------------------------------------------------------
/service/server.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/service/server.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/service/trans_dh_service.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/service/trans_dh_service.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/sources.list:
--------------------------------------------------------------------------------
1 | # Source (deb-src) entries are commented out by default to speed up apt update; uncomment them if needed
2 | deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal main restricted universe multiverse
3 | # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal main restricted universe multiverse
4 | deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-updates main restricted universe multiverse
5 | # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-updates main restricted universe multiverse
6 | deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-backports main restricted universe multiverse
7 | # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-backports main restricted universe multiverse
8 | deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-security main restricted universe multiverse
9 | # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-security main restricted universe multiverse
10 |
11 | # Pre-release (proposed) sources; enabling them is not recommended
12 | # deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-proposed main restricted universe multiverse
13 | # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-proposed main restricted universe multiverse
14 |
--------------------------------------------------------------------------------
/wenet/compute_ctc_att_bnf.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/compute_ctc_att_bnf.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/examples/aishell/aidata/conf/train_conformer_multi_cn.yaml:
--------------------------------------------------------------------------------
1 | # network architecture
2 | # encoder related
3 | encoder: conformer
4 | encoder_conf:
5 |     output_size: 256    # dimension of attention
6 |     attention_heads: 4
7 |     linear_units: 2048  # the number of units of position-wise feed forward
8 |     num_blocks: 12      # the number of encoder blocks
9 |     dropout_rate: 0.1
10 |     positional_dropout_rate: 0.1
11 |     attention_dropout_rate: 0.0
12 |     input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
13 |     normalize_before: true
14 |     cnn_module_kernel: 15
15 |     use_cnn_module: True
16 |     activation_type: 'swish'
17 |     pos_enc_layer_type: 'rel_pos'
18 |     selfattention_layer_type: 'rel_selfattn'
19 |
20 | # decoder related
21 | decoder: transformer
22 | decoder_conf:
23 |     attention_heads: 4
24 |     linear_units: 2048
25 |     num_blocks: 6
26 |     dropout_rate: 0.1
27 |     positional_dropout_rate: 0.1
28 |     self_attention_dropout_rate: 0.0
29 |     src_attention_dropout_rate: 0.0
30 |
31 | # hybrid CTC/attention
32 | model_conf:
33 |     ctc_weight: 0.3
34 |     lsm_weight: 0.1     # label smoothing option
35 |     length_normalized_loss: false
36 |
37 | # use raw_wav or kaldi feature
38 | raw_wav: false
39 |
40 | # feature extraction
41 | collate_conf:
42 |     # waveform level config
43 |     wav_distortion_conf:
44 |         wav_dither: 0.1
45 |         wav_distortion_rate: 0.0
46 |         distortion_methods: []
47 |     speed_perturb: true
48 |     feature_extraction_conf:
49 |         feature_type: 'fbank'
50 |         mel_bins: 80
51 |         frame_shift: 10
52 |         frame_length: 25
53 |         using_pitch: false
54 |     # spec level config
55 |     # spec_swap: false
56 |     feature_dither: 0.0 # add dither [-feature_dither,feature_dither] on fbank feature
57 |     spec_aug: true
58 |     spec_aug_conf:
59 |         warp_for_time: False
60 |         num_t_mask: 2
61 |         num_f_mask: 2
62 |         max_t: 50
63 |         max_f: 10
64 |         max_w: 80
65 |
66 |
67 | # dataset related
68 | dataset_conf:
69 |     max_length: 1300 #40960
70 |     min_length: 0
71 |     batch_type: 'static' # static or dynamic
72 |     batch_size: 40
73 |     sort: true
74 |
75 | grad_clip: 5
76 | accum_grad: 4
77 | max_epoch: 240
78 | log_interval: 100
79 |
80 | optim: adam
81 | optim_conf:
82 |     lr: 0.0025 #0.0025
83 | scheduler: warmuplr # pytorch v1.1.0+ required
84 | scheduler_conf:
85 |     warmup_steps: 100000
86 |
--------------------------------------------------------------------------------
/wenet/examples/aishell/aidata/conf/train_conformer_multi_cn_linear.yaml:
--------------------------------------------------------------------------------
1 | # network architecture
2 | # encoder related
3 | encoder: conformer
4 | encoder_conf:
5 |     output_size: 256    # dimension of attention
6 |     attention_heads: 4
7 |     linear_units: 1024  # the number of units of position-wise feed forward
8 |     num_blocks: 6       # the number of encoder blocks
9 |     dropout_rate: 0.1
10 |     positional_dropout_rate: 0.1
11 |     attention_dropout_rate: 0.0
12 |     input_layer: linear # encoder input type, you can choose linear, conv2d, conv2d6 and conv2d8
13 |     normalize_before: true
14 |     cnn_module_kernel: 15
15 |     use_cnn_module: True
16 |     activation_type: 'swish'
17 |     pos_enc_layer_type: 'rel_pos'
18 |     selfattention_layer_type: 'rel_selfattn'
19 |
20 | # decoder related
21 | decoder: transformer
22 | decoder_conf:
23 |     attention_heads: 4
24 |     linear_units: 1024
25 |     num_blocks: 3
26 |     dropout_rate: 0.1
27 |     positional_dropout_rate: 0.1
28 |     self_attention_dropout_rate: 0.0
29 |     src_attention_dropout_rate: 0.0
30 |
31 | # hybrid CTC/attention
32 | model_conf:
33 |     ctc_weight: 0.3
34 |     lsm_weight: 0.1     # label smoothing option
35 |     length_normalized_loss: false
36 |
37 | # use raw_wav or kaldi feature
38 | raw_wav: false
39 |
40 | # feature extraction
41 | collate_conf:
42 |     # waveform level config
43 |     wav_distortion_conf:
44 |         wav_dither: 0.1
45 |         wav_distortion_rate: 0.0
46 |         distortion_methods: []
47 |     speed_perturb: true
48 |     feature_extraction_conf:
49 |         feature_type: 'fbank'
50 |         mel_bins: 80
51 |         frame_shift: 10
52 |         frame_length: 25
53 |         using_pitch: false
54 |     # spec level config
55 |     # spec_swap: false
56 |     feature_dither: 0.0 # add dither [-feature_dither,feature_dither] on fbank feature
57 |     spec_aug: true
58 |     spec_aug_conf:
59 |         warp_for_time: False
60 |         num_t_mask: 2
61 |         num_f_mask: 2
62 |         max_t: 50
63 |         max_f: 10
64 |         max_w: 80
65 |
66 |
67 | # dataset related
68 | dataset_conf:
69 |     max_length: 1300 #40960
70 |     min_length: 0
71 |     batch_type: 'static' # static or dynamic
72 |     batch_size: 40
73 |     sort: true
74 |
75 | grad_clip: 5
76 | accum_grad: 4
77 | max_epoch: 240
78 | log_interval: 100
79 |
80 | optim: adam
81 | optim_conf:
82 |     lr: 0.002
83 | scheduler: warmuplr # pytorch v1.1.0+ required
84 | scheduler_conf:
85 |     warmup_steps: 50000
86 |
--------------------------------------------------------------------------------
/wenet/tools/_extract_feats.py:
--------------------------------------------------------------------------------
1 | import librosa
2 | # import tensorflow as tf
3 | import numpy as np
4 | from scipy.io import wavfile
5 | from scipy import signal
6 |
7 | import torchaudio.compliance.kaldi as kaldi
8 | import torchaudio
9 | # torchaudio.set_audio_backend("sox_io")
10 |
11 |
12 | def _extract_feature(wav_path):
13 |     """ Extract acoustic fbank features from the original waveform.
14 |
15 |     Uses torchaudio's kaldi-compatible fbank with 80 mel bins,
16 |     25 ms frames and a 10 ms frame shift (see the call below).
17 |
18 |     Args:
19 |         wav_path: path to the input wav file.
20 |
21 |
22 |
23 |     Returns:
24 |         mat: fbank feature matrix, a numpy array of shape [num_frames, 80].
25 |     """
26 |     waveform, sample_rate = torchaudio.load_wav(wav_path)  # NOTE: load_wav is removed in newer torchaudio; torchaudio.load is the usual replacement
27 |
28 |     mat = kaldi.fbank(
29 |         waveform,
30 |         num_mel_bins=80,
31 |         frame_length=25,
32 |         frame_shift=10,
33 |         dither=0.1,
34 |         energy_floor=0.0,
35 |         sample_frequency=sample_rate)
36 |     mat = mat.detach().numpy()
37 |
38 |     return mat
39 |
40 | def _extract_feature_norm(wav_path):
41 |     """ Extract acoustic fbank features from the original waveform.
42 |
43 |     Identical to _extract_feature above; kept for callers that
44 |     expect a "norm" variant.
45 |
46 |     Args:
47 |         wav_path: path to the input wav file.
48 |
49 |
50 |
51 |     Returns:
52 |         mat: fbank feature matrix, a numpy array of shape [num_frames, 80].
53 |     """
54 |
55 |     waveform, sample_rate = torchaudio.load_wav(wav_path)
56 |
57 |     mat = kaldi.fbank(
58 |         waveform,
59 |         num_mel_bins=80,
60 |         frame_length=25,
61 |         frame_shift=10,
62 |         dither=0.1,
63 |         energy_floor=0.0,
64 |         sample_frequency=sample_rate)
65 |     mat = mat.detach().numpy()
66 |
67 |     return mat
68 |
69 |
70 | hparams = {
71 |     'sample_rate': 16000,  # 16000 samples per second
72 |     'preemphasis': 0.97,
73 |     'n_fft': 1024,
74 |     'hop_length': 200,  # frame hop in samples (the original note said 80 samples / 5 ms, but 200 samples is 12.5 ms at 16 kHz)
75 |     'win_length': 800,  # frame width in samples (the original note said 400 samples / 25 ms, but 800 samples is 50 ms at 16 kHz)
76 |     'num_mels': 80,
77 |     'n_mfcc': 13,
78 |     'window': 'hann',
79 |     'fmin': 0.,
80 |     'fmax': 8000.,
81 |     'ref_db': 20,  #
82 |     'min_db': -80.0,  # restrict the dynamic range of log power
83 |     'iterations': 100,  # griffin_lim #iterations
84 |     'silence_db': -28.0,
85 |     'center': True,  # whether frames are centered, i.e. each feature vector describes the window centered on the current sample
86 | }
87 |
88 | _mel_basis = None
89 |
90 |
91 | def load_wav(wav_f, sr=None):
92 |     # wav_arr, _ = librosa.load(wav_f, sr=sr)
93 |     # return wav_arr
94 |     if type(wav_f) == str:
95 |         wav_arr, _ = librosa.load(wav_f, sr=sr)
96 |     else:
97 |         wav_arr = wav_f
98 |     return wav_arr
99 |
100 | def write_wav(write_path, wav_arr, sr):
101 |     wav_arr *= 32767 / max(0.01, np.max(np.abs(wav_arr)))
102 |     wavfile.write(write_path, sr, wav_arr.astype(np.int16))
103 |     return
104 |
105 | def preempahsis(wav_arr, pre_param=hparams['preemphasis']):
106 |     return signal.lfilter([1, -pre_param], [1], wav_arr)
107 |
108 | def deemphasis(wav_arr, pre_param=hparams['preemphasis']):
109 |     return signal.lfilter([1], [1, -pre_param], wav_arr)
110 |
111 | def split_wav(wav_arr, top_db=-hparams['silence_db']):
112 |     intervals = librosa.effects.split(wav_arr, top_db=top_db)
113 |     return intervals
114 |
115 | def mulaw_encode(wav_arr, quantization_channels):
116 |     mu = float(quantization_channels - 1)
117 |     safe_wav_abs = np.minimum(np.abs(wav_arr), 1.0)
118 |     encoded = np.sign(wav_arr) * np.log1p(mu * safe_wav_abs) / np.log1p(mu)
119 |     return encoded
120 |
121 | def mulaw_encode_quantize(wav_arr, quantization_channels):
122 |     mu = float(quantization_channels - 1)
123 |     safe_wav_abs = np.minimum(np.abs(wav_arr), 1.0)
124 |     encoded = np.sign(wav_arr) * np.log1p(mu * safe_wav_abs) / np.log1p(mu)
125 |     return ((encoded + 1.) / 2 * mu + 0.5).astype(np.int32)
126 |
127 | def mulaw_decode(encoded, quantization_channels):
128 |     mu = float(quantization_channels - 1)
129 |     magnitude = (1 / mu) * ((1 + mu) ** abs(encoded) - 1.)
130 |     return np.sign(encoded) * magnitude
131 |
132 | def mulaw_decode_quantize(encoded, quantization_channels):
133 |     mu = float(quantization_channels - 1)
134 |     signal = 2 * (encoded.astype(np.float32) / mu) - 1.
135 |     magnitude = (1 / mu) * ((1 + mu) ** abs(signal) - 1.)
136 |     return np.sign(signal) * magnitude
137 |
138 | def mulaw_encode_quantize_tf(wav_batch, quantization_channels):  # NOTE: needs tensorflow, whose import is commented out at the top
139 |     with tf.variable_scope('mulaw_encode'):
140 |         mu = tf.cast(quantization_channels - 1, tf.float32)
141 |         safe_wav_abs = tf.minimum(tf.abs(wav_batch), 1.0)
142 |         encoded = tf.sign(wav_batch) * tf.log1p(mu * safe_wav_abs) / tf.log1p(mu)
143 |         return tf.cast((encoded + 1.) / 2 * mu + 0.5, tf.int32)
144 |
145 | # def mulaw_encode_tf(wav_batch, quantization_channels):
146 | #     with tf.variable_scope('mulaw_encode'):
147 | #         mu = tf.cast(quantization_channels - 1, tf.float32)
148 | #         safe_wav_abs = tf.minimum(tf.abs(wav_batch), 1.0)
149 | #         encoded = tf.sign(wav_batch) * tf.log1p(mu * safe_wav_abs) / tf.log1p(mu)
150 | #         return encoded
151 |
152 | # def mulaw_decode_quantize_tf(encoded, quantization_channels):
153 | #     with tf.variable_scope('mulaw_decode'):
154 | #         mu = tf.cast(quantization_channels - 1, tf.float32)
155 | #         signal = 2 * (tf.cast(encoded, tf.float32) / mu) - 1.
156 | #         magnitude = (1 / mu) * ((1 + mu) ** abs(signal) - 1.)
157 | #         return tf.sign(signal) * magnitude
158 |
159 | # def mulaw_decode_tf(encoded, quantization_channels):
160 | #     with tf.variable_scope('mulaw_decode'):
161 | #         mu = tf.cast(quantization_channels - 1, tf.float32)
162 | #         magnitude = (1 / mu) * ((1 + mu) ** abs(encoded) - 1.)
163 | #         return tf.sign(encoded) * magnitude
164 |
165 | def stft(wav_arr, n_fft=hparams['n_fft'],  # short-time Fourier transform
166 |          hop_len=hparams['hop_length'],
167 |          win_len=hparams['win_length'],
168 |          window=hparams['window'],
169 |          center=hparams['center']):
170 |     # return shape: [n_freqs, time]
171 |     return librosa.core.stft(wav_arr, n_fft=n_fft, hop_length=hop_len,
172 |                              win_length=win_len, window=window, center=center)
173 |
174 | # def stft_tf(wav_arr, n_fft=hparams['n_fft'],
175 | #             hop_len=hparams['hop_length'],
176 | #             win_len=hparams['win_length'],
177 | #             window=hparams['window']):
178 | #     window_f = {'hann': tf.contrib.signal.hann_window,
179 | #                 'hamming': tf.contrib.signal.hamming_window}[window]
180 | #     # returned value is of shape [..., frames, fft_bins] and complex64 value
181 | #     return tf.contrib.signal.stft(signals=wav_arr, frame_length=win_len,
182 | #                                   frame_step=hop_len, fft_length=n_fft,
183 | #                                   window_fn=window_f)
184 |
185 | def istft(stft_matrix, hop_len=hparams['hop_length'],
186 |           win_len=hparams['win_length'], window=hparams['window']):
187 |     # stft_matrix should be complex stft results instead of magnitude spectrogram
188 |     # or power spectrogram, and of shape [n_freqs, time]
189 |     return librosa.core.istft(stft_matrix, hop_length=hop_len,
190 |                               win_length=win_len, window=window)
191 |
192 | # def istft_tf(stft_matrix, hop_len=hparams['hop_length'], n_fft=hparams['n_fft'],
193 | #              win_len=hparams['win_length'], window=hparams['window']):
194 | #     window_f = {'hann': tf.contrib.signal.hann_window,
195 | #                 'hamming': tf.contrib.signal.hamming_window}[window]
196 | #     # stft_matrix should be of shape [..., frames, fft_bins]
197 | #     return tf.contrib.signal.inverse_stft(stft_matrix, frame_length=win_len,
198 | #                                           frame_step=hop_len, fft_length=n_fft,
199 | #                                           window_fn=window_f)
200 |
201 | def spectrogram(wav_arr, n_fft=hparams['n_fft'],
202 |                 hop_len=hparams['hop_length'],
203 |                 win_len=hparams['win_length'],
204 |                 window=hparams['window'],
205 |                 center=hparams['center']):
206 |     # return shape: [time, n_freqs]
207 |     s = stft(wav_arr, n_fft=n_fft, hop_len=hop_len,
208 |              win_len=win_len, window=window, center=center).T
209 |     magnitude = np.abs(s)  # magnitude spectrum
210 |     power = magnitude ** 2  # power spectrum: the squared STFT magnitude (the original note questioned why STFT rather than a plain FFT is used)
211 |     return {'magnitude': magnitude,
212 |             'power': power,
213 |             'stft': s.T}
214 |
215 | def power_spec2mel(power_spec, sr=hparams['sample_rate'], n_fft=hparams['n_fft'],
216 |                    num_mels=hparams['num_mels'], fmin=hparams['fmin'], fmax=hparams['fmax']):
217 |     # power_spec should be of shape [time, 1+n_fft/2]
218 |     power_spec_t = power_spec.T
219 |     global _mel_basis
220 |     _mel_basis = (librosa.filters.mel(sr=sr, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax)
221 |                   if _mel_basis is None else _mel_basis)  # [n_mels, 1+n_fft/2]
222 |     mel_spec = np.dot(_mel_basis, power_spec_t)  # [n_mels, time]
223 |     return mel_spec.T  # mel spectrogram, [time, n_mels]
224 |
225 | def wav2melspec(wav_arr, sr=hparams['sample_rate'], n_fft=hparams['n_fft'],
226 |                 hop_len=hparams['hop_length'], win_len=hparams['win_length'],
227 |                 window=hparams['window'], num_mels=hparams['num_mels'],
228 |                 fmin=hparams['fmin'], fmax=hparams['fmax']):
229 |     power_spec = spectrogram(wav_arr, n_fft, hop_len, win_len, window)['power']
230 |     melspec = power_spec2mel(power_spec.T, sr, n_fft, num_mels, fmin, fmax)
231 |     return melspec  # [time, num_mels]
232 |
233 | def wav2mfcc(wav_arr, sr=hparams['sample_rate'], n_mfcc=hparams['n_mfcc'],
234 |              n_fft=hparams['n_fft'], hop_len=hparams['hop_length'],
235 |              win_len=hparams['win_length'], window=hparams['window'],
236 |              num_mels=hparams['num_mels'], fmin=0.0,
237 |              fmax=None, ref_db=hparams['ref_db']):
238 |     from scipy.fftpack import dct
239 |     print("wav_arr1:", wav_arr.shape)
240 |     wav_arr = preempahsis(wav_arr)
241 |     print("wav_arr2:", wav_arr.shape)
242 |
243 |     mag_spec = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len,
244 |                            win_len=win_len, window=window)['magnitude']
245 |     mel_spec = power_spec2mel(mag_spec, sr=sr, n_fft=n_fft, num_mels=num_mels,
246 |                               fmin=fmin, fmax=fmax)
247 |     # log_melspec = power2db(mel_spec, ref_db=ref_db)
248 |     log_melspec = librosa.amplitude_to_db(mel_spec)
249 |     mfcc = dct(x=log_melspec.T, axis=0, type=2, norm='ortho')[:n_mfcc]
250 |     # mfcc = np.dot(librosa.filters.dct(n_mfcc, log_melspec.shape[1]), log_melspec.T)
251 |     deltas = librosa.feature.delta(mfcc)
252 |     delta_deltas = librosa.feature.delta(mfcc, order=2)
253 |     mfcc_feature = np.concatenate((mfcc, deltas, delta_deltas), axis=0)
254 |
255 |     return mfcc_feature.T
256 |
257 | def wav2mfcc_v2(wav_arr, sr=hparams['sample_rate'], n_mfcc=hparams['n_mfcc'],  # this is the variant actually used
258 |                 n_fft=hparams['n_fft'], hop_len=hparams['hop_length'],
259 |                 win_len=hparams['win_length'], window=hparams['window'],
260 |                 num_mels=hparams['num_mels'], fmin=0.0,
261 |                 fmax=None, ref_db=hparams['ref_db'],
262 |                 center=hparams['center']):
263 |     from scipy.fftpack import dct
264 |     wav_arr = preempahsis(wav_arr)
265 |     # pre-emphasis filtering applied above
266 |     power_spec = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len,
267 |                              win_len=win_len, window=window, center=center)['power']
268 |     mel_spec = power_spec2mel(power_spec, sr=sr, n_fft=n_fft, num_mels=num_mels,
269 |                               fmin=fmin, fmax=fmax)  # mel spectrogram
270 |     log_melspec = power2db(mel_spec, ref_db=ref_db)  # log-mel spectrogram
271 |
272 |
273 |     """MFCC computation below (currently commented out)"""
274 |     # mfcc = dct(x=log_melspec.T, axis=0, type=2, norm='ortho')[:n_mfcc]
275 |     # deltas = librosa.feature.delta(mfcc)
276 |     # delta_deltas = librosa.feature.delta(mfcc, order=2)
277 |     # mfcc_feature = np.concatenate((mfcc, deltas, delta_deltas), axis=0)
278 |     # return mfcc_feature.T
279 |     x_stft = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len,
280 |                          win_len=win_len, window=window, center=center)['stft']
281 |     # print("log_melspec:", x_stft.shape)
282 |     return log_melspec, x_stft  # note: returns a (features, stft) tuple
283 |
284 |
285 | def wav2linear_v2(wav_arr, sr=hparams['sample_rate'], n_mfcc=hparams['n_mfcc'],
286 |                   n_fft=hparams['n_fft'], hop_len=hparams['hop_length'],
287 |                   win_len=hparams['win_length'], window=hparams['window'],
288 |                   num_mels=hparams['num_mels'], fmin=0.0,
289 |                   fmax=None, ref_db=hparams['ref_db'],
290 |                   center=hparams['center']):
291 |     from scipy.fftpack import dct
292 |     wav_arr = preempahsis(wav_arr)
293 |     # pre-emphasis filtering applied above
294 |     power_spec = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len,
295 |                              win_len=win_len, window=window, center=center)['power']
296 |     linear = _amp_to_db(power_spec, ref_db=ref_db)  # log power (linear-frequency) spectrogram, despite the original "log-mel" note
297 |     normalized_linear = _db_normalize(linear, min_db=hparams['min_db'])
298 |     x_stft = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len,
299 |                          win_len=win_len, window=window, center=center)['stft']
300 |
301 |
302 |     return normalized_linear, x_stft
303 |
304 | def _amp_to_db(x, ref_db=20):
305 |     return 20 * np.log10(np.maximum(1e-5, x)) + ref_db
306 |
307 |
308 | def mel2log_mel(mel_spec, ref_db=hparams['ref_db'], min_db=hparams['min_db']):
309 |     log_mel = power2db(mel_spec, ref_db)
310 |     normalized = log_power_normalize(log_mel, min_db)
311 |     return normalized
312 |
313 | def power2db(power_spec, ref_db=hparams['ref_db'], tol=1e-5):
314 |     # power spectrogram is stft ** 2
315 |     # returned value: (10. * log10(power_spec) - ref_db)
316 |     return 10. * np.log10(power_spec + tol) - ref_db
317 |
318 | def db2power(power_db, ref_db=hparams['ref_db']):
319 |     return np.power(10.0, 0.1 * (power_db + ref_db))
320 | #
321 | # def db2power_tf(power_db, ref_db=hparams['ref_db']):
322 | #     return tf.pow(10.0, 0.1 * (power_db + ref_db))
323 |
324 | def log_power_normalize(log_power, min_db=hparams['min_db']):
325 |     """
326 |     :param log_power: in db, computed by power2db(spectrogram(wav_arr)['power'])
327 |     :param min_db: minimum value of log_power in db
328 |     :return: log_power normalized to [0., 1.]
329 |     """
330 |     assert min_db < 0., "min_db should be a negative value like -80.0 or -100.0"
331 |     return np.clip((log_power - min_db) / -min_db, 0., 1.)
332 |
333 | def log_power_denormalize(normalized_logpower, min_db=hparams['min_db']):
334 |     return np.clip(normalized_logpower, 0., 1.) * -min_db + min_db
335 |
336 | # def log_power_denormalize_tf(normalized_logpower, min_db=hparams['min_db']):
337 | #     return tf.clip_by_value(normalized_logpower, 0., 1.) * -min_db + min_db
338 |
339 | def griffin_lim(magnitude_spec, iterations=hparams['iterations']):
340 |     """
341 |     :param magnitude_spec: magnitude spectrogram of shape [time, n_freqs]
342 |                            obtained from spectrogram(wav_arr)['magnitude']
343 |     :param iterations: number of iterations to estimate phase
344 |     :return: waveform array
345 |     """
346 |     mag = magnitude_spec.T  # transpose to [n_freqs, time]
347 |     angles = np.exp(2j * np.pi * np.random.rand(*mag.shape))
348 |     complex_mag = np.abs(mag).astype(np.complex128)  # the bare np.complex alias is gone from modern NumPy
349 |     stft_0 = complex_mag * angles
350 |     y = istft(stft_0)
351 |     for i in range(iterations):
352 |         angles = np.exp(1j * np.angle(stft(y)))
353 |         y = istft(complex_mag * angles)
354 |     return y
355 |
356 | # def grinffin_lim_tf(magnitude_spec, iterations=hparams['iterations']):
357 | #     # magnitude_spec: [frames, fft_bins], of type tf.float32
358 | #     angles = tf.cast(
359 | #         tf.exp(2j * np.pi * tf.cast(
360 | #             tf.random_uniform(
361 | #                 tf.shape(magnitude_spec)),
362 | #             dtype=tf.complex64)),
363 | #         dtype=tf.complex64)
364 | #     complex_mag = tf.cast(tf.abs(magnitude_spec), tf.complex64)
365 | #     stft_0 = complex_mag * angles
366 | #     y = istft_tf(stft_0)
367 | #     for i in range(iterations):
368 | #         angles = tf.exp(1j * tf.cast(tf.angle(stft_tf(y)), tf.complex64))
369 | #         y = istft_tf(complex_mag * angles)
370 | #     return y
371 |
372 | def griffin_lim_test(wav_f, n_fft=hparams['n_fft'],
373 |                      hop_len=hparams['hop_length'],
374 |                      win_len=hparams['win_length'],
375 |                      window=hparams['window']):
376 |     wav_arr = load_wav(wav_f)
377 |     spec_dict = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len,
378 |                             win_len=win_len, window=window)
379 |     mag_spec = spec_dict['magnitude']
380 |     y = griffin_lim(mag_spec)
381 |     write_wav('reconstructed1.wav', y, sr=16000)
382 |
383 | def stft2wav_test(stft_f, mean_f, std_f):
384 |     spec = np.load(stft_f)
385 |     mean = np.load(mean_f)
386 |     std = np.load(std_f)
387 |     spec = spec * std + mean
388 |     spec = log_power_denormalize(spec)
389 |     power_spec = db2power(spec)
390 |     mag_spec = power_spec ** 0.5
391 |     y = griffin_lim(mag_spec)
392 |     y = deemphasis(y)
393 |     write_wav('reconstructed2.wav', y, sr=16000)
394 |     return y
395 | #
396 | # def stft2wav_tf_test(stft_f, mean_f, std_f):
397 | #     # get inputs
398 | #     spec = np.load(stft_f)
399 | #     mean = np.load(mean_f)
400 | #     std = np.load(std_f)
401 | #     spec = spec * std + mean
402 | #     # build graph
403 | #     spec_pl = tf.placeholder(tf.float32, [None, None, 513])
404 | #     denormalized = log_power_denormalize_tf(spec_pl)
405 | #     mag_spec = tf.pow(db2power_tf(denormalized), 0.5)
406 | #     wav = grinffin_lim_tf(mag_spec)
407 | #     # set session and run
408 | #     config = tf.ConfigProto()
409 | #     config.gpu_options.allow_growth = True
410 | #     sess = tf.Session(config=config)
411 | #     wav_arr = sess.run(wav, feed_dict={spec_pl: np.expand_dims(spec, axis=0)})
412 | #     sess.close()
413 | #     y = deemphasis(np.squeeze(wav_arr))
414 | #     write_wav('reconstructed_tf.wav', y, sr=16000)
415 | #     return y
416 |
417 | # number of hyperparameters: 1
418 | # return: db normalized to [0., 1.]
419 | def _db_normalize(db, min_db):
420 |     return np.clip((db - min_db) / -min_db, 0., 1.)
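# A minimal worked example (not part of the original file) of the normalize /
# denormalize pair with the default min_db of -80.0:
#
#     >>> _db_normalize(np.array([-40.0]), min_db=-80.0)        # ((-40) - (-80)) / 80
#     array([0.5])
#     >>> log_power_denormalize(np.array([0.5]), min_db=-80.0)  # 0.5 * 80 - 80
#     array([-40.])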
421 |
422 |
423 |
424 |
425 | def mfcc_test():
426 |     wav_f = './test.wav'
427 |     wav_arr = load_wav(wav_f)
428 |
429 |
430 |     mfcc, _ = wav2mfcc_v2(wav_arr)  # wav2mfcc_v2 returns a (log_melspec, x_stft) tuple
431 |     mfcc1 = np.load('test.npy')
432 |     print(mfcc.min(), mfcc1.min())
433 |     print(mfcc.max(), mfcc1.max())
434 |     print(mfcc.mean(), mfcc1.mean())
435 |     print(np.abs(mfcc - mfcc1))
436 |     print(np.mean(np.abs(mfcc - mfcc1)))
437 |     import matplotlib.pyplot as plt
438 |     plt.figure()
439 |     plt.subplot(211)
440 |     plt.imshow(mfcc.T, origin='lower')
441 |     # plt.colorbar()
442 |     plt.subplot(212)
443 |     plt.imshow(mfcc1.T, origin='lower')
444 |     # plt.colorbar()
445 |     plt.tight_layout()
446 |     plt.show()
447 |     return
448 |
449 |
450 |
451 | if __name__ == '__main__':
452 |     mfcc_test()
453 |
--------------------------------------------------------------------------------
/wenet/transformer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/__init__.py
--------------------------------------------------------------------------------
/wenet/transformer/asr_model.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/asr_model.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/transformer/attention.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/attention.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/transformer/cmvn.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/cmvn.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/transformer/convolution.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/convolution.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/transformer/ctc.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/ctc.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/transformer/decoder.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/decoder.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/transformer/decoder_layer.cpython-38-x86_64-linux-gnu.so:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/decoder_layer.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /wenet/transformer/embedding.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/embedding.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /wenet/transformer/encoder.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/encoder.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /wenet/transformer/encoder_layer.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/encoder_layer.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /wenet/transformer/label_smoothing_loss.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/label_smoothing_loss.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /wenet/transformer/positionwise_feed_forward.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/positionwise_feed_forward.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /wenet/transformer/subsampling.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/subsampling.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /wenet/transformer/swish.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/swish.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /wenet/utils/checkpoint.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/utils/checkpoint.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /wenet/utils/cmvn.py: 
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (c) 2020 Mobvoi Inc (Binbin Zhang)
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | import json
17 | import logging
18 | import math
19 | import sys
20 |
21 | import numpy as np
22 |
23 |
24 | def _load_json_cmvn(json_cmvn_file):
25 |     """ Load the json format cmvn stats file and calculate cmvn
26 |
27 |     Args:
28 |         json_cmvn_file: cmvn stats file in json format
29 |
30 |     Returns:
31 |         a numpy array of [means, vars]
32 |     """
33 |     with open(json_cmvn_file) as f:
34 |         cmvn_stats = json.load(f)
35 |
36 |     means = cmvn_stats['mean_stat']
37 |     variance = cmvn_stats['var_stat']
38 |     count = cmvn_stats['frame_num']
39 |     for i in range(len(means)):
40 |         means[i] /= count
41 |         variance[i] = variance[i] / count - means[i] * means[i]
42 |         if variance[i] < 1.0e-20:
43 |             variance[i] = 1.0e-20
44 |         variance[i] = 1.0 / math.sqrt(variance[i])
45 |     cmvn = np.array([means, variance])
46 |     return cmvn
47 |
48 |
49 | def _load_kaldi_cmvn(kaldi_cmvn_file):
50 |     """ Load the kaldi format cmvn stats file and calculate cmvn
51 |
52 |     Args:
53 |         kaldi_cmvn_file: kaldi text style global cmvn file, which
54 |             is generated by:
55 |             compute-cmvn-stats --binary=false scp:feats.scp global_cmvn
56 |
57 |     Returns:
58 |         a numpy array of [means, vars]
59 |     """
60 |     means = []
61 |     variance = []
62 |     with open(kaldi_cmvn_file, 'r') as fid:
63 |         # kaldi binary file start with '\0B'
64 |         if fid.read(2) == '\0B':
65 |             logging.error('kaldi cmvn binary file is not supported, please '
66 |                           'recompute it by: compute-cmvn-stats --binary=false '
67 |                           ' scp:feats.scp global_cmvn')
68 |             sys.exit(1)
69 |         fid.seek(0)
70 |         arr = fid.read().split()
71 |         assert (arr[0] == '[')
72 |         assert (arr[-2] == '0')
73 |         assert (arr[-1] == ']')
74 |         feat_dim = int((len(arr) - 2 - 2) / 2)
75 |         for i in range(1, feat_dim + 1):
76 |             means.append(float(arr[i]))
77 |         count = float(arr[feat_dim + 1])
78 |         for i in range(feat_dim + 2, 2 * feat_dim + 2):
79 |             variance.append(float(arr[i]))
80 |
81 |     for i in range(len(means)):
82 |         means[i] /= count
83 |         variance[i] = variance[i] / count - means[i] * means[i]
84 |         if variance[i] < 1.0e-20:
85 |             variance[i] = 1.0e-20
86 |         variance[i] = 1.0 / math.sqrt(variance[i])
87 |     cmvn = np.array([means, variance])
88 |     return cmvn
89 |
90 |
91 | def load_cmvn(cmvn_file, is_json):
92 |     if is_json:
93 |         cmvn = _load_json_cmvn(cmvn_file)
94 |     else:
95 |         cmvn = _load_kaldi_cmvn(cmvn_file)
96 |     return cmvn[0], cmvn[1]  # (means, inverse standard deviations)
97 |
--------------------------------------------------------------------------------
/wenet/utils/common.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/utils/common.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
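A minimal sketch (not from the repository) of how the statistics returned by load_cmvn above are typically applied to an fbank feature matrix; the stats-file path is hypothetical:

    import numpy as np
    from wenet.utils.cmvn import load_cmvn

    means, istd = load_cmvn("global_cmvn.json", is_json=True)     # hypothetical stats file
    feats = np.random.randn(120, len(means)).astype(np.float32)   # [num_frames, feat_dim] fbank
    normalized = (feats - np.asarray(means)) * np.asarray(istd)   # per-dimension mean/variance normalization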
/wenet/utils/ctc_util.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/utils/ctc_util.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/utils/executor.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/utils/executor.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/utils/mask.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/utils/mask.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/utils/scheduler.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/utils/scheduler.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/xseg/dfl_xseg_api.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/xseg/dfl_xseg_api.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/y_utils/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 | """
4 | @project : dhp-service
5 | @author : huyi
6 | @file : __init__.py.py
7 | @ide : PyCharm
8 | @time : 2021-08-18 16:29:13
9 | """
--------------------------------------------------------------------------------
/y_utils/config.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/y_utils/config.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/y_utils/lcr.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/y_utils/lcr.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/y_utils/liblcr.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/y_utils/liblcr.so
--------------------------------------------------------------------------------
/y_utils/logger.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/y_utils/logger.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /y_utils/md5.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/y_utils/md5.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /y_utils/time_utils.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/y_utils/time_utils.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /y_utils/tools.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/y_utils/tools.cpython-38-x86_64-linux-gnu.so --------------------------------------------------------------------------------
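As a closing pointer, the very last step of run.py's pipeline is a plain ffmpeg mux of the silent render with the driving audio (the command template is visible in write_video above and in log/dh.log). A minimal standalone sketch of that step, with hypothetical paths:

    import subprocess

    def mux_audio(audio_path: str, silent_mp4: str, result_mp4: str) -> None:
        # same template as the no-watermark branch of write_video() in run.py
        command = (
            "ffmpeg -loglevel warning -y -i {} -i {} "
            "-c:a aac -c:v libx264 -crf 15 -strict -2 {}"
        ).format(audio_path, silent_mp4, result_mp4)
        subprocess.call(command, shell=True)

    mux_audio("example/audio.wav", "1004-t.mp4", "1004-r.mp4")  # hypothetical work id 1004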