├── .DS_Store
├── 1.jpeg
├── README.md
├── README_en.md
├── README_tts_f2f.MD
├── app.py
├── check_env
│   └── check_onnx_cuda.py
├── config
│   └── config.ini
├── download.sh
├── example
│   ├── audio.wav
│   └── video.mp4
├── face_attr_detect
│   ├── .DS_Store
│   ├── __init__.py
│   └── face_attr.cpython-38-x86_64-linux-gnu.so
├── face_detect_utils
│   ├── __init__.py
│   ├── face_detect.cpython-38-x86_64-linux-gnu.so
│   ├── head_pose.cpython-38-x86_64-linux-gnu.so
│   └── scrfd.cpython-38-x86_64-linux-gnu.so
├── face_lib
│   ├── __init__.py
│   ├── face_detect_and_align
│   │   ├── __init__.py
│   │   ├── face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so
│   │   ├── face_align_utils.cpython-38-x86_64-linux-gnu.so
│   │   └── scrfd_insightface
│   │       ├── __init__.py
│   │       └── scrfd.cpython-38-x86_64-linux-gnu.so
│   ├── face_parsing
│   │   ├── __init__.py
│   │   └── face_parsing_api.cpython-38-x86_64-linux-gnu.so
│   └── face_restore
│       ├── __init__.py
│       └── gfpgan_onnx
│           └── gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so
├── h_utils
│   ├── __init__.py
│   ├── custom.cpython-38-x86_64-linux-gnu.so
│   ├── obs_client.cpython-38-x86_64-linux-gnu.so
│   ├── request_utils.cpython-38-x86_64-linux-gnu.so
│   ├── sweep_bot.cpython-38-x86_64-linux-gnu.so
│   └── zip_utils.cpython-38-x86_64-linux-gnu.so
├── inference_from_text.sh
├── landmark2face_wy
│   ├── audio_handler.cpython-38-x86_64-linux-gnu.so
│   ├── checkpoints
│   │   └── test
│   │       └── opt.txt
│   ├── data
│   │   ├── Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so
│   │   ├── Facereala3dmmexp512_dataset.py
│   │   ├── Facereala3dmmexpwenet512_dataset.py
│   │   ├── __init__.py
│   │   ├── base_dataset.cpython-38-x86_64-linux-gnu.so
│   │   ├── image_folder.cpython-38-x86_64-linux-gnu.so
│   │   ├── l2faceaudio512_dataset.py
│   │   └── l2faceaudio_dataset.py
│   ├── digitalhuman_interface.cpython-38-x86_64-linux-gnu.so
│   ├── loss
│   │   ├── __init__.py
│   │   └── perceptual.cpython-38-x86_64-linux-gnu.so
│   ├── models
│   │   ├── DINet.cpython-38-x86_64-linux-gnu.so
│   │   ├── __init__.py
│   │   ├── base_function.cpython-38-x86_64-linux-gnu.so
│   │   ├── base_model.cpython-38-x86_64-linux-gnu.so
│   │   ├── face3d2face_model.cpython-38-x86_64-linux-gnu.so
│   │   ├── face_model.cpython-38-x86_64-linux-gnu.so
│   │   ├── l2faceaudio_model.cpython-38-x86_64-linux-gnu.so
│   │   ├── networks.cpython-38-x86_64-linux-gnu.so
│   │   ├── networks_HD.cpython-38-x86_64-linux-gnu.so
│   │   ├── networks_pix2pixHD.cpython-38-x86_64-linux-gnu.so
│   │   ├── pirender_3dmm_mouth_hd_model.cpython-38-x86_64-linux-gnu.so
│   │   └── pirender_3dmm_mouth_hdv2_model.cpython-38-x86_64-linux-gnu.so
│   ├── options
│   │   ├── __init__.py
│   │   ├── base_options.cpython-38-x86_64-linux-gnu.so
│   │   ├── test_options.cpython-38-x86_64-linux-gnu.so
│   │   └── train_options.cpython-38-x86_64-linux-gnu.so
│   ├── sync_batchnorm
│   │   ├── __init__.py
│   │   ├── batchnorm.cpython-38-x86_64-linux-gnu.so
│   │   ├── batchnorm_reimpl.cpython-38-x86_64-linux-gnu.so
│   │   ├── comm.cpython-38-x86_64-linux-gnu.so
│   │   ├── replicate.cpython-38-x86_64-linux-gnu.so
│   │   └── unittest.cpython-38-x86_64-linux-gnu.so
│   ├── test_3dmm_multi_exp_wenet.cpython-38-x86_64-linux-gnu.so
│   ├── test_3dmm_multi_exp_wenet0.cpython-38-x86_64-linux-gnu.so
│   └── util
│       ├── __init__.py
│       ├── flow_util.cpython-38-x86_64-linux-gnu.so
│       ├── get_data.cpython-38-x86_64-linux-gnu.so
│       ├── html.cpython-38-x86_64-linux-gnu.so
│       ├── image_pool.cpython-38-x86_64-linux-gnu.so
│       ├── util.cpython-38-x86_64-linux-gnu.so
│       └── visualizer.cpython-38-x86_64-linux-gnu.so
├── license.txt
├── log
│   └── dh.log
├── model_lib
│   ├── __init__.py
│   ├── base_wrapper
│   │   ├── __init__.py
│   │   └── onnx_model.cpython-38-x86_64-linux-gnu.so
│   └── model_base.py
├── preprocess_audio_and_3dmm.cpython-38-x86_64-linux-gnu.so
├── requirements.txt
├── requirements_0.txt
├── run.py
├── service
│   ├── __init__.py
│   ├── server.cpython-38-x86_64-linux-gnu.so
│   └── trans_dh_service.cpython-38-x86_64-linux-gnu.so
├── sources.list
├── wenet
│   ├── compute_ctc_att_bnf.cpython-38-x86_64-linux-gnu.so
│   ├── examples
│   │   └── aishell
│   │       └── aidata
│   │           └── conf
│   │               ├── train_conformer_multi_cn.yaml
│   │               └── train_conformer_multi_cn_linear.yaml
│   ├── tools
│   │   └── _extract_feats.py
│   ├── transformer
│   │   ├── __init__.py
│   │   ├── asr_model.cpython-38-x86_64-linux-gnu.so
│   │   ├── attention.cpython-38-x86_64-linux-gnu.so
│   │   ├── cmvn.cpython-38-x86_64-linux-gnu.so
│   │   ├── convolution.cpython-38-x86_64-linux-gnu.so
│   │   ├── ctc.cpython-38-x86_64-linux-gnu.so
│   │   ├── decoder.cpython-38-x86_64-linux-gnu.so
│   │   ├── decoder_layer.cpython-38-x86_64-linux-gnu.so
│   │   ├── embedding.cpython-38-x86_64-linux-gnu.so
│   │   ├── encoder.cpython-38-x86_64-linux-gnu.so
│   │   ├── encoder_layer.cpython-38-x86_64-linux-gnu.so
│   │   ├── label_smoothing_loss.cpython-38-x86_64-linux-gnu.so
│   │   ├── positionwise_feed_forward.cpython-38-x86_64-linux-gnu.so
│   │   ├── subsampling.cpython-38-x86_64-linux-gnu.so
│   │   └── swish.cpython-38-x86_64-linux-gnu.so
│   └── utils
│       ├── checkpoint.cpython-38-x86_64-linux-gnu.so
│       ├── cmvn.py
│       ├── common.cpython-38-x86_64-linux-gnu.so
│       ├── ctc_util.cpython-38-x86_64-linux-gnu.so
│       ├── executor.cpython-38-x86_64-linux-gnu.so
│       ├── mask.cpython-38-x86_64-linux-gnu.so
│       └── scheduler.cpython-38-x86_64-linux-gnu.so
├── xseg
│   └── dfl_xseg_api.cpython-38-x86_64-linux-gnu.so
└── y_utils
    ├── __init__.py
    ├── config.cpython-38-x86_64-linux-gnu.so
    ├── lcr.cpython-38-x86_64-linux-gnu.so
    ├── liblcr.so
    ├── logger.cpython-38-x86_64-linux-gnu.so
    ├── md5.cpython-38-x86_64-linux-gnu.so
    ├── time_utils.cpython-38-x86_64-linux-gnu.so
    └── tools.cpython-38-x86_64-linux-gnu.so

/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/.DS_Store
--------------------------------------------------------------------------------
/1.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/1.jpeg
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | [![License](https://img.shields.io/badge/License-View%20License-blue.svg)](https://github.com/GuijiAI/HeyGem.ai/blob/main/LICENSE)
3 | ![Python](https://img.shields.io/badge/Python-3.8-blue.svg)
4 | ![Linux](https://img.shields.io/badge/OS-Linux-brightgreen.svg)
5 |
6 | **[中文](#chinese-version)** | **[English](README_en.md)**
7 |
8 | ---
9 |
10 |
11 |
12 |
13 | # HeyGem-Linux-Python-Hack
14 |
15 | ## 项目简介
16 |
17 | HeyGem-Linux-Python-Hack 是一个基于 Python 的数字人项目,它从 [HeyGem.ai](https://github.com/GuijiAI/HeyGem.ai) 中提取出来,能够直接在 Linux 系统上运行,摆脱了对 Docker 和 Windows 系统的依赖。我们的目标是提供一个更易于部署和使用的数字人解决方案。
18 |
19 | [RTX 50 版本已经发布,点击可达](https://github.com/Holasyb918/HeyGem-Linux-Python-Hack-RTX-50)
20 | [Text To Face] 如果你需要较为完整的 HeyGem,即从 TTS 到数字人的完整流程,可以参考 [这里](README_tts_f2f.MD)
21 |
22 | **如果你觉得这个项目对你有帮助,欢迎给我们 Star!**
23 | **如果运行过程中遇到问题,请先查阅已有 Issue 以及 Google/baidu/ai,仍无法解决时欢迎提交 Issues!**
24 | **本项目中,所有 .so 文件均由硅基编译,与开发者无关**
25 | **本项目中,所有模型均由硅基提供,与开发者无关**
26 |
27 | ## 主要特性
28 |
29 | * 无需 Docker: 直接在 Linux 系统上运行,简化部署流程。
30 | * 无需 Windows: 完全基于 Linux 开发和测试。
31 | * Python 驱动: 使用 Python 语言开发,易于理解和扩展。
32 | * 开发者友好: 易于使用和扩展。
33 | * 完全离线。
34 |
35 | 微信群
36 | ![](./1.jpeg)
37 |
38 | ## 开始使用
39 |
40 | ### 安装
41 | #### 环境
42 | 本项目**支持且仅支持 Linux & python3.8 环境**。
43 | 请确保你的 Linux 系统上已经安装了 **Python 3.8**,然后使用 pip 安装项目依赖项。
44 | **备用**:同时也提供一个备用的环境文件 [requirements_0.txt](requirements_0.txt),遇到问题的话,可以参考它来建立一个新的环境。
45 | **具体的 onnxruntime-gpu / torch 等版本需要结合你机器上的 CUDA 版本去尝试组合,否则仍旧可能遇到问题。**
46 | **请尽量不要询问任何关于 pip 的问题,感谢合作**
47 | **如果你遇到了环境难以搭建完成的问题,建议参考 [autodl 环境](https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/issues/43), 备注: 开发者与 autodl 无任何利益相关**
48 |
49 |
50 | ```bash
51 | # 直接安装整个 requirements.txt 不一定成功,更建议先跑代码观察报错信息,再根据报错信息结合 requirements 尝试安装,祝你顺利。
52 | # pip install -r requirements.txt
53 | ```
54 |
55 | ### 使用
56 | 把项目克隆到本地:
57 | ```bash
58 | git clone https://github.com/Holasyb918/HeyGem-Linux-Python-Hack
59 | cd HeyGem-Linux-Python-Hack
60 | bash download.sh
61 | ```
62 | #### 开始使用
63 | * repo 中已提供可以用于 demo 的音视频样例,代码可以直接运行。
64 | #### command:
65 | ```bash
66 | python run.py
67 | ```
68 |
69 | * 如果要使用自己的数据,可以外部传入参数。请注意,**path 必须是本地文件,且仅支持相对路径**。
70 |
71 | #### command:
72 | ```bash
73 | python run.py --audio_path example/audio.wav --video_path example/video.mp4
74 | ```
75 | #### gradio:
76 | ```bash
77 | python app.py
78 | # 请等待模型初始化完成后再提交任务
79 | ```
80 |
81 | ## QA
82 | ### 1. 多个人脸报错
83 | 下载新的人脸检测模型,替换原本的人脸检测模型,或许可以解决。
84 | ```bash
85 | wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/scrfd_10g_kps.onnx
86 | mv face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx.bak
87 | mv scrfd_10g_kps.onnx face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx
88 | ```
89 | ### 2. 初始化报错
90 |
91 | 有较高概率是 onnxruntime-gpu 版本不匹配导致的。
92 | ```bash
93 | python check_env/check_onnx_cuda.py
94 | ```
95 | 观察输出是否包含 successfully。
96 | 如果遇到问题,你可以尝试以下方法:
97 | 1. 建议根据自己的 CUDA 等环境尝试更换一些版本。
98 | 2. 如果难以解决,先卸载 onnxruntime-gpu 和 onnxruntime,然后使用 conda 安装 cudatoolkit 环境,再尝试用 pip 安装 onnxruntime-gpu。
99 |
100 | 验证可行的版本组合如下:
101 | | cudatoolkit | onnxruntime-gpu | 备注 |
102 | | --- | --- | --- |
103 | | 11.8.0 | 1.16.0 | |
104 |
105 | ### 3. ImportError: cannot import name check_argument_types
106 | 缺少依赖包:
107 | ```bash
108 | pip install typeguard
109 | ```
110 |
111 | ### 4. library.so 找不到
112 | 报错一般类似于 Could not load library libcublasLt.so.11. Error: libcublasLt.so.11: cannot open shared object file: No such file or directory
113 |
114 | 执行以下命令查看是否有该文件:
115 | ```
116 | sudo find /usr -name "libcublasLt.so.11"
117 | ```
118 | 没有的话,需要安装对应版本的 CUDA;
119 | 如果有的话,就把上一步找到的文件路径添加到环境变量:
120 | ```
121 | export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
122 | ```
123 | 要永久生效,就把这行添加到 ~/.bashrc 里,然后执行 source ~/.bashrc。
124 |
125 | ## Contributing
126 | 欢迎贡献!
127 |
128 | ## License
129 | 参考 heyGem.ai 的协议。
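
在 QA 2 的基础上,补充一个更轻量的自检片段(示意代码,不在本仓库中,也不加载任何模型,只查询当前的 onnxruntime 安装):

```python
# quick_onnx_probe.py:示意性片段(非本仓库文件)
import onnxruntime as ort

# 列出当前 onnxruntime 构建支持的 execution provider;
# GPU 环境正常时应包含 "CUDAExecutionProvider"。
providers = ort.get_available_providers()
print("available providers:", providers)
print("device:", ort.get_device())  # onnxruntime-gpu 构建下为 "GPU"

if "CUDAExecutionProvider" not in providers:
    print("未找到 CUDA provider,请参考上表核对 cudatoolkit 与 "
          "onnxruntime-gpu 的版本搭配(如 11.8.0 + 1.16.0)。")
```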
--------------------------------------------------------------------------------
/README_en.md:
--------------------------------------------------------------------------------
1 |
2 | [![License](https://img.shields.io/badge/License-View%20License-blue.svg)](https://github.com/GuijiAI/HeyGem.ai/blob/main/LICENSE)
3 | ![Python](https://img.shields.io/badge/Python-3.8-blue.svg)
4 | ![Linux](https://img.shields.io/badge/OS-Linux-brightgreen.svg)
5 |
6 | **[中文](./README.md)** | **[English](#english-version)**
7 |
8 | ---
9 |
10 |
11 |
12 | # HeyGem-Linux-Python-Hack
13 |
14 | ## Introduction
15 |
16 | HeyGem-Linux-Python-Hack is a Python-based digital human project extracted from [HeyGem.ai](https://github.com/GuijiAI/HeyGem.ai). It is designed to run directly on Linux, eliminating the need for Docker and Windows. Our goal is to provide an easier-to-deploy, user-friendly digital human solution.
17 |
18 | **Feel free to Star us if you find this project useful!**
19 | **Please submit an Issue if you run into any problems!**
20 |
21 | ## Key Features
22 |
23 | * No Docker Required: Runs directly on Linux systems, simplifying the deployment process.
24 | * No Windows Required: Fully developed and tested on Linux.
25 | * Python Powered: Developed in Python, making it easy to understand and extend.
26 | * Developer-Friendly: Easy to use and extend.
27 |
28 | ## Getting Started
29 |
30 | ### Installation
31 |
32 | Please ensure that **Python 3.8** is installed on your Linux system. Then install the project dependencies using pip:
33 |
34 | ```bash
35 | pip install -r requirements.txt
36 | ```
37 |
38 | ### Usage
39 | Clone this repository to your local machine:
40 | ```bash
41 | git clone https://github.com/Holasyb918/HeyGem-Linux-Python-Hack
42 | cd HeyGem-Linux-Python-Hack
43 | bash download.sh
44 | ```
45 | #### Getting Started
46 | * Audio and video examples that can be used for the demo are already provided in the repo, so the code can be run directly.
47 | #### Command:
48 | ```bash
49 | python run.py
50 | ```
51 | * If you want to use your own data, you can pass parameters externally. **Please note that each path must point to a local file, and only relative paths are supported.**
52 | #### command:
53 | ```bash
54 | python run.py --audio_path example/audio.wav --video_path example/video.mp4
55 | ```
56 | #### gradio:
57 | ```bash
58 | python app.py
59 | # Please wait until processor init is done before submitting a task.
60 | ```
61 |
62 | ## Contributing
63 | Contributions are welcome!
64 |
65 | ## License
66 | This project is licensed under the HeyGem.ai License.
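
The CLI above can also be driven from another script. Below is a minimal sketch (a hypothetical `batch_run.py`, not part of this repo) that simply mirrors the documented `run.py` flags:

```python
# batch_run.py -- illustrative wrapper around the documented CLI (not in the repo)
import subprocess
from pathlib import Path

def generate(audio: str, video: str) -> None:
    """Invoke run.py exactly as shown in the README above."""
    # The README notes that only relative, local paths are supported.
    assert not Path(audio).is_absolute() and not Path(video).is_absolute()
    subprocess.run(
        ["python", "run.py", "--audio_path", audio, "--video_path", video],
        check=True,
    )

if __name__ == "__main__":
    generate("example/audio.wav", "example/video.mp4")
```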
67 | -------------------------------------------------------------------------------- /README_tts_f2f.MD: -------------------------------------------------------------------------------- 1 | 2 | [![License](https://img.shields.io/badge/License-View%20License-blue.svg)](https://github.com/GuijiAI/HeyGem.ai/blob/main/LICENSE) 3 | ![Python](https://img.shields.io/badge/Python-3.8-blue.svg) 4 | ![Linux](https://img.shields.io/badge/OS-Linux-brightgreen.svg) 5 | 6 | **[中文](#chinese-version)** | **[English](README_en.md)** 7 | 8 | --- 9 | 10 | 11 | 12 | # HeyGem-Linux-Python-Hack 13 | 14 | ## 项目简介 15 | 16 | [HeyGem-Linux-Python-Hack] 是一个基于 Python 的数字人项目,它从 [HeyGem.ai](https://github.com/GuijiAI/HeyGem.ai) 中提取出来,它能够直接在 Linux 系统上运行,摆脱了对 Docker 和 Windows 系统的依赖。我们的目标是提供一个更易于部署和使用的数字人解决方案。 17 | 18 | **如果你觉得这个项目对你有帮助,欢迎给我们 Star!** 19 | **如果运行过程中遇到问题,在查阅已有 Issue 后,在查阅 Google/baidu/ai 后,欢迎提交 Issues!** 20 | 21 | ## 主要特性 22 | 23 | * 无需 Docker: 直接在 Linux 系统上运行,简化部署流程。 24 | * 无需 Windows: 完全基于 Linux 开发和测试。 25 | * Python 驱动: 使用 Python 语言开发,易于理解和扩展。 26 | * 开发者友好: 易于使用和扩展。 27 | * 完全离线。 28 | 29 | ## 开始使用 30 | 31 | ### 环境 32 | 本项目包括 tts 和 face2face 两部分 33 | * tts 部分支持 3.8,事实上有更高版本更好; 34 | * face2face 部分支持且仅支持 3.8。 35 | 36 | 37 | ### 使用 38 | 把项目克隆到本地 39 | ```bash 40 | # f2f 41 | git clone https://github.com/Holasyb918/HeyGem-Linux-Python-Hack 42 | cd HeyGem-Linux-Python-Hack 43 | # 下载 f2f 模型 44 | bash download.sh 45 | 46 | # tts 47 | git clone https://github.com/Holasyb918/tts-fish-speech 48 | cd tts-fish-speech 49 | # 下载 tts 模型 50 | huggingface-cli download fishaudio/fish-speech-1.5 --local-dir checkpoints/fish-speech-1.5/ 51 | ``` 52 | 53 | ### 安装环境 54 | 请参考 [requirements.txt](https://github.com/Holasyb918/tts-fish-speech/blob/main/requirements.txt) 并结合你的实际环境来搭建环境,如果单个环境难以满足,tts 可以使用常规的环境,不要求 3.8,但你可能需要分步完成从 text 到数字人的整个流程。 55 | 56 | #### 开始使用 57 | * repo 中已提供可以用于 demo 的音视频样例,代码可以直接运行。 58 | 把你需要生成的文本放在 [example/text.txt](example/text.txt) 中,把要克隆的音色放在 [example/audio.wav](example/audio.wav) 中,然后运行以下命令: 59 | #### command: 60 | ```bash 61 | bash inference_from_text.sh example/audio.wav example/text.txt example/video.mp4 62 | # 音色 wav TTS 文本 视频 63 | ``` 64 | 65 | 66 | ## QA 67 | ### 1. 多个人脸报错 68 | 下载新的人脸检测模型,替换原本的人脸检测模型或许可以解决。 69 | ```bash 70 | wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/scrfd_10g_kps.onnx 71 | mv face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx.bak 72 | mv scrfd_10g_kps.onnx face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx 73 | ``` 74 | ### 2. 初始化报错 75 | 76 | 有较高概率是 onnxruntime-gpu 版本不匹配导致的。 77 | ```bash 78 | python check_env/check_onnx_cuda.py 79 | ``` 80 | 观察输出是否包括 successfully. 81 | 如果遇到问题,你可以尝试以下方法: 82 | 1. 建议根据自己 cuda 等环境尝试更换一些版本。 83 | 2. 如果难以解决,先卸载 onnxruntime-gpu 和 onnxruntime,然后使用 conda 安装 cudatoolkit 环境,然后再尝试 pip 安装 onnxruntime-gpu。 84 | 85 | 验证可行版本如下: 86 | | cudatoolkit | onnxruntime-gpu | 备注 | 87 | | --- | --- | --- | 88 | | 11.8.0 | 1.16.0 | | 89 | 90 | ### 3. ImportError: cannot import name check_argument_types 91 | 缺包 92 | ```bash 93 | pip install typeguard 94 | ``` 95 | 96 | ## Contributing 97 | 欢迎贡献! 98 | 99 | ## License 100 | 参考 heyGem.ai 的协议. 
101 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import gc
3 | import json
4 | import os
5 |
6 | os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
7 | import subprocess
8 | import threading
9 | import time
10 | import traceback
11 | import uuid
12 | from enum import Enum
13 | import queue
14 | import shutil
15 | from functools import partial
16 |
17 | import cv2
18 | import gradio as gr
19 | from flask import Flask, request
20 |
21 | import service.trans_dh_service
22 | from h_utils.custom import CustomError
23 | from y_utils.config import GlobalConfig
24 | from y_utils.logger import logger
25 |
26 |
27 | def write_video_gradio(
28 |     output_imgs_queue,
29 |     temp_dir,
30 |     result_dir,
31 |     work_id,
32 |     audio_path,
33 |     result_queue,
34 |     width,
35 |     height,
36 |     fps,
37 |     watermark_switch=0,
38 |     digital_auth=0,
39 |     temp_queue=None,
40 | ):
41 |     output_mp4 = os.path.join(temp_dir, "{}-t.mp4".format(work_id))
42 |     fourcc = cv2.VideoWriter_fourcc(*"mp4v")
43 |     result_path = os.path.join(result_dir, "{}-r.mp4".format(work_id))
44 |     video_write = cv2.VideoWriter(output_mp4, fourcc, fps, (width, height))
45 |     print("Custom VideoWriter init done")
46 |     try:
47 |         while True:
48 |             state, reason, value_ = output_imgs_queue.get()
49 |             if type(state) == bool and state == True:
50 |                 logger.info(
51 |                     "Custom VideoWriter [{}]视频帧队列处理已结束".format(work_id)
52 |                 )
53 |                 logger.info(
54 |                     "Custom VideoWriter Silence Video saved in {}".format(
55 |                         os.path.realpath(output_mp4)
56 |                     )
57 |                 )
58 |                 video_write.release()
59 |                 break
60 |             else:
61 |                 if type(state) == bool and state == False:
62 |                     logger.error(
63 |                         "Custom VideoWriter [{}]任务视频帧队列 -> 异常原因:[{}]".format(
64 |                             work_id, reason
65 |                         )
66 |                     )
67 |                     raise CustomError(reason)
68 |                 for result_img in value_:
69 |                     video_write.write(result_img)
70 |         if video_write is not None:
71 |             video_write.release()
72 |         if watermark_switch == 1 and digital_auth == 1:
73 |             logger.info(
74 |                 "Custom VideoWriter [{}]任务需要水印和数字人标识".format(work_id)
75 |             )
76 |             if width > height:
77 |                 command = 'ffmpeg -y -i {} -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10,overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
78 |                     audio_path,
79 |                     output_mp4,
80 |                     GlobalConfig.instance().watermark_path,
81 |                     GlobalConfig.instance().digital_auth_path,
82 |                     result_path,
83 |                 )
84 |                 logger.info("command:{}".format(command))
85 |             else:
86 |                 command = 'ffmpeg -y -i {} -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10,overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
87 |                     audio_path,
88 |                     output_mp4,
89 |                     GlobalConfig.instance().watermark_path,
90 |                     GlobalConfig.instance().digital_auth_path,
91 |                     result_path,
92 |                 )
93 |                 logger.info("command:{}".format(command))
94 |         elif watermark_switch == 1 and digital_auth == 0:
95 |             logger.info("Custom VideoWriter [{}]任务需要水印".format(work_id))
96 |             command = 'ffmpeg -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10" -c:a aac -crf 15 -strict -2 {}'.format(
97 |                 audio_path,
98 |                 output_mp4,
99 |                 GlobalConfig.instance().watermark_path,
100 |                 result_path,
101 |             )
102 |             logger.info("command:{}".format(command))
103 |         elif watermark_switch == 0 and digital_auth == 1:
104 |             logger.info("Custom VideoWriter [{}]任务需要数字人标识".format(work_id))
105 |             if width > height:
106 |                 command = 'ffmpeg -loglevel warning -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
107 |                     audio_path,
108 |                     output_mp4,
109 |                     GlobalConfig.instance().digital_auth_path,
110 |                     result_path,
111 |                 )
112 |                 logger.info("command:{}".format(command))
113 |             else:
114 |                 command = 'ffmpeg -loglevel warning -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
115 |                     audio_path,
116 |                     output_mp4,
117 |                     GlobalConfig.instance().digital_auth_path,
118 |                     result_path,
119 |                 )
120 |                 logger.info("command:{}".format(command))
121 |         else:
122 |             command = "ffmpeg -loglevel warning -y -i {} -i {} -c:a aac -c:v libx264 -crf 15 -strict -2 {}".format(
123 |                 audio_path, output_mp4, result_path
124 |             )
125 |         logger.info("Custom command:{}".format(command))
126 |         subprocess.call(command, shell=True)
127 |         print("###### Custom Video Writer write over")
128 |         print(f"###### Video result saved in {os.path.realpath(result_path)}")
129 |         result_queue.put([True, result_path])
130 |         # temp_queue.put([True, result_path])
131 |     except Exception as e:
132 |         logger.error(
133 |             "Custom VideoWriter [{}]视频帧队列处理异常结束,异常原因:[{}]".format(
134 |                 work_id, e.__str__()
135 |             )
136 |         )
137 |         result_queue.put(
138 |             [
139 |                 False,
140 |                 "[{}]视频帧队列处理异常结束,异常原因:[{}]".format(
141 |                     work_id, e.__str__()
142 |                 ),
143 |             ]
144 |         )
145 |     logger.info("Custom VideoWriter 后处理进程结束")
146 |
147 |
148 | service.trans_dh_service.write_video = write_video_gradio
149 |
150 |
151 | class VideoProcessor:
152 |     def __init__(self):
153 |         self.task = service.trans_dh_service.TransDhTask()
154 |         self.basedir = GlobalConfig.instance().result_dir
155 |         self.is_initialized = False
156 |         self._initialize_service()
157 |         print("VideoProcessor init done")
158 |
159 |     def _initialize_service(self):
160 |         logger.info("开始初始化 trans_dh_service...")
161 |         try:
162 |             time.sleep(5)
163 |             logger.info("trans_dh_service 初始化完成。")
164 |             self.is_initialized = True
165 |         except Exception as e:
166 |             logger.error(f"初始化 trans_dh_service 失败: {e}")
167 |
168 |     def process_video(
169 |         self, audio_file, video_file, watermark=False, digital_auth=False
170 |     ):
171 |         while not self.is_initialized:
172 |             logger.info("服务尚未完成初始化,等待 1 秒...")
173 |             time.sleep(1)
174 |         work_id = str(uuid.uuid1())
175 |         code = work_id
176 |         temp_dir = os.path.join(GlobalConfig.instance().temp_dir, work_id)
177 |         result_dir = GlobalConfig.instance().result_dir
178 |         video_writer_thread = None
179 |         final_result = None
180 |
181 |         try:
182 |             cap = cv2.VideoCapture(video_file)
183 |             width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
184 |             height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
185 |             fps = cap.get(cv2.CAP_PROP_FPS)
186 |             cap.release()
187 |
188 |             audio_path = audio_file
189 |             video_path = video_file
190 |
191 |             self.task.task_dic[code] = ""
192 |             self.task.work(audio_path, video_path, code, 0, 0, 0, 0)
193 |
194 |             result_path = self.task.task_dic[code][2]
195 |             final_result_dir = os.path.join("result", code)
196 |             os.makedirs(final_result_dir, exist_ok=True)
197 |             os.system(f"mv {result_path} {final_result_dir}")
198 |             os.system(
199 |                 f"rm -rf {os.path.join(os.path.dirname(result_path), code + '*.*')}"
200 |             )
201 |             result_path = os.path.realpath(
202 |                 os.path.join(final_result_dir, os.path.basename(result_path))
203 |             )
204 |             return result_path
205 |
206 |         except Exception as e:
207 |             logger.error(f"处理视频时发生错误: {e}")
208 |             raise gr.Error(str(e))
209 |
210 |
211 | if __name__ == "__main__":
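    # Note: write_video_gradio above replaces service.trans_dh_service.write_video
    # (see the assignment after the function definition), so frames produced by
    # TransDhTask.work() are written out by this module when the Gradio entry
    # point below runs.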
212 |     processor = VideoProcessor()
213 |
214 |     inputs = [
215 |         gr.File(label="上传音频文件/upload audio file"),
216 |         gr.File(label="上传视频文件/upload video file"),
217 |     ]
218 |     outputs = gr.Video(label="生成的视频/Generated video")
219 |
220 |     title = "数字人视频生成/Digital Human Video Generation"
221 |     description = "上传音频和视频文件,即可生成数字人视频。/Upload audio and video files to generate digital human videos."
222 |
223 |     demo = gr.Interface(
224 |         fn=processor.process_video,
225 |         inputs=inputs,
226 |         outputs=outputs,
227 |         title=title,
228 |         description=description,
229 |     )
230 |     demo.queue().launch()
--------------------------------------------------------------------------------
/check_env/check_onnx_cuda.py:
--------------------------------------------------------------------------------
1 | import onnxruntime
2 | import numpy as np
3 |
4 | def check_gpu_usage():
5 |     """
6 |     Checks if ONNX Runtime can use the GPU by attempting to create an InferenceSession
7 |     with the CUDAExecutionProvider.
8 |
9 |     Returns:
10 |         True if GPU is likely being used, False otherwise.
11 |     """
12 |     providers = ("CUDAExecutionProvider",
13 |                  {"device_id": 0})
14 |     session_options = onnxruntime.SessionOptions()
15 |     session_options.log_severity_level = 3
16 |     onnx_path = "./face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx"
17 |     onnx_session = onnxruntime.InferenceSession(onnx_path, session_options, providers=[providers])
18 |     print(onnx_session.get_providers())
19 |     return "CUDAExecutionProvider" in onnx_session.get_providers(), onnx_session
20 |
21 | if __name__ == "__main__":
22 |     is_cuda, onnx_session = check_gpu_usage()
23 |     if is_cuda:
24 |         print("ONNX Runtime is successfully using the GPU.")
25 |         inp = np.random.randn(1, 3, 640, 640).astype(np.float32)
26 |         ort_inputs = {onnx_session.get_inputs()[0].name: inp}
27 |         ort_outs = onnx_session.run(None, ort_inputs)
28 |         print(ort_outs[0].shape)
29 |     else:
30 |         print("ONNX Runtime is NOT using the GPU or there was an error initializing the CUDA provider.")
31 |         print("Please ensure that:")
32 |         print("- You have installed the 'onnxruntime-gpu' package.")
33 |         print("- You have a compatible NVIDIA GPU with appropriate drivers installed.")
34 |         print("- CUDA and cuDNN are installed and correctly configured in your system.")
35 |         print("- The versions of CUDA, cuDNN, and the NVIDIA drivers are compatible with the 'onnxruntime-gpu' version you have installed.")
36 |         print("- The ONNX Runtime build you are using supports CUDA.")
--------------------------------------------------------------------------------
/config/config.ini:
--------------------------------------------------------------------------------
1 | [log]
2 | log_dir = ./log
3 | log_file = dh.log
4 |
5 | [http_server]
6 | server_ip = 0.0.0.0
7 | server_port = 8383
8 |
9 | [temp]
10 | temp_dir = ./
11 | clean_switch = 1
12 |
13 | [result]
14 | result_dir = ./result
15 | clean_switch = 0
16 |
17 | [digital]
18 | batch_size = 4
19 |
20 | [register]
21 | url = http://172.16.160.51:12120
22 | report_interval = 10
23 | enable=0
--------------------------------------------------------------------------------
/download.sh:
--------------------------------------------------------------------------------
1 | set -e
2 | set -u
3 |
4 | # face attr
5 | mkdir -p face_attr_detect
6 | wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/face_attr_epoch_12_220318.onnx -O face_attr_detect/face_attr_epoch_12_220318.onnx
7 |
8 | # face detect
9 | mkdir -p face_detect_utils/resources
10 | wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/pfpld_robust_sim_bs1_8003.onnx -O face_detect_utils/resources/pfpld_robust_sim_bs1_8003.onnx
11 | wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/scrfd_500m_bnkps_shape640x640.onnx -O face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx
12 | wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/model_float32.onnx -O face_detect_utils/resources/model_float32.onnx
13 |
14 | # dh model
15 | mkdir -p landmark2face_wy/checkpoints/anylang
16 | wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/dinet_v1_20240131.pth -O landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth
17 |
18 | # face parsing
19 | mkdir -p pretrain_models/face_lib/face_parsing
20 | wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/79999_iter.onnx -O pretrain_models/face_lib/face_parsing/79999_iter.onnx
21 |
22 | # gfpgan
23 | mkdir -p pretrain_models/face_lib/face_restore/gfpgan
24 | wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/GFPGANv1.4.onnx -O pretrain_models/face_lib/face_restore/gfpgan/GFPGANv1.4.onnx
25 |
26 | # xseg
27 | mkdir -p xseg
28 | wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/xseg_211104_4790000.onnx -O xseg/xseg_211104_4790000.onnx
29 |
30 | # wenet
31 | mkdir -p wenet/examples/aishell/aidata/exp/conformer
32 | wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/wenetmodel.pt -O wenet/examples/aishell/aidata/exp/conformer/wenetmodel.pt
--------------------------------------------------------------------------------
/example/audio.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/example/audio.wav
--------------------------------------------------------------------------------
/example/video.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/example/video.mp4
--------------------------------------------------------------------------------
/face_attr_detect/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_attr_detect/.DS_Store
--------------------------------------------------------------------------------
/face_attr_detect/__init__.py:
--------------------------------------------------------------------------------
1 | from .face_attr import FaceAttr
2 |
--------------------------------------------------------------------------------
/face_attr_detect/face_attr.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_attr_detect/face_attr.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/face_detect_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_detect_utils/__init__.py -------------------------------------------------------------------------------- /face_detect_utils/face_detect.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_detect_utils/face_detect.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /face_detect_utils/head_pose.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_detect_utils/head_pose.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /face_detect_utils/scrfd.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_detect_utils/scrfd.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /face_lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_lib/__init__.py -------------------------------------------------------------------------------- /face_lib/face_detect_and_align/__init__.py: -------------------------------------------------------------------------------- 1 | from .face_align_5_landmarks import FaceDetect5Landmarks 2 | from .face_align_utils import estimate_norm 3 | 4 | -------------------------------------------------------------------------------- /face_lib/face_detect_and_align/face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_lib/face_detect_and_align/face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /face_lib/face_detect_and_align/face_align_utils.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_lib/face_detect_and_align/face_align_utils.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /face_lib/face_detect_and_align/scrfd_insightface/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2021/11/10 3 | 4 | 5 | from .scrfd import SCRFD -------------------------------------------------------------------------------- /face_lib/face_detect_and_align/scrfd_insightface/scrfd.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_lib/face_detect_and_align/scrfd_insightface/scrfd.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /face_lib/face_parsing/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/3/29 3 | 4 | 5 | from .face_parsing_api import FaceParsing 6 | # from .dfl_xseg_net import XsegNet 7 | -------------------------------------------------------------------------------- /face_lib/face_parsing/face_parsing_api.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_lib/face_parsing/face_parsing_api.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /face_lib/face_restore/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .gfpgan_onnx.gfpgan_onnx_api import GFPGAN 3 | -------------------------------------------------------------------------------- /face_lib/face_restore/gfpgan_onnx/gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_lib/face_restore/gfpgan_onnx/gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /h_utils/__init__.py: -------------------------------------------------------------------------------- 1 | #!/user/bin/env python 2 | # coding=utf-8 3 | """ 4 | @project : dhp-service 5 | @author : huyi 6 | @file : __init__.py.py 7 | @ide : PyCharm 8 | @time : 2021-08-18 15:45:13 9 | """ -------------------------------------------------------------------------------- /h_utils/custom.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/h_utils/custom.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /h_utils/obs_client.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/h_utils/obs_client.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /h_utils/request_utils.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/h_utils/request_utils.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /h_utils/sweep_bot.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/h_utils/sweep_bot.cpython-38-x86_64-linux-gnu.so 
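
Every compiled module in this dump carries the `cpython-38-x86_64-linux-gnu` ABI tag, which is what pins the project to Python 3.8 on x86-64 Linux: CPython only imports extension modules whose file suffix matches the running interpreter. A standalone illustration (not part of the repo):

```python
# abi_probe.py -- why the bundled .so files need Python 3.8 (illustrative)
import importlib.machinery
import sys

print(sys.version_info[:2])                    # must be (3, 8) for these modules
print(importlib.machinery.EXTENSION_SUFFIXES)  # accepted extension-module suffixes
# e.g. `from h_utils.custom import CustomError` (used by app.py) resolves only
# when '.cpython-38-x86_64-linux-gnu.so' appears in EXTENSION_SUFFIXES.
```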
--------------------------------------------------------------------------------
/h_utils/zip_utils.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/h_utils/zip_utils.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/inference_from_text.sh:
--------------------------------------------------------------------------------
1 | set -e
2 | set -u
3 |
4 | ref_audio=$1
5 | text_path=$2
6 | ref_mp4=$3
7 |
8 | pwd=$(pwd)
9 | echo "ref_audio: ${ref_audio}"
10 | echo "text_path: ${text_path}"
11 | echo "ref_mp4: ${ref_mp4}"
12 | echo "pwd: ${pwd}"
13 |
14 | real_ref_audio=$(realpath ${ref_audio})
15 | real_text_path=$(realpath ${text_path})
16 | real_ref_mp4=$(realpath ${ref_mp4})
17 |
18 | echo "real_ref_audio: ${real_ref_audio}"
19 | echo "real_text_path: ${real_text_path}"
20 | echo "real_ref_mp4: ${real_ref_mp4}"
21 |
22 | # tts
23 | cd tts-fish-speech
24 | echo bash run.sh ${real_ref_audio} ${real_text_path}
25 | bash run.sh ${real_ref_audio} ${real_text_path}
26 |
27 | # f2f
28 | cd ${pwd}
29 | mv tts-fish-speech/fake.wav example/fake.wav
30 |
31 | python run.py --audio_path example/fake.wav --video_path ${ref_mp4}
--------------------------------------------------------------------------------
/landmark2face_wy/audio_handler.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/audio_handler.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/checkpoints/test/opt.txt:
--------------------------------------------------------------------------------
1 | ----------------- Options ---------------
2 | aspect_ratio: 1.0
3 | audio_feature: 3dmm
4 | batch_size: 16
5 | checkpoints_dir: ./landmark2face_wy/checkpoints
6 | crop_size: 256
7 | dataroot: ./data
8 | dataset_mode: Facereala3dmm
9 | direction: AtoB
10 | display_winsize: 256
11 | distributed: False
12 | epoch: latest
13 | eval: False
14 | feat_num: 3
15 | feature_path: ../AnnI_deep3dface_256_contains_id/
16 | fp16: False
17 | gpu_ids: 0
18 | img_size: 256
19 | init_gain: 0.02
20 | init_type: normal
21 | input_nc: 3
22 | instance_feat: False
23 | isTrain: False [default: None]
24 | label_feat: False
25 | lan_size: 1
26 | load_features: False
27 | load_iter: 0 [default: 0]
28 | load_size: 286
29 | local_rank: -1
30 | max_dataset_size: inf
31 | mfcc0_rate: 0.2
32 | model: pirender_3dmm_mouth_hd
33 | model_path: ./landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth
34 | n_blocks: 9
35 | n_blocks_global: 9
36 | n_blocks_local: 3
37 | n_clusters: 10
38 | n_downsample_E: 4
39 | n_downsample_global: 4
40 | n_layers_D: 3
41 | n_local_enhancers: 1
42 | name: test
43 | ndf: 64
44 | nef: 16
45 | netD: basic
46 | netG: pirender
47 | ngf: 64
48 | niter_fix_global: 0
49 | no_dropout: True
50 | no_flip: False
51 | no_ganFeat_loss: False
52 | no_instance: False
53 | norm: instance
54 | ntest: inf
55 | num_D: 2
56 | num_test: 50
57 | num_threads: 4
58 | output_nc: 3
59 | perceptual_layers: ['relu_1_1', 'relu_2_1', 'relu_3_1', 'relu_4_1', 'relu_5_1']
60 | perceptual_network: vgg19
61 | perceptual_num_scales: 4
62 | perceptual_use_style_loss: True
63 | perceptual_weights: [4, 4, 4, 4, 4]
64 | phase: test
65 | preprocess: resize_and_crop
66 | resize_size: 512
67 | results_dir: ./results/
68 | serial_batches: False
69 | suffix:
70 | test_audio_path: None
71 | test_muban: None
72 | verbose: False
73 | weight_style_to_perceptual: 250
74 | ----------------- End -------------------
75 |
--------------------------------------------------------------------------------
/landmark2face_wy/data/Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/data/Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/data/Facereala3dmmexp512_dataset.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | import random
3 | from data.base_dataset import BaseDataset, get_params, get_transform
4 | import torchvision.transforms as transforms
5 | from data.image_folder import make_dataset
6 | from PIL import Image, ImageEnhance
7 | import numpy as np
8 | import cv2
9 | import torch
10 | import time
11 |
12 | def get_idts(config_name):
13 |     idts = list()
14 |     with open(os.path.join('../config', config_name + '.txt')) as f:
15 |         for line in f:
16 |             line = line.strip()
17 |             video_name = line.split(':')[0]
18 |             idts.append(video_name)
19 |     return idts
20 |
21 |
22 | def obtain_seq_index(index, num_frames):
23 |     seq = list(range(index - 13, index + 13 + 1))
24 |     seq = [min(max(item, 0), num_frames - 1) for item in seq]
25 |     return seq
26 |
27 | def get_3dmm_feature(img_path, idx, new_dict):
28 |     id = img_path.split('/')[-3]
29 |     features = new_dict[id]
30 |     idx_list = obtain_seq_index(idx, features.shape[0])
31 |     feature = features[idx_list, 80:144]
32 |     # feature[:, -1] = 50
33 |     return np.transpose(feature, (1, 0))
34 |
35 |
36 |
37 | class Facereala3dmmexp512Dataset(BaseDataset):
38 |     def __init__(self, opt, mode=None):
39 |         BaseDataset.__init__(self, opt)
40 |         img_size = opt.img_size
41 |         idts = get_idts(opt.name.split('_')[0])
42 |         print("---------load data list--------: ", idts)
43 |         self.new_dict = {}
44 |         if mode == 'train':
45 |             self.labels = []
46 |             self.label_starts = []
47 |             self.label_ends = []
48 |             count = 0
49 |             for idt_name in idts:
50 |                 # root = '../AnnVI/feature/{}'.format(idt_name)
51 |                 root = os.path.join(opt.feature_path, idt_name)
52 |                 feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature))
53 |                 self.new_dict[idt_name] = feature
54 |                 if opt.audio_feature == "3dmm":
55 |                     training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
56 |                 else:
57 |                     training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
58 |                 training_data = torch.load(training_data_path)
59 |                 img_paths = training_data['img_paths']
60 |                 features_3dmm = training_data['features_3dmm']
61 |                 index = [i[0].split('/')[-1] for i in img_paths]
62 |
63 |                 image_dir = '{}/{}_dlib_crop'.format(root, img_size)
64 |                 self.label_starts.append(count)
65 |                 for img in range(len(index)):
66 |                     img_path = os.path.join(image_dir, index[img])
67 |                     # idx_list = obtain_seq_index(img, feature.shape[0])
68 |                     # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))])
69 |                     self.labels.append([img_path, features_3dmm[img]])
70 |                     count = count + 1
71 |                 self.label_ends.append(count)
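            # label_starts / label_ends record, for every identity, the half-open
            # range [start, end) of its frames inside self.labels; __getitem__
            # relies on these ranges to draw the reference frame real_A from the
            # same identity as the target frame.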
72 | 73 | self.label_starts = np.array(self.label_starts) 74 | self.label_ends = np.array(self.label_ends) 75 | self.transforms_image = transforms.Compose([transforms.ToTensor(), 76 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 77 | 78 | self.transforms_label = transforms.Compose([transforms.ToTensor(), 79 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 80 | self.shuffle() 81 | elif mode == 'test': 82 | self.labels = [] 83 | self.label_starts = [] 84 | self.label_ends = [] 85 | count = 0 86 | for idt_name in idts: 87 | # root = '../AnnVI/feature/{}'.format(idt_name) 88 | root = os.path.join(opt.feature_path, idt_name) 89 | feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature)) 90 | self.new_dict[idt_name] = feature 91 | if opt.audio_feature == "3dmm": 92 | training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode)) 93 | else: 94 | training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature)) 95 | training_data = torch.load(training_data_path) 96 | img_paths = training_data['img_paths'] 97 | features_3dmm = training_data['features_3dmm'] 98 | index = [i[0].split('/')[-1] for i in img_paths] 99 | 100 | image_dir = '{}/{}_dlib_crop'.format(root, img_size) 101 | self.label_starts.append(count) 102 | for img in range(len(index)): 103 | img_path = os.path.join(image_dir, index[img]) 104 | # idx_list = obtain_seq_index(img, feature.shape[0]) 105 | # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))]) 106 | self.labels.append([img_path, features_3dmm[img]]) 107 | count = count + 1 108 | self.label_ends.append(count) 109 | 110 | self.label_starts = np.array(self.label_starts) 111 | self.label_ends = np.array(self.label_ends) 112 | self.transforms_image = transforms.Compose([transforms.ToTensor(), 113 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 114 | 115 | self.transforms_label = transforms.Compose([transforms.ToTensor(), 116 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 117 | self.shuffle() 118 | 119 | def shuffle(self): 120 | self.labels_index = list(range(len(self.labels))) 121 | random.shuffle(self.labels_index) 122 | 123 | def add_mouth_mask2(self, img): 124 | mask = np.ones_like(img) 125 | rect_area = [img.shape[1] // 2 - 60, np.random.randint(226, 246), 30, 256 - 30] 126 | mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]] 127 | x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2])) 128 | x = np.flip(x, 0) 129 | y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose() 130 | zz1 = -y - x + 88 > 0 131 | zz2 = np.flip(zz1, 1) 132 | zz = (zz1 + zz2) > 0 133 | mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1 134 | imgm = img * mask 135 | return imgm 136 | 137 | def __getitem__(self, index): 138 | # s1= time.time() 139 | idx = self.labels_index[index] 140 | img_path, feature_3dmm_idx= self.labels[idx] 141 | # print(img_path, feature_3dmm_idx) 142 | feature_3dmm = get_3dmm_feature(img_path, feature_3dmm_idx, self.new_dict) 143 | #print(img_path, feature_3dmm_idx, feature_3dmm.shape) 144 | 145 | img = np.array(Image.open(img_path).convert('RGB')) 146 | img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8') 147 | cut_pad1 = np.random.randint(0, 20) 148 | cut_pad2 = np.random.randint(0, 20) 149 | img = img[cut_pad1:512 + cut_pad1, 
cut_pad2:512 + cut_pad2] 150 | # s2 =time.time() 151 | # print('get data and read data ', s2-s1) 152 | mask_B = img.copy() 153 | # mask_end = np.random.randint(236*2, 250*2) 154 | # index = np.random.randint(80, 90) 155 | # mask_B[mask_B.shape[1] // 2 - index:mask_end, 30:-30] = 0 156 | mask_end = np.random.randint(480, 500) 157 | index = np.random.randint(15, 30) 158 | mask_B[index:mask_end, 70:-70] = 0 159 | img = Image.fromarray(img) 160 | 161 | mask_B = Image.fromarray(mask_B) 162 | img = self.transforms_image(img) 163 | mask_B = self.transforms_image(mask_B) 164 | 165 | x = np.where((idx >= self.label_starts) * (idx < self.label_ends))[0] 166 | 167 | audio = torch.tensor(feature_3dmm) 168 | # s3 = time.time() 169 | # print('get 3dmm and mask ', s3 - s2) 170 | # 保证real_A_index不是idx 171 | max_i = 0 172 | real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1) 173 | while real_A_index == idx: 174 | max_i += 1 175 | real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1) 176 | if max_i > 5: 177 | break 178 | 179 | imgA_path, _ = self.labels[real_A_index] 180 | imgA = np.array(Image.open(imgA_path).convert('RGB')) 181 | cut_pad1 = np.random.randint(0, 20) 182 | cut_pad2 = np.random.randint(0, 20) 183 | imgA = imgA[cut_pad1:256*2 + cut_pad1, cut_pad2:256*2 + cut_pad2] 184 | 185 | ########椭圆########## 186 | # mask = np.zeros(imgA.shape, dtype=np.uint8) 187 | # cv2.ellipse(mask, (imgA.shape[1] // 2, imgA.shape[0] // 2 - 165 - cut_pad1), 188 | # (imgA.shape[1] // 2 + 25, imgA.shape[0]), 0, 0, 360, (255, 255, 255), -1) 189 | # ROI = cv2.bitwise_and(imgA, mask) 190 | # imgA = Image.fromarray(ROI) 191 | ############################# 192 | # imgA[:imgA.shape[1] // 2 - 40 - index2, :] = 0 193 | imgA = Image.fromarray(imgA) 194 | imgA = self.transforms_image(imgA) 195 | # s4 = time.time() 196 | # print('end time reala ', s4 - s3) 197 | return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B} 198 | 199 | def __len__(self): 200 | """Return the total number of images in the dataset.""" 201 | return len(self.labels) 202 | 203 | 204 | if __name__ == '__main__': 205 | from options.train_options import TrainOptions 206 | 207 | opt = TrainOptions().parse() 208 | dataset = Facereala3dmmDataset(opt) 209 | dataset_size = len(dataset) 210 | print(dataset_size) 211 | for i, data in enumerate(dataset): 212 | print(data) 213 | -------------------------------------------------------------------------------- /landmark2face_wy/data/Facereala3dmmexpwenet512_dataset.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import random 3 | from data.base_dataset import BaseDataset, get_params, get_transform 4 | import torchvision.transforms as transforms 5 | from data.image_folder import make_dataset 6 | from PIL import Image, ImageEnhance 7 | import numpy as np 8 | import cv2 9 | import torch 10 | import time 11 | 12 | def get_idts(config_name): 13 | idts = list() 14 | with open(os.path.join('../config', config_name + '.txt')) as f: 15 | for line in f: 16 | line = line.strip() 17 | video_name = line.split(':')[0] 18 | idts.append(video_name) 19 | return idts 20 | 21 | 22 | def obtain_seq_index(index, num_frames): 23 | seq = list(range(index - 10, index + 9 + 1)) 24 | seq = [min(max(item, 0), num_frames - 1) for item in seq] 25 | return seq 26 | 27 | def get_3dmm_feature(img_path, idx, audio_feature, new_dict): 28 | id = img_path.split('/')[-3] 29 | features, features1, features1 = new_dict[id] 30 | 
idx_list = obtain_seq_index(idx, features.shape[0]) 31 | feature = features[idx_list, 80:144] 32 | feature1 = features1[:,audio_feature[0]:audio_feature[1]] 33 | feature = np.concatenate([feature, features[idx_list, -3:], np.transpose(feature1, (1, 0))], 1) 34 | # print(feature.shape) 35 | return np.transpose(feature, (1, 0)) 36 | # return feature 37 | 38 | 39 | 40 | class Facereala3dmmexpwenet512Dataset(BaseDataset): 41 | def __init__(self, opt, mode=None): 42 | BaseDataset.__init__(self, opt) 43 | img_size = opt.img_size 44 | idts = get_idts(opt.name.split('_')[0]) 45 | print("---------load data list--------: ", idts) 46 | self.new_dict = {} 47 | if mode == 'train': 48 | self.labels = [] 49 | self.label_starts = [] 50 | self.label_ends = [] 51 | count = 0 52 | for idt_name in idts: 53 | # root = '../AnnVI/feature/{}'.format(idt_name) 54 | root = os.path.join(opt.feature_path, idt_name) 55 | feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature)) 56 | feature1 = np.load(os.path.join(root,'audio_wenet_feature.npy')) 57 | self.new_dict[idt_name] = [feature, feature1, feature1] 58 | if opt.audio_feature == "3dmm": 59 | training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode)) 60 | else: 61 | training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature)) 62 | training_data = torch.load(training_data_path) 63 | img_paths = training_data['img_paths'] 64 | features_3dmm = training_data['features_3dmm'] 65 | audio_features = np.load(os.path.join(root, 'audio_data.npy'), allow_pickle=True) 66 | audio_features = audio_features.tolist() 67 | index = [i[0].split('/')[-1] for i in img_paths] 68 | 69 | image_dir = '{}/{}_dlib_crop'.format(root, img_size) 70 | self.label_starts.append(count) 71 | for img in range(len(index)): 72 | img_path = os.path.join(image_dir, index[img]) 73 | # idx_list = obtain_seq_index(img, feature.shape[0]) 74 | # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))]) 75 | if type(features_3dmm[img]) != int: 76 | print(img_path) 77 | audio_feature = audio_features[img] 78 | self.labels.append([img_path, features_3dmm[img], audio_feature]) 79 | count = count + 1 80 | self.label_ends.append(count) 81 | 82 | self.label_starts = np.array(self.label_starts) 83 | self.label_ends = np.array(self.label_ends) 84 | self.transforms_image = transforms.Compose([transforms.ToTensor(), 85 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 86 | 87 | self.transforms_label = transforms.Compose([transforms.ToTensor(), 88 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 89 | self.shuffle() 90 | elif mode == 'test': 91 | self.labels = [] 92 | self.label_starts = [] 93 | self.label_ends = [] 94 | count = 0 95 | for idt_name in idts: 96 | # root = '../AnnVI/feature/{}'.format(idt_name) 97 | root = os.path.join(opt.feature_path, idt_name) 98 | feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature)) 99 | self.new_dict[idt_name] = feature 100 | if opt.audio_feature == "3dmm": 101 | training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode)) 102 | else: 103 | training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature)) 104 | training_data = torch.load(training_data_path) 105 | img_paths = training_data['img_paths'] 106 | features_3dmm = training_data['features_3dmm'] 107 | index = [i[0].split('/')[-1] for i in img_paths] 108 | 109 | image_dir = '{}/{}_dlib_crop'.format(root, img_size) 110 | self.label_starts.append(count) 111 | for img in 
range(len(index)):
112 |                     img_path = os.path.join(image_dir, index[img])
113 |                     # idx_list = obtain_seq_index(img, feature.shape[0])
114 |                     # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))])
115 |                     self.labels.append([img_path, features_3dmm[img]])
116 |                     count = count + 1
117 |                 self.label_ends.append(count)
118 | 
119 |             self.label_starts = np.array(self.label_starts)
120 |             self.label_ends = np.array(self.label_ends)
121 |             self.transforms_image = transforms.Compose([transforms.ToTensor(),
122 |                                                         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
123 | 
124 |             self.transforms_label = transforms.Compose([transforms.ToTensor(),
125 |                                                         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
126 |             self.shuffle()
127 | 
128 |     def shuffle(self):
129 |         self.labels_index = list(range(len(self.labels)))
130 |         random.shuffle(self.labels_index)
131 | 
132 |     def add_mouth_mask2(self, img):
133 |         mask = np.ones_like(img)
134 |         rect_area = [img.shape[1] // 2 - 60, np.random.randint(226, 246), 30, 256 - 30]
135 |         mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]]
136 |         x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2]))
137 |         x = np.flip(x, 0)
138 |         y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose()
139 |         zz1 = -y - x + 88 > 0
140 |         zz2 = np.flip(zz1, 1)
141 |         zz = (zz1 + zz2) > 0
142 |         mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1
143 |         imgm = img * mask
144 |         return imgm
145 | 
146 |     def __getitem__(self, index):
147 |         # s1 = time.time()
148 |         idx = self.labels_index[index]
149 |         img_path, feature_3dmm_idx, audio_feature = self.labels[idx]
150 |         # print(img_path, feature_3dmm_idx)
151 |         feature_3dmm = get_3dmm_feature(img_path, feature_3dmm_idx, audio_feature, self.new_dict)
152 |         # print(img_path, feature_3dmm_idx, feature_3dmm.shape)
153 | 
154 |         img = np.array(Image.open(img_path).convert('RGB'))
155 |         img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
156 |         cut_pad1 = np.random.randint(0, 20)
157 |         cut_pad2 = np.random.randint(0, 20)
158 |         img = img[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]
159 |         # s2 = time.time()
160 |         # print('get data and read data ', s2 - s1)
161 |         mask_B = img.copy()
162 |         # mask_end = np.random.randint(236*2, 250*2)
163 |         # index = np.random.randint(80, 90)
164 |         # mask_B[mask_B.shape[1] // 2 - index:mask_end, 30:-30] = 0
165 |         mask_end = np.random.randint(480, 500)
166 |         index = np.random.randint(15, 30)
167 |         # index = np.random.randint(90, 100)
168 |         mask_B[index:mask_end, 70:-70] = 0
169 |         img = Image.fromarray(img)
170 | 
171 |         mask_B = Image.fromarray(mask_B)
172 |         img = self.transforms_image(img)
173 |         mask_B = self.transforms_image(mask_B)
174 | 
175 |         x = np.where((idx >= self.label_starts) * (idx < self.label_ends))[0]
176 | 
177 |         audio = torch.tensor(feature_3dmm)
178 |         # s3 = time.time()
179 |         # print('get 3dmm and mask ', s3 - s2)
180 |         # make sure real_A_index is not idx
181 |         max_i = 0
182 |         real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1)
183 |         while real_A_index == idx:
184 |             max_i += 1
185 |             real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1)
186 |             if max_i > 5:
187 |                 break
188 | 
189 |         imgA_path, _, _ = self.labels[real_A_index]
190 |         imgA = np.array(Image.open(imgA_path).convert('RGB'))
191 |         cut_pad1 = np.random.randint(0, 20)
192 |         cut_pad2 = np.random.randint(0, 20)
193 |         imgA = imgA[cut_pad1:256*2 + cut_pad1, cut_pad2:256*2 + cut_pad2]
194 | 
195 |         ######## ellipse mask ##########
196 |         # mask = np.zeros(imgA.shape, dtype=np.uint8)
197 |         # cv2.ellipse(mask, (imgA.shape[1] // 2, imgA.shape[0] // 2 - 165 - cut_pad1),
198 |         #             (imgA.shape[1] // 2 + 25, imgA.shape[0]), 0, 0, 360, (255, 255, 255), -1)
199 |         # ROI = cv2.bitwise_and(imgA, mask)
200 |         # imgA = Image.fromarray(ROI)
201 |         #############################
202 |         # imgA[:imgA.shape[1] // 2 - 40 - index2, :] = 0
203 |         imgA = Image.fromarray(imgA)
204 |         imgA = self.transforms_image(imgA)
205 |         # s4 = time.time()
206 |         # print('end time reala ', s4 - s3)
207 |         return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}
208 | 
209 |     def __len__(self):
210 |         """Return the total number of images in the dataset."""
211 |         return len(self.labels)
212 | 
213 | 
214 | if __name__ == '__main__':
215 |     from options.train_options import TrainOptions
216 | 
217 |     opt = TrainOptions().parse()
218 |     dataset = Facereala3dmmexpwenet512Dataset(opt, mode='train')
219 |     dataset_size = len(dataset)
220 |     print(dataset_size)
221 |     for i, data in enumerate(dataset):
222 |         print(data)
223 | 
--------------------------------------------------------------------------------
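The resolver above is what `--dataset_mode` values go through: it imports `landmark2face_wy.data.<name>_dataset` and picks the class whose lowercased name equals the flag with underscores removed plus 'dataset'. A minimal sketch of how the two dataset files that follow are resolved (the flag value is illustrative, assuming the package root is on sys.path):

# Illustrative resolution of --dataset_mode values to the classes below:
#   'l2faceaudio512' -> L2FaceAudio512Dataset (l2faceaudio512_dataset.py)
#   'l2faceaudio'    -> L2FaceAudioDataset    (l2faceaudio_dataset.py)
from landmark2face_wy.data import find_dataset_using_name

dataset_cls = find_dataset_using_name('l2faceaudio512')
assert dataset_cls.__name__ == 'L2FaceAudio512Dataset'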
/landmark2face_wy/data/l2faceaudio512_dataset.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | import random
3 | from data.base_dataset import BaseDataset, get_params, get_transform
4 | import torchvision.transforms as transforms
5 | from data.image_folder import make_dataset
6 | from PIL import Image, ImageEnhance
7 | import numpy as np
8 | import cv2
9 | import torch
10 | 
11 | 
12 | def get_idts(config_name):
13 |     idts = list()
14 |     with open(os.path.join('../config', config_name + '.txt')) as f:
15 |         for line in f:
16 |             line = line.strip()
17 |             idts.append(line)
18 |     return idts
19 | 
20 | 
21 | class L2FaceAudio512Dataset(BaseDataset):
22 |     def __init__(self, opt, mode=None):
23 |         BaseDataset.__init__(self, opt)
24 |         img_size = opt.img_size
25 |         idts =
get_idts(opt.name.split('_')[0]) 26 | print("---------load data list--------: ", idts) 27 | if mode == 'train': 28 | self.labels = [] 29 | for idt_name in idts: 30 | # root = '../AnnVI/feature/{}'.format(idt_name) 31 | root = os.path.join(opt.feature_path, idt_name) 32 | if opt.audio_feature == "mfcc": 33 | training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode)) 34 | else: 35 | training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature)) 36 | training_data = torch.load(training_data_path) 37 | img_paths = training_data['img_paths'] 38 | audio_features = training_data['audio_features'] 39 | index = [i[0].split('/')[-1] for i in img_paths] 40 | 41 | image_dir = '{}/{}_dlib_crop'.format(root, img_size) 42 | # label_dir = '{}/512_landmark_crop'.format(root) 43 | 44 | # if 'man' in opt.name: 45 | # imgs.sort(key=lambda x:int(x.split('.')[0])) 46 | # else: 47 | # imgs.sort(key=lambda x: (int(x.split('.')[0].split('-')[0]), int(x.split('.')[0].split('-')[1]))) 48 | for img in range(len(index)): 49 | img_path = os.path.join(image_dir, index[img]) 50 | audio_feature = audio_features[img] 51 | self.labels.append([img_path, audio_feature]) 52 | # transforms.Resize([img_size, img_size], Image.BICUBIC), 53 | self.transforms_image = transforms.Compose([transforms.ToTensor(), 54 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 55 | # transforms.Resize([img_size, img_size], Image.BICUBIC), 56 | self.transforms_label = transforms.Compose([transforms.ToTensor(), 57 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 58 | self.shuffle() 59 | elif mode == 'test': 60 | self.labels = [] 61 | for idt_name in idts: 62 | # root = '../AnnVI/feature/{}'.format(idt_name) 63 | root = os.path.join(opt.feature_path, idt_name) 64 | if opt.audio_feature == "mfcc": 65 | training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode)) 66 | else: 67 | training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature)) 68 | training_data = torch.load(training_data_path) 69 | img_paths = training_data['img_paths'] 70 | audio_features = training_data['audio_features'] 71 | index = [i[0].split('/')[-1] for i in img_paths] 72 | 73 | image_dir = '{}/{}_dlib_crop'.format(root, img_size) 74 | # label_dir = '{}/512_landmark_crop'.format(root) 75 | 76 | # if 'man' in opt.name: 77 | # imgs.sort(key=lambda x:int(x.split('.')[0])) 78 | # else: 79 | # imgs.sort(key=lambda x: (int(x.split('.')[0].split('-')[0]), int(x.split('.')[0].split('-')[1]))) 80 | for img in range(len(index)): 81 | img_path = os.path.join(image_dir, index[img]) 82 | audio_feature = audio_features[img] 83 | self.labels.append([img_path, audio_feature]) 84 | # transforms.Resize([img_size, img_size], Image.BICUBIC), 85 | self.transforms_image = transforms.Compose([transforms.ToTensor(), 86 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 87 | # transforms.Resize([img_size, img_size], Image.BICUBIC), 88 | self.transforms_label = transforms.Compose([transforms.ToTensor(), 89 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 90 | self.shuffle() 91 | 92 | def shuffle(self): 93 | random.shuffle(self.labels) 94 | 95 | def add_mouth_mask2(self, img): 96 | mask = np.ones_like(img) 97 | rect_area = [img.shape[1] // 2 - np.random.randint(50, 60), np.random.randint(226, 246), 30, 256 - 30] 98 | mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]] 99 | x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, 
rect_area[3] - rect_area[2]))
100 |         x = np.flip(x, 0)
101 |         y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose()
102 |         zz1 = -y - x + 88 > 0
103 |         zz2 = np.flip(zz1, 1)
104 |         zz = (zz1 + zz2) > 0
105 |         mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1
106 |         imgm = img * mask
107 |         return imgm
108 | 
109 |     def __getitem__(self, index):
110 |         cv2.setNumThreads(0)
111 |         img_path, audio_feature = self.labels[index]
112 |         img = np.array(Image.open(img_path).convert('RGB'))
113 |         img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
114 |         cut_pad1 = np.random.randint(0, 20)
115 |         cut_pad2 = np.random.randint(0, 20)
116 |         img = img[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]
117 | 
118 |         #### elliptical mask to cover the collar ####
119 |         '''
120 |         mask = np.zeros(img.shape, dtype=np.uint8)
121 |         cv2.ellipse(mask, (img.shape[1] // 2, img.shape[0] // 2 - 160 - cut_pad1), (img.shape[1] // 2 + 10, img.shape[0]), 0, 0, 360, (255, 255, 255), -1)
122 |         '''
123 |         #### mask over the eyes ####
124 |         mask = np.ones(img.shape, dtype=np.uint8) * 255
125 |         mask[40 - cut_pad1:140 - cut_pad1, 110 - cut_pad2:-110 - cut_pad2] = 0
126 |         img = cv2.bitwise_and(img, mask)
127 | 
128 |         mask_B = img.copy()
129 |         mask_B = cv2.resize(mask_B, (256, 256))
130 |         ########## neck-segmentation mask ##########
131 |         # img_edge = cv2.imread(img_path.replace("dlib_crop", "dlib_crop_neck"))
132 |         # img_edge = img_edge[cut_pad1:256 + cut_pad1, cut_pad2:256 + cut_pad2]
133 |         # mask_B = cv2.bitwise_and(img, 255 - img_edge)
134 |         # img_edge[:128, :, :] = img[:128, :, :]
135 | 
136 |         ########## extra elliptical neck mask ##########
137 |         '''
138 |         maske = np.zeros(img.shape, dtype=np.uint8)
139 |         cv2.ellipse(maske, (img.shape[1] // 2, img.shape[0] // 2 + 50),
140 |                     (img.shape[1] // 4 + np.random.randint(-5, 5), img.shape[0] // 3 + np.random.randint(-10, 10)),
141 |                     0, 0, 360, (255, 255, 255), -1)
142 |         maske[:img.shape[0] // 2, :, :] = 0
143 |         mask_B = cv2.bitwise_and(mask_B, 255-maske)
144 |         '''
145 |         ########## old rectangular mask ##########
146 |         mask_end = np.random.randint(236, 256)
147 |         mask_B[mask_B.shape[1] // 2 - np.random.randint(40, 50):mask_end, 30:-30] = 0
148 |         ########## old rectangular mask ##########
149 |         ########## Cai Xingyu's triangular mask ##########
150 |         # mask_B = self.add_mouth_mask2(mask_B)
151 |         ########## Cai Xingyu's triangular mask ##########
152 |         # mask_B[mask_B.shape[1] // 2 - 50:, 30:-30] = 0
153 |         img = Image.fromarray(img)
154 |         mask_B = Image.fromarray(mask_B)
155 |         img = self.transforms_image(img)
156 |         mask_B = self.transforms_image(mask_B)
157 |         # lab = Image.open(lab_path).convert('RGB')
158 |         # lab = self.transforms_label(lab)
159 |         audio = np.zeros((256, 256), dtype=np.float32)
160 |         audio_feature = np.array(audio_feature)
161 |         audio[:audio_feature.shape[0], :audio_feature.shape[1]] = audio_feature
162 |         audio = torch.tensor([audio])
163 | 
164 |         imgA_path, _ = random.sample(self.labels, 1)[0]
165 |         imgA = np.array(Image.open(imgA_path).convert('RGB'))
166 |         cut_pad1 = np.random.randint(0, 20)
167 |         cut_pad2 = np.random.randint(0, 20)
168 |         imgA = imgA[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]
169 |         # mask = np.ones(imgA.shape, dtype=np.uint8) * 255
170 |         # mask[40 - cut_pad1:140 - cut_pad1, 110 - cut_pad2:-110 - cut_pad2] = 0
171 |         imgA = cv2.bitwise_and(imgA, mask)
172 |         imgA = Image.fromarray(imgA)
173 |         imgA = self.transforms_image(imgA)
174 |         return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}
175 | 
176 |     def __len__(self):
177 |         """Return the total number of images in the dataset."""
178 |         return len(self.labels)
179 | 
180 | 
181 | if __name__ == '__main__':
182 |     from options.train_options import TrainOptions
183 | 
184 |     opt = TrainOptions().parse()
185 |     dataset = L2FaceAudio512Dataset(opt, mode='train')
186 |     dataset_size = len(dataset)
187 |     print(dataset_size)
188 |     for i, data in enumerate(dataset):
189 |         print(data)
--------------------------------------------------------------------------------
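One detail worth calling out from `__getitem__` above: the audio label handed to the network is always a fixed 1 x 256 x 256 tensor, produced by zero-padding the per-frame feature into a square canvas. The padding step in isolation (the feature shape here is synthetic, for illustration only):

import numpy as np
import torch

# Zero-pad a variable-size per-frame audio feature into the fixed
# 256 x 256 canvas used as 'A_label'/'B_label' above.
audio_feature = np.random.randn(32, 128).astype(np.float32)  # synthetic feature
audio = np.zeros((256, 256), dtype=np.float32)
audio[:audio_feature.shape[0], :audio_feature.shape[1]] = audio_feature
audio = torch.tensor([audio])  # shape: (1, 256, 256)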
image_dir = '{}/{}_dlib_crop'.format(root, img_size)
74 |                 # label_dir = '{}/512_landmark_crop'.format(root)
75 | 
76 |                 # if 'man' in opt.name:
77 |                 #     imgs.sort(key=lambda x:int(x.split('.')[0]))
78 |                 # else:
79 |                 #     imgs.sort(key=lambda x: (int(x.split('.')[0].split('-')[0]), int(x.split('.')[0].split('-')[1])))
80 |                 for img in range(len(index)):
81 |                     img_path = os.path.join(image_dir, index[img])
82 |                     audio_feature = audio_features[img]
83 |                     self.labels.append([img_path, audio_feature])
84 |             # transforms.Resize([img_size, img_size], Image.BICUBIC),
85 |             self.transforms_image = transforms.Compose([transforms.ToTensor(),
86 |                                                         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
87 |             # transforms.Resize([img_size, img_size], Image.BICUBIC),
88 |             self.transforms_label = transforms.Compose([transforms.ToTensor(),
89 |                                                         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
90 |             self.shuffle()
91 | 
92 |     def shuffle(self):
93 |         random.shuffle(self.labels)
94 | 
95 |     def add_mouth_mask2(self, img):
96 |         mask = np.ones_like(img)
97 |         rect_area = [img.shape[1] // 2 - 60, np.random.randint(226, 246), 30, 256 - 30]
98 |         mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]]
99 |         x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2]))
100 |         x = np.flip(x, 0)
101 |         y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose()
102 |         zz1 = -y - x + 88 > 0
103 |         zz2 = np.flip(zz1, 1)
104 |         zz = (zz1 + zz2) > 0
105 |         mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1
106 |         imgm = img * mask
107 |         return imgm
108 | 
109 |     def __getitem__(self, index):
110 |         cv2.setNumThreads(0)
111 |         img_path, audio_feature = self.labels[index]
112 |         img = np.array(Image.open(img_path).convert('RGB'))
113 |         img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
114 |         cut_pad1 = np.random.randint(0, 10)
115 |         cut_pad2 = np.random.randint(0, 10)
116 |         img = img[cut_pad1:256 + cut_pad1, cut_pad2:256 + cut_pad2]
117 | 
118 |         #### mask over the eyes ####
119 |         mask = np.ones(img.shape, dtype=np.uint8) * 255
120 |         mask[20 - cut_pad1:70 - cut_pad1, 55 - cut_pad2:-55 - cut_pad2] = 0
121 |         img = cv2.bitwise_and(img, mask)
122 | 
123 |         mask_B = img.copy()
124 |         mask_end = np.random.randint(236, 256)
125 |         ########## old rectangular mask ##########
126 |         mask_B[mask_B.shape[1] // 2 - np.random.randint(40, 50):mask_end, 30:-30] = 0
127 |         ########## old rectangular mask ##########
128 |         ########## Cai Xingyu's triangular mask ##########
129 |         # mask_B = self.add_mouth_mask2(mask_B)
130 |         ########## Cai Xingyu's triangular mask ##########
131 |         # mask_B[mask_B.shape[1] // 2 - 50:, 30:-30] = 0
132 |         img = Image.fromarray(img)
133 |         mask_B = Image.fromarray(mask_B)
134 |         img = self.transforms_image(img)
135 |         mask_B = self.transforms_image(mask_B)
136 |         # lab = Image.open(lab_path).convert('RGB')
137 |         # lab = self.transforms_label(lab)
138 |         audio = np.zeros((256, 256), dtype=np.float32)
139 |         audio_feature = np.array(audio_feature)
140 |         audio[:audio_feature.shape[0], :audio_feature.shape[1]] = audio_feature
141 |         audio = torch.tensor([audio])
142 | 
143 |         imgA_path, _ = random.sample(self.labels, 1)[0]
144 |         imgA = np.array(Image.open(imgA_path).convert('RGB'))
145 |         cut_pad1 = np.random.randint(0, 10)
146 |         cut_pad2 = np.random.randint(0, 10)
147 |         imgA = imgA[cut_pad1:256 + cut_pad1, cut_pad2:256 + cut_pad2]
148 |         imgA = cv2.bitwise_and(imgA, mask)
149 |         imgA = Image.fromarray(imgA)
150 |         imgA = self.transforms_image(imgA)
151 |         return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}
152 | 
153 |     def __len__(self):
154 |         """Return the total number of images in the dataset."""
155 |         return len(self.labels)
156 | 
157 | 
158 | if __name__ == '__main__':
159 |     from options.train_options import TrainOptions
160 | 
161 |     opt = TrainOptions().parse()
162 |     dataset = L2FaceAudioDataset(opt, mode='train')
163 |     dataset_size = len(dataset)
164 |     print(dataset_size)
165 |     for i, data in enumerate(dataset):
166 |         print(data)
--------------------------------------------------------------------------------
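Both audio datasets train the generator to inpaint the lower face: a randomized rectangle over the mouth/neck region of `mask_B` is zeroed out, so only the audio label carries that information. The augmentation in isolation (the input image is synthetic, for illustration only):

import numpy as np

# Randomized rectangular mouth mask, as applied to the 256x256 variant above.
img = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)  # synthetic crop
mask_B = img.copy()
mask_end = np.random.randint(236, 256)
mask_B[mask_B.shape[1] // 2 - np.random.randint(40, 50):mask_end, 30:-30] = 0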
16 | 
17 | Now you can use the model class by specifying flag '--model dummy'.
18 | See our template model class 'template_model.py' for more details.
19 | """
20 | 
21 | import importlib
22 | from landmark2face_wy.models.base_model import BaseModel
23 | 
24 | 
25 | def find_model_using_name(model_name):
26 |     """Import the module "models/[model_name]_model.py".
27 | 
28 |     In the file, the class called DatasetNameModel() will
29 |     be instantiated. It has to be a subclass of BaseModel,
30 |     and it is case-insensitive.
31 |     """
32 |     model_filename = "landmark2face_wy.models." + model_name + "_model"
33 |     modellib = importlib.import_module(model_filename)
34 |     model = None
35 |     target_model_name = model_name.replace('_', '') + 'model'
36 |     for name, cls in modellib.__dict__.items():
37 |         if name.lower() == target_model_name.lower() \
38 |            and issubclass(cls, BaseModel):
39 |             model = cls
40 | 
41 |     if model is None:
42 |         print("In %s.py, there should be a subclass of BaseModel with class name that matches %s in lowercase." % (model_filename, target_model_name))
43 |         exit(0)
44 | 
45 |     return model
46 | 
47 | 
48 | def get_option_setter(model_name):
49 |     """Return the static method <modify_commandline_options> of the model class."""
50 |     model_class = find_model_using_name(model_name)
51 |     return model_class.modify_commandline_options
52 | 
53 | 
54 | def create_model(opt):
55 |     """Create a model given the option.
56 | 
57 |     This function wraps the model class.
58 |     This is the main interface between this package and 'train.py'/'test.py'
59 | 
60 |     Example:
61 |         >>> from landmark2face_wy.models import create_model
62 |         >>> model = create_model(opt)
63 |     """
64 |     model = find_model_using_name(opt.model)
65 |     instance = model(opt)
66 |     print("model [%s] was created" % type(instance).__name__)
67 |     return instance
68 | 
--------------------------------------------------------------------------------
/landmark2face_wy/models/base_function.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/base_function.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/models/base_model.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/base_model.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/models/face3d2face_model.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/face3d2face_model.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/models/l2faceaudio_model.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/l2faceaudio_model.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/models/networks.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/networks.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/models/networks_HD.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/networks_HD.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/models/networks_pix2pixHD.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/networks_pix2pixHD.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/models/pirender_3dmm_mouth_hd_model.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/pirender_3dmm_mouth_hd_model.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/models/pirender_3dmm_mouth_hdv2_model.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/pirender_3dmm_mouth_hdv2_model.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/options/__init__.py: -------------------------------------------------------------------------------- 1 | """This package options includes option modules: training options, test options, and basic options (used in both training and test).""" 2 | -------------------------------------------------------------------------------- /landmark2face_wy/options/base_options.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/options/base_options.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/options/test_options.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/options/test_options.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/options/train_options.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/options/train_options.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/sync_batchnorm/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : __init__.py 3 | # Author : Jiayuan Mao 4 | # Email : maojiayuan@gmail.com 5 | # Date : 27/01/2018 6 | # 7 | # This file is part of Synchronized-BatchNorm-PyTorch. 8 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch 9 | # Distributed under MIT License. 10 | 11 | from .batchnorm import set_sbn_eps_mode 12 | from .batchnorm import SynchronizedBatchNorm1d, SynchronizedBatchNorm2d, SynchronizedBatchNorm3d 13 | from .batchnorm import patch_sync_batchnorm, convert_model 14 | from .replicate import DataParallelWithCallback, patch_replication_callback 15 | -------------------------------------------------------------------------------- /landmark2face_wy/sync_batchnorm/batchnorm.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/sync_batchnorm/batchnorm.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/sync_batchnorm/batchnorm_reimpl.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/sync_batchnorm/batchnorm_reimpl.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/sync_batchnorm/comm.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/sync_batchnorm/comm.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/sync_batchnorm/replicate.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/sync_batchnorm/replicate.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/sync_batchnorm/unittest.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/sync_batchnorm/unittest.cpython-38-x86_64-linux-gnu.so 
-------------------------------------------------------------------------------- /landmark2face_wy/test_3dmm_multi_exp_wenet.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/test_3dmm_multi_exp_wenet.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/test_3dmm_multi_exp_wenet0.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/test_3dmm_multi_exp_wenet0.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/util/__init__.py: -------------------------------------------------------------------------------- 1 | """This package includes a miscellaneous collection of useful helper functions.""" 2 | -------------------------------------------------------------------------------- /landmark2face_wy/util/flow_util.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/util/flow_util.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/util/get_data.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/util/get_data.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/util/html.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/util/html.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/util/image_pool.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/util/image_pool.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/util/util.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/util/util.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /landmark2face_wy/util/visualizer.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/util/visualizer.cpython-38-x86_64-linux-gnu.so 
-------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/license.txt -------------------------------------------------------------------------------- /log/dh.log: -------------------------------------------------------------------------------- 1 | [2025-03-18 12:50:40,644] [run.py[line:153]] [INFO] [TransDhTask init] 2 | [2025-03-18 12:50:41,729] [run.py[line:158]] [INFO] [任务:1002 -> audio_url:./temp/example/audio.wav video_url:./temp/example/video.mp4] 3 | [2025-03-18 12:50:41,732] [run.py[line:158]] [INFO] [[1002] -> ffmpeg video: ffmpeg -loglevel warning -i ./temp/example/video.mp4 -c:v libx264 -crf 15 -an -y ./temp/1002_format.mp4] 4 | [2025-03-18 12:50:41,790] [run.py[line:158]] [ERROR] [[1002]预处理失败,异常信息:[format video error]] 5 | [2025-03-18 12:50:41,790] [run.py[line:158]] [ERROR] [[1002]任务执行失败,异常信息:[[1002]预处理失败,异常信息:[format video error]]] 6 | [2025-03-18 12:50:41,791] [run.py[line:158]] [INFO] [>>> 任务:1002 耗时:0.06167912483215332 ] 7 | [2025-03-18 12:50:57,817] [run.py[line:143]] [INFO] [TransDhTask init] 8 | [2025-03-18 12:50:58,906] [run.py[line:147]] [INFO] [任务:1002 -> audio_url:./temp/example/audio.wav video_url:./temp/example/video.mp4] 9 | [2025-03-18 12:50:58,908] [run.py[line:147]] [INFO] [[1002] -> ffmpeg video: ffmpeg -loglevel warning -i ./temp/example/video.mp4 -c:v libx264 -crf 15 -an -y ./temp/1002_format.mp4] 10 | [2025-03-18 12:50:58,964] [run.py[line:147]] [ERROR] [[1002]预处理失败,异常信息:[format video error]] 11 | [2025-03-18 12:50:58,965] [run.py[line:147]] [ERROR] [[1002]任务执行失败,异常信息:[[1002]预处理失败,异常信息:[format video error]]] 12 | [2025-03-18 12:50:58,966] [run.py[line:147]] [INFO] [>>> 任务:1002 耗时:0.059505462646484375 ] 13 | [2025-03-18 12:52:06,385] [run.py[line:143]] [INFO] [TransDhTask init] 14 | [2025-03-18 12:52:07,560] [run.py[line:147]] [INFO] [任务:1002 -> audio_url:./example/audio.wav video_url:./example/video.mp4] 15 | [2025-03-18 12:52:07,646] [run.py[line:147]] [INFO] [[1002] -> ffmpeg video: ffmpeg -loglevel warning -i ./example/video.mp4 -crf 15 -vcodec copy -an -y ./1002_format.mp4] 16 | [2025-03-18 12:52:07,801] [run.py[line:147]] [INFO] [[1002] -> ffmpeg audio: ffmpeg -loglevel warning -i ./example/audio.wav -ac 1 -ar 16000 -acodec pcm_s16le -y ./1002_format.wav] 17 | [2025-03-18 12:52:07,922] [run.py[line:147]] [INFO] [[1002] -> 预处理耗时:0.35927414894104004s] 18 | [2025-03-18 12:52:10,169] [run.py[line:147]] [INFO] [[1002] -> get_aud_feat1 cost:2.245649576187134s] 19 | [2025-03-18 12:52:11,702] [process.py[line:108]] [INFO] [>>> init_wh_process进程启动] 20 | [2025-03-18 12:52:20,087] [process.py[line:108]] [INFO] [[1002]init_wh result :[0.8809176216714891], cost: 8.382684469223022 s] 21 | [2025-03-18 12:52:20,090] [run.py[line:147]] [INFO] [[1002] -> wh: [0.8809176216714891]] 22 | [2025-03-18 12:52:21,453] [process.py[line:108]] [INFO] [>>> 数字人图片处理进程启动] 23 | [2025-03-18 12:52:24,015] [process.py[line:108]] [INFO] [[1002]任务视频驱动队列启动 batch_size:4, len:150] 24 | [2025-03-18 12:52:24,050] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 开始循环] 25 | [2025-03-18 12:52:24,085] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:4] 26 | [2025-03-18 12:52:24,112] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:8] 27 | [2025-03-18 12:52:24,122] 
[process.py[line:108]] [INFO] [>>> audio_transfer get message:4] 28 | [2025-03-18 12:52:24,139] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:12] 29 | [2025-03-18 12:52:24,148] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:16] 30 | [2025-03-18 12:52:24,161] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:20] 31 | [2025-03-18 12:52:24,173] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:24] 32 | [2025-03-18 12:52:24,185] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:28] 33 | [2025-03-18 12:52:24,197] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:32] 34 | [2025-03-18 12:52:24,208] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:36] 35 | [2025-03-18 12:52:24,222] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:40] 36 | [2025-03-18 12:52:24,232] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:44] 37 | [2025-03-18 12:52:25,722] [process.py[line:108]] [INFO] [[1002] -> frame_id:[4] 模糊置信度:[0.969]] 38 | [2025-03-18 12:52:25,723] [process.py[line:108]] [INFO] [[1002] -> need chaofen .] 39 | [2025-03-18 12:52:25,905] [utils.py[line:145]] [INFO] [Note: detected 72 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable.] 40 | [2025-03-18 12:52:25,906] [utils.py[line:148]] [INFO] [Note: NumExpr detected 72 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.] 41 | [2025-03-18 12:52:25,907] [utils.py[line:160]] [INFO] [NumExpr defaulting to 8 threads.] 
42 | [2025-03-18 12:52:26,083] [process.py[line:108]] [INFO] [[4] -> chaofen cost:1.9595112800598145s] 43 | [2025-03-18 12:52:31,071] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:4, cost:6.948575258255005s] 44 | [2025-03-18 12:52:31,116] [process.py[line:108]] [INFO] [>>> audio_transfer get message:8] 45 | [2025-03-18 12:52:31,126] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:48] 46 | [2025-03-18 12:52:31,347] [process.py[line:108]] [INFO] [[8] -> chaofen cost:0.2294461727142334s] 47 | [2025-03-18 12:52:31,576] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:8, cost:0.45979762077331543s] 48 | [2025-03-18 12:52:31,605] [process.py[line:108]] [INFO] [>>> audio_transfer get message:12] 49 | [2025-03-18 12:52:31,615] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:52] 50 | [2025-03-18 12:52:31,818] [process.py[line:108]] [INFO] [[12] -> chaofen cost:0.21271824836730957s] 51 | [2025-03-18 12:52:32,036] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:12, cost:0.43187427520751953s] 52 | [2025-03-18 12:52:32,060] [process.py[line:108]] [INFO] [>>> audio_transfer get message:16] 53 | [2025-03-18 12:52:32,072] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:56] 54 | [2025-03-18 12:52:32,279] [process.py[line:108]] [INFO] [[16] -> chaofen cost:0.21899199485778809s] 55 | [2025-03-18 12:52:32,530] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:16, cost:0.47049522399902344s] 56 | [2025-03-18 12:52:32,552] [process.py[line:108]] [INFO] [>>> audio_transfer get message:20] 57 | [2025-03-18 12:52:32,567] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:60] 58 | [2025-03-18 12:52:32,766] [process.py[line:108]] [INFO] [[20] -> chaofen cost:0.21334147453308105s] 59 | [2025-03-18 12:52:32,993] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:20, cost:0.4411466121673584s] 60 | [2025-03-18 12:52:33,015] [process.py[line:108]] [INFO] [>>> audio_transfer get message:24] 61 | [2025-03-18 12:52:33,028] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:64] 62 | [2025-03-18 12:52:33,229] [process.py[line:108]] [INFO] [[24] -> chaofen cost:0.21344351768493652s] 63 | [2025-03-18 12:52:33,457] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:24, cost:0.44205546379089355s] 64 | [2025-03-18 12:52:33,479] [process.py[line:108]] [INFO] [>>> audio_transfer get message:28] 65 | [2025-03-18 12:52:33,493] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:68] 66 | [2025-03-18 12:52:33,697] [process.py[line:108]] [INFO] [[28] -> chaofen cost:0.21679949760437012s] 67 | [2025-03-18 12:52:33,924] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:28, cost:0.4448537826538086s] 68 | [2025-03-18 12:52:33,946] [process.py[line:108]] [INFO] [>>> audio_transfer get message:32] 69 | [2025-03-18 12:52:33,960] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:72] 70 | [2025-03-18 12:52:34,159] [process.py[line:108]] [INFO] [[32] -> chaofen cost:0.21156740188598633s] 71 | [2025-03-18 12:52:34,381] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:32, cost:0.43474769592285156s] 72 | [2025-03-18 12:52:34,403] 
[process.py[line:108]] [INFO] [>>> audio_transfer get message:36] 73 | [2025-03-18 12:52:34,417] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:76] 74 | [2025-03-18 12:52:34,618] [process.py[line:108]] [INFO] [[36] -> chaofen cost:0.21408891677856445s] 75 | [2025-03-18 12:52:34,844] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:36, cost:0.4406392574310303s] 76 | [2025-03-18 12:52:34,867] [process.py[line:108]] [INFO] [>>> audio_transfer get message:40] 77 | [2025-03-18 12:52:34,881] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:80] 78 | [2025-03-18 12:52:35,099] [process.py[line:108]] [INFO] [[40] -> chaofen cost:0.23105645179748535s] 79 | [2025-03-18 12:52:35,328] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:40, cost:0.46161866188049316s] 80 | [2025-03-18 12:52:35,350] [process.py[line:108]] [INFO] [>>> audio_transfer get message:44] 81 | [2025-03-18 12:52:35,363] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:84] 82 | [2025-03-18 12:52:35,577] [process.py[line:108]] [INFO] [[44] -> chaofen cost:0.22576594352722168s] 83 | [2025-03-18 12:52:35,808] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:44, cost:0.4577639102935791s] 84 | [2025-03-18 12:52:35,832] [process.py[line:108]] [INFO] [>>> audio_transfer get message:48] 85 | [2025-03-18 12:52:35,846] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:88] 86 | [2025-03-18 12:52:36,047] [process.py[line:108]] [INFO] [[48] -> chaofen cost:0.21441864967346191s] 87 | [2025-03-18 12:52:36,278] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:48, cost:0.4459846019744873s] 88 | [2025-03-18 12:52:36,301] [process.py[line:108]] [INFO] [>>> audio_transfer get message:52] 89 | [2025-03-18 12:52:36,315] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:92] 90 | [2025-03-18 12:52:36,521] [process.py[line:108]] [INFO] [[52] -> chaofen cost:0.2181704044342041s] 91 | [2025-03-18 12:52:36,777] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:52, cost:0.47586750984191895s] 92 | [2025-03-18 12:52:36,798] [process.py[line:108]] [INFO] [>>> audio_transfer get message:56] 93 | [2025-03-18 12:52:36,817] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:96] 94 | [2025-03-18 12:52:37,014] [process.py[line:108]] [INFO] [[56] -> chaofen cost:0.2147221565246582s] 95 | [2025-03-18 12:52:37,247] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:56, cost:0.4486660957336426s] 96 | [2025-03-18 12:52:37,266] [process.py[line:108]] [INFO] [>>> audio_transfer get message:60] 97 | [2025-03-18 12:52:37,281] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:100] 98 | [2025-03-18 12:52:37,483] [process.py[line:108]] [INFO] [[60] -> chaofen cost:0.21598410606384277s] 99 | [2025-03-18 12:52:37,703] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:60, cost:0.43683695793151855s] 100 | [2025-03-18 12:52:37,722] [process.py[line:108]] [INFO] [>>> audio_transfer get message:64] 101 | [2025-03-18 12:52:37,736] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:104] 102 | [2025-03-18 12:52:37,941] [process.py[line:108]] [INFO] [[64] -> chaofen 
cost:0.2180624008178711s] 103 | [2025-03-18 12:52:38,163] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:64, cost:0.4412345886230469s] 104 | [2025-03-18 12:52:38,183] [process.py[line:108]] [INFO] [>>> audio_transfer get message:68] 105 | [2025-03-18 12:52:38,197] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:108] 106 | [2025-03-18 12:52:38,397] [process.py[line:108]] [INFO] [[68] -> chaofen cost:0.21321654319763184s] 107 | [2025-03-18 12:52:38,637] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:68, cost:0.45404863357543945s] 108 | [2025-03-18 12:52:38,656] [process.py[line:108]] [INFO] [>>> audio_transfer get message:72] 109 | [2025-03-18 12:52:38,670] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:112] 110 | [2025-03-18 12:52:38,877] [process.py[line:108]] [INFO] [[72] -> chaofen cost:0.21999263763427734s] 111 | [2025-03-18 12:52:39,100] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:72, cost:0.4440436363220215s] 112 | [2025-03-18 12:52:39,119] [process.py[line:108]] [INFO] [>>> audio_transfer get message:76] 113 | [2025-03-18 12:52:39,133] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:116] 114 | [2025-03-18 12:52:39,347] [process.py[line:108]] [INFO] [[76] -> chaofen cost:0.22693967819213867s] 115 | [2025-03-18 12:52:39,568] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:76, cost:0.4492220878601074s] 116 | [2025-03-18 12:52:39,586] [process.py[line:108]] [INFO] [>>> audio_transfer get message:80] 117 | [2025-03-18 12:52:39,601] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:120] 118 | [2025-03-18 12:52:39,801] [process.py[line:108]] [INFO] [[80] -> chaofen cost:0.21407222747802734s] 119 | [2025-03-18 12:52:40,024] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:80, cost:0.4377562999725342s] 120 | [2025-03-18 12:52:40,052] [process.py[line:108]] [INFO] [>>> audio_transfer get message:84] 121 | [2025-03-18 12:52:40,068] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:124] 122 | [2025-03-18 12:52:40,270] [process.py[line:108]] [INFO] [[84] -> chaofen cost:0.21637320518493652s] 123 | [2025-03-18 12:52:40,494] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:84, cost:0.44118523597717285s] 124 | [2025-03-18 12:52:40,513] [process.py[line:108]] [INFO] [>>> audio_transfer get message:88] 125 | [2025-03-18 12:52:40,527] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:128] 126 | [2025-03-18 12:52:40,731] [process.py[line:108]] [INFO] [[88] -> chaofen cost:0.2170412540435791s] 127 | [2025-03-18 12:52:40,951] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:88, cost:0.4383111000061035s] 128 | [2025-03-18 12:52:40,971] [process.py[line:108]] [INFO] [>>> audio_transfer get message:92] 129 | [2025-03-18 12:52:40,984] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:132] 130 | [2025-03-18 12:52:41,187] [process.py[line:108]] [INFO] [[92] -> chaofen cost:0.2148122787475586s] 131 | [2025-03-18 12:52:41,416] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:92, cost:0.4454326629638672s] 132 | [2025-03-18 12:52:41,439] [process.py[line:108]] [INFO] [>>> audio_transfer 
get message:96] 133 | [2025-03-18 12:52:41,451] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:136] 134 | [2025-03-18 12:52:41,663] [process.py[line:108]] [INFO] [[96] -> chaofen cost:0.222761869430542s] 135 | [2025-03-18 12:52:41,887] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:96, cost:0.4477369785308838s] 136 | [2025-03-18 12:52:41,906] [process.py[line:108]] [INFO] [>>> audio_transfer get message:100] 137 | [2025-03-18 12:52:41,920] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:140] 138 | [2025-03-18 12:52:42,123] [process.py[line:108]] [INFO] [[100] -> chaofen cost:0.21576929092407227s] 139 | [2025-03-18 12:52:42,359] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:100, cost:0.4525878429412842s] 140 | [2025-03-18 12:52:42,379] [process.py[line:108]] [INFO] [>>> audio_transfer get message:104] 141 | [2025-03-18 12:52:42,394] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:144] 142 | [2025-03-18 12:52:42,596] [process.py[line:108]] [INFO] [[104] -> chaofen cost:0.21553897857666016s] 143 | [2025-03-18 12:52:42,836] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:104, cost:0.45633435249328613s] 144 | [2025-03-18 12:52:42,855] [process.py[line:108]] [INFO] [>>> audio_transfer get message:108] 145 | [2025-03-18 12:52:42,870] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:148] 146 | [2025-03-18 12:52:42,873] [process.py[line:108]] [INFO] [append imgs over] 147 | [2025-03-18 12:52:42,879] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据结束] 148 | [2025-03-18 12:52:43,073] [process.py[line:108]] [INFO] [[108] -> chaofen cost:0.21662592887878418s] 149 | [2025-03-18 12:52:43,297] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:108, cost:0.4421381950378418s] 150 | [2025-03-18 12:52:43,318] [process.py[line:108]] [INFO] [>>> audio_transfer get message:112] 151 | [2025-03-18 12:52:43,332] [process.py[line:108]] [INFO] [[1002]任务预处理进程结束] 152 | [2025-03-18 12:52:43,531] [process.py[line:108]] [INFO] [[112] -> chaofen cost:0.21228814125061035s] 153 | [2025-03-18 12:52:43,791] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:112, cost:0.47336626052856445s] 154 | [2025-03-18 12:52:43,811] [process.py[line:108]] [INFO] [>>> audio_transfer get message:116] 155 | [2025-03-18 12:52:44,034] [process.py[line:108]] [INFO] [[116] -> chaofen cost:0.2223985195159912s] 156 | [2025-03-18 12:52:44,262] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:116, cost:0.4509873390197754s] 157 | [2025-03-18 12:52:44,281] [process.py[line:108]] [INFO] [>>> audio_transfer get message:120] 158 | [2025-03-18 12:52:44,499] [process.py[line:108]] [INFO] [[120] -> chaofen cost:0.21637916564941406s] 159 | [2025-03-18 12:52:44,742] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:120, cost:0.46120476722717285s] 160 | [2025-03-18 12:52:44,762] [process.py[line:108]] [INFO] [>>> audio_transfer get message:124] 161 | [2025-03-18 12:52:44,981] [process.py[line:108]] [INFO] [[124] -> chaofen cost:0.21886157989501953s] 162 | [2025-03-18 12:52:45,240] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:124, cost:0.4781684875488281s] 163 | [2025-03-18 12:52:45,258] [process.py[line:108]] [INFO] [>>> audio_transfer 
get message:128] 164 | [2025-03-18 12:52:45,474] [process.py[line:108]] [INFO] [[128] -> chaofen cost:0.21480226516723633s] 165 | [2025-03-18 12:52:45,708] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:128, cost:0.44920992851257324s] 166 | [2025-03-18 12:52:45,726] [process.py[line:108]] [INFO] [>>> audio_transfer get message:132] 167 | [2025-03-18 12:52:45,943] [process.py[line:108]] [INFO] [[132] -> chaofen cost:0.21567535400390625s] 168 | [2025-03-18 12:52:46,181] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:132, cost:0.45519399642944336s] 169 | [2025-03-18 12:52:46,200] [process.py[line:108]] [INFO] [>>> audio_transfer get message:136] 170 | [2025-03-18 12:52:46,418] [process.py[line:108]] [INFO] [[136] -> chaofen cost:0.21763992309570312s] 171 | [2025-03-18 12:52:46,662] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:136, cost:0.4619452953338623s] 172 | [2025-03-18 12:52:46,681] [process.py[line:108]] [INFO] [>>> audio_transfer get message:140] 173 | [2025-03-18 12:52:46,900] [process.py[line:108]] [INFO] [[140] -> chaofen cost:0.21794748306274414s] 174 | [2025-03-18 12:52:47,146] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:140, cost:0.4646177291870117s] 175 | [2025-03-18 12:52:47,166] [process.py[line:108]] [INFO] [>>> audio_transfer get message:144] 176 | [2025-03-18 12:52:47,382] [process.py[line:108]] [INFO] [[144] -> chaofen cost:0.21491503715515137s] 177 | [2025-03-18 12:52:47,619] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:144, cost:0.4536001682281494s] 178 | [2025-03-18 12:52:47,639] [process.py[line:108]] [INFO] [>>> audio_transfer get message:148] 179 | [2025-03-18 12:52:47,857] [process.py[line:108]] [INFO] [[148] -> chaofen cost:0.21780657768249512s] 180 | [2025-03-18 12:52:48,098] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:148, cost:0.459348201751709s] 181 | [2025-03-18 12:52:48,104] [process.py[line:108]] [INFO] [>>> audio_transfer get exception msg:-1] 182 | [2025-03-18 12:52:48,105] [process.py[line:108]] [INFO] [[1002]任务数字人图片处理已完成] 183 | [2025-03-18 12:52:48,146] [run.py[line:43]] [INFO] [Custom VideoWriter [1002]视频帧队列处理已结束] 184 | [2025-03-18 12:52:48,151] [run.py[line:46]] [INFO] [Custom VideoWriter Silence Video saved in /mnt/nfs/bj4-v100-23/data1/yubosun/git_proj/heygem/heygem_ori_so/1002-t.mp4] 185 | [2025-03-18 12:52:48,155] [run.py[line:118]] [INFO] [Custom command:ffmpeg -loglevel warning -y -i ./example/audio.wav -i ./1002-t.mp4 -c:a aac -c:v libx264 -crf 15 -strict -2 ./1002-r.mp4] 186 | [2025-03-18 12:53:06,908] [run.py[line:147]] [INFO] [>>> 任务:1002 耗时:59.3451771736145 ] 187 | -------------------------------------------------------------------------------- /model_lib/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_wrapper import ONNXModel 2 | from .model_base import ModelBase 3 | 4 | 5 | -------------------------------------------------------------------------------- /model_lib/base_wrapper/__init__.py: -------------------------------------------------------------------------------- 1 | # -- coding: utf-8 -- 2 | # @Time : 2022/8/26 3 | 4 | 5 | from .onnx_model import ONNXModel 6 | 7 | -------------------------------------------------------------------------------- /model_lib/base_wrapper/onnx_model.cpython-38-x86_64-linux-gnu.so: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/model_lib/base_wrapper/onnx_model.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/model_lib/model_base.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 | # @Time : 2022/7/29
3 |
4 |
5 |
6 | from .base_wrapper import ONNXModel
7 | from pathlib import Path
8 |
9 |
10 | try:
11 |     from .base_wrapper import TRTWrapper, TRTWrapperSelf
12 | except ImportError:  # the TensorRT wrappers are optional; ONNX models still load without them
13 |     pass
14 |
15 |
16 | # from cv2box.utils import try_import
17 |
18 | class ModelBase:
19 |     def __init__(self, model_info, provider):
20 |         self.model_path = model_info['model_path']
21 |
22 |         if 'input_dynamic_shape' in model_info:
23 |             self.input_dynamic_shape = model_info['input_dynamic_shape']
24 |         else:
25 |             self.input_dynamic_shape = None
26 |
27 |         if 'picklable' in model_info:
28 |             picklable = model_info['picklable']
29 |         else:
30 |             picklable = False
31 |
32 |         use_trt_wrapper_self = 'trt_wrapper_self' in model_info
33 |         # (TRTWrapper / TRTWrapperSelf only exist when the optional import above succeeded)
34 |
35 |         # init model by file suffix; TJMWrapper, load_encrypt_model and OnnxModelPickable below are expected from other base_wrapper builds (only the ONNX path is complete in this repo)
36 |         if Path(self.model_path).suffix == '.engine':
37 |             self.model_type = 'trt'
38 |             self.model = (TRTWrapperSelf if use_trt_wrapper_self else TRTWrapper)(self.model_path)
39 |         elif Path(self.model_path).suffix == '.tjm':
40 |             self.model_type = 'tjm'
41 |             self.model = TJMWrapper(self.model_path, provider=provider)
42 |         elif Path(self.model_path).suffix in ['.onnx', '.bin']:
43 |             self.model_type = 'onnx'
44 |             if not picklable:
45 |                 if 'encrypt' in model_info:
46 |                     self.model_path = load_encrypt_model(self.model_path, key=model_info['encrypt'])
47 |                 self.model = ONNXModel(self.model_path, provider=provider, input_dynamic_shape=self.input_dynamic_shape)
48 |             else:
49 |                 self.model = OnnxModelPickable(self.model_path, provider=provider)
50 |         else:
51 |             raise ValueError('check model suffix, support engine/tjm/onnx now.')
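# A hedged usage sketch (not part of the original file): ModelBase dispatches on the
# model file's suffix, so an ONNX model would be wrapped roughly as below. The path is
# hypothetical, and the accepted `provider` values are defined inside the compiled
# base_wrapper ONNXModel, so treat both as assumptions:
#
#     info = {'model_path': 'weights/example_face.onnx', 'input_dynamic_shape': None}
#     wrapper = ModelBase(info, provider='gpu')
#     assert wrapper.model_type == 'onnx'   # '.engine' -> 'trt', '.tjm' -> 'tjm'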
52 | -------------------------------------------------------------------------------- /preprocess_audio_and_3dmm.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/preprocess_audio_and_3dmm.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cv2box==0.5.9 2 | apstone==0.0.8 3 | appdirs==1.4.4 4 | audioread==2.1.9 5 | typeguard==2.13.3 6 | cffi==1.15.0 7 | charset-normalizer==2.0.12 8 | click==8.1.3 9 | colorama==0.4.4 10 | cycler==0.11.0 11 | decorator==5.1.1 12 | filelock==3.7.1 13 | flatbuffers==2.0 14 | fonttools==4.36.0 15 | freetype-py==2.3.0 16 | huggingface-hub==0.0.8 17 | idna==3.3 18 | imageio==2.19.3 19 | importlib-metadata==4.11.4 20 | joblib==1.1.0 21 | kiwisolver==1.4.4 22 | kornia==0.6.6 23 | librosa==0.8.1 24 | matplotlib==3.5.3 25 | networkx==2.6.3 26 | numba==0.55.2 27 | numexpr==2.8.6 28 | numpy==1.21.6 29 | onnxruntime-gpu==1.9.0 30 | opencv-python==4.7.0.72 31 | packaging==21.3 32 | pillow==9.1.1 33 | pooch==1.6.0 34 | protobuf==4.21.5 35 | psutil==5.9.1 36 | pycparser==2.21 37 | pyglet==1.5.26 38 | pyopengl==3.1.0 39 | pyparsing==3.0.9 40 | pyrender==0.1.45 41 | python-dateutil==2.8.2 42 | pywavelets==1.3.0 43 | pyyaml==6.0 44 | regex==2022.6.2 45 | requests==2.27.1 46 | resampy==0.2.2 47 | sacremoses==0.0.53 48 | scikit-image==0.19.3 49 | scikit-learn==1.0.2 50 | scipy==1.7.1 51 | six==1.16.0 52 | soundfile==0.10.3.post1 53 | threadpoolctl==3.1.0 54 | tifffile==2021.11.2 55 | tokenizers==0.10.3 56 | torch==1.11.0+cu113 57 | torchaudio==0.11.0+cu113 58 | torchvision==0.12.0+cu113 59 | tqdm==4.64.0 60 | transformers==4.6.1 61 | trimesh==3.12.7 62 | typeguard==2.13.3 63 | typing-extensions==4.2.0 64 | urllib3==1.26.9 65 | zipp==3.8.0 66 | -------------------------------------------------------------------------------- /requirements_0.txt: -------------------------------------------------------------------------------- 1 | aiofiles==23.2.1 2 | annotated-types==0.7.0 3 | anyio==4.5.2 4 | apstone==0.0.8 5 | audioread==3.0.1 6 | blinker==1.8.2 7 | certifi==2025.1.31 8 | cffi==1.17.1 9 | charset-normalizer==3.4.1 10 | click==8.1.8 11 | coloredlogs==15.0.1 12 | contourpy==1.1.1 13 | cv2box==0.5.9 14 | cycler==0.12.1 15 | decorator==5.2.1 16 | einops==0.8.1 17 | exceptiongroup==1.2.2 18 | fastapi==0.115.11 19 | ffmpy==0.5.0 20 | filelock==3.16.1 21 | Flask==3.0.3 22 | flatbuffers==25.2.10 23 | fonttools==4.56.0 24 | fsspec==2025.3.0 25 | gradio==4.44.1 26 | gradio_client==1.3.0 27 | h11==0.14.0 28 | httpcore==1.0.7 29 | httpx==0.28.1 30 | huggingface-hub==0.29.3 31 | humanfriendly==10.0 32 | idna==3.10 33 | imageio==2.35.1 34 | importlib_metadata==8.5.0 35 | importlib_resources==6.4.5 36 | itsdangerous==2.2.0 37 | Jinja2==3.1.6 38 | joblib==1.4.2 39 | kiwisolver==1.4.7 40 | lazy_loader==0.4 41 | librosa==0.11.0 42 | llvmlite==0.41.1 43 | markdown-it-py==3.0.0 44 | MarkupSafe==2.1.5 45 | matplotlib==3.7.5 46 | mdurl==0.1.2 47 | mpmath==1.3.0 48 | msgpack==1.1.0 49 | networkx==3.1 50 | numba==0.58.1 51 | numexpr==2.8.6 52 | numpy==1.24.4 53 | onnxruntime-gpu==1.16.0 54 | opencv-python==4.11.0.86 55 | orjson==3.10.15 56 | packaging==24.2 57 | pandas==2.0.3 58 | pillow==10.4.0 59 | platformdirs==4.3.6 60 | pooch==1.8.2 61 | protobuf==5.29.4 62 | 
pycparser==2.22 63 | pydantic==2.10.6 64 | pydantic_core==2.27.2 65 | pydub==0.25.1 66 | Pygments==2.19.1 67 | pyparsing==3.1.4 68 | python-dateutil==2.9.0.post0 69 | python-multipart==0.0.20 70 | pytz==2025.1 71 | PyWavelets==1.4.1 72 | PyYAML==6.0.2 73 | requests==2.32.3 74 | rich==13.9.4 75 | ruff==0.11.1 76 | scikit-image==0.21.0 77 | scikit-learn==1.3.2 78 | scipy==1.10.1 79 | semantic-version==2.10.0 80 | shellingham==1.5.4 81 | six==1.17.0 82 | sniffio==1.3.1 83 | soundfile==0.13.1 84 | soxr==0.3.7 85 | spark-parser==1.8.9 86 | starlette==0.44.0 87 | sympy==1.13.3 88 | threadpoolctl==3.5.0 89 | tifffile==2023.7.10 90 | tomlkit==0.12.0 91 | torch==1.11.0+cu113 92 | torchaudio==0.11.0+cu113 93 | torchvision==0.12.0+cu113 94 | tqdm==4.67.1 95 | typeguard==2.13.3 96 | typer==0.15.2 97 | typing_extensions==4.12.2 98 | tzdata==2025.1 99 | urllib3==2.2.3 100 | uvicorn==0.33.0 101 | websockets==12.0 102 | Werkzeug==3.0.6 103 | xdis==6.1.3 104 | zipp==3.20.2 105 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import gc 3 | import json 4 | import os 5 | import subprocess 6 | import sys 7 | import threading 8 | import time 9 | import traceback 10 | import uuid 11 | from enum import Enum 12 | 13 | import queue 14 | import cv2 15 | from flask import Flask, request 16 | 17 | if sys.version_info.major != 3 or sys.version_info.minor != 8: 18 | print("请使用 Python 3.8 版本运行此脚本") 19 | sys.exit(1) 20 | 21 | import service.trans_dh_service 22 | 23 | from h_utils.custom import CustomError 24 | from y_utils.config import GlobalConfig 25 | from y_utils.logger import logger 26 | 27 | 28 | def get_args(): 29 | parser = argparse.ArgumentParser( 30 | formatter_class=(argparse.ArgumentDefaultsHelpFormatter) 31 | ) 32 | 33 | parser.add_argument( 34 | "--audio_path", 35 | type=str, 36 | default="example/audio.wav", 37 | help="path to local audio file", 38 | ) 39 | parser.add_argument( 40 | "--video_path", 41 | type=str, 42 | default="example/video.mp4", 43 | help="path to local video file", 44 | ) 45 | opt = parser.parse_args() 46 | return opt 47 | 48 | 49 | def write_video( 50 | output_imgs_queue, 51 | temp_dir, 52 | result_dir, 53 | work_id, 54 | audio_path, 55 | result_queue, 56 | width, 57 | height, 58 | fps, 59 | watermark_switch=0, 60 | digital_auth=0, 61 | ): 62 | output_mp4 = os.path.join(temp_dir, "{}-t.mp4".format(work_id)) 63 | fourcc = cv2.VideoWriter_fourcc(*"mp4v") 64 | result_path = os.path.join(result_dir, "{}-r.mp4".format(work_id)) 65 | video_write = cv2.VideoWriter(output_mp4, fourcc, fps, (width, height)) 66 | print("Custom VideoWriter init done") 67 | try: 68 | while True: 69 | state, reason, value_ = output_imgs_queue.get() 70 | if type(state) == bool and state == True: 71 | logger.info( 72 | "Custom VideoWriter [{}]视频帧队列处理已结束".format(work_id) 73 | ) 74 | logger.info( 75 | "Custom VideoWriter Silence Video saved in {}".format( 76 | os.path.realpath(output_mp4) 77 | ) 78 | ) 79 | video_write.release() 80 | break 81 | else: 82 | if type(state) == bool and state == False: 83 | logger.error( 84 | "Custom VideoWriter [{}]任务视频帧队列 -> 异常原因:[{}]".format( 85 | work_id, reason 86 | ) 87 | ) 88 | raise CustomError(reason) 89 | for result_img in value_: 90 | video_write.write(result_img) 91 | if video_write is not None: 92 | video_write.release() 93 | if watermark_switch == 1 and digital_auth == 1: 94 | logger.info( 95 | "Custom VideoWriter 
[{}]任务需要水印和数字人标识".format(work_id)
96 |             )
97 |             if width > height:  # NOTE: the landscape and portrait branches below build identical commands
98 |                 command = 'ffmpeg -y -i {} -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10,overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
99 |                     audio_path,
100 |                     output_mp4,
101 |                     GlobalConfig.instance().watermark_path,
102 |                     GlobalConfig.instance().digital_auth_path,
103 |                     result_path,
104 |                 )
105 |                 logger.info("command:{}".format(command))
106 |             else:
107 |                 command = 'ffmpeg -y -i {} -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10,overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
108 |                     audio_path,
109 |                     output_mp4,
110 |                     GlobalConfig.instance().watermark_path,
111 |                     GlobalConfig.instance().digital_auth_path,
112 |                     result_path,
113 |                 )
114 |                 logger.info("command:{}".format(command))
115 |         elif watermark_switch == 1 and digital_auth == 0:
116 |             logger.info("Custom VideoWriter [{}]任务需要水印".format(work_id))
117 |             command = 'ffmpeg -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10" -c:a aac -crf 15 -strict -2 {}'.format(
118 |                 audio_path,
119 |                 output_mp4,
120 |                 GlobalConfig.instance().watermark_path,
121 |                 result_path,
122 |             )
123 |             logger.info("command:{}".format(command))
124 |         elif watermark_switch == 0 and digital_auth == 1:
125 |             logger.info("Custom VideoWriter [{}]任务需要数字人标识".format(work_id))
126 |             if width > height:  # NOTE: identical commands in both orientation branches here as well
127 |                 command = 'ffmpeg -loglevel warning -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
128 |                     audio_path,
129 |                     output_mp4,
130 |                     GlobalConfig.instance().digital_auth_path,
131 |                     result_path,
132 |                 )
133 |                 logger.info("command:{}".format(command))
134 |             else:
135 |                 command = 'ffmpeg -loglevel warning -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
136 |                     audio_path,
137 |                     output_mp4,
138 |                     GlobalConfig.instance().digital_auth_path,
139 |                     result_path,
140 |                 )
141 |                 logger.info("command:{}".format(command))
142 |         else:
143 |             command = "ffmpeg -loglevel warning -y -i {} -i {} -c:a aac -c:v libx264 -crf 15 -strict -2 {}".format(
144 |                 audio_path, output_mp4, result_path
145 |             )
146 |             logger.info("Custom command:{}".format(command))
147 |         subprocess.call(command, shell=True)
148 |         print("###### Custom Video Writer write over")
149 |         print(f"###### Video result saved in {os.path.realpath(result_path)}")
150 |         result_queue.put([True, result_path])  # hand the result back before terminating the writer
151 |         exit(0)
152 |     except Exception as e:
153 |         logger.error(
154 |             "Custom VideoWriter [{}]视频帧队列处理异常结束,异常原因:[{}]".format(
155 |                 work_id, e.__str__()
156 |             )
157 |         )
158 |         result_queue.put(
159 |             [
160 |                 False,
161 |                 "[{}]视频帧队列处理异常结束,异常原因:[{}]".format(
162 |                     work_id, e.__str__()
163 |                 ),
164 |             ]
165 |         )
166 |     logger.info("Custom VideoWriter 后处理进程结束")
167 |
168 |
169 | service.trans_dh_service.write_video = write_video
170 |
171 |
172 | def main():
173 |     opt = get_args()
174 |     if not os.path.exists(opt.audio_path):
175 |         audio_url = "example/audio.wav"
176 |     else:
177 |         audio_url = opt.audio_path
178 |
179 |     if not os.path.exists(opt.video_path):
180 |         video_url = "example/video.mp4"
181 |     else:
182 |         video_url = opt.video_path
183 |     sys.argv = [sys.argv[0]]
184 |     task = service.trans_dh_service.TransDhTask()
185 |     time.sleep(10)  # somehow, this works...
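# A hedged note on the call below: TransDhTask.work() lives in the compiled
# trans_dh_service .so, so its signature is not visible in this repo. Judging from
# the write_video() wrapper above, the trailing zeros most plausibly carry flags
# such as watermark_switch and digital_auth (an assumption, not confirmed by source).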
186 |
187 |     code = "1004"
188 |     task.work(audio_url, video_url, code, 0, 0, 0, 0)
189 |
190 |
191 | if __name__ == "__main__":
192 |     main()
193 |
194 | # python run.py
195 | # python run.py --audio_path example/audio.wav --video_path example/video.mp4
196 |
--------------------------------------------------------------------------------
/service/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 | """
4 | @project : face2face_train
5 | @author : huyi
6 | @file : __init__.py.py
7 | @ide : PyCharm
8 | @time : 2023-12-06 14:46:40
9 | """
10 |
--------------------------------------------------------------------------------
/service/server.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/service/server.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/service/trans_dh_service.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/service/trans_dh_service.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/sources.list:
--------------------------------------------------------------------------------
1 | # Source (deb-src) entries are commented out by default to speed up apt update; uncomment them if needed
2 | deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal main restricted universe multiverse
3 | # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal main restricted universe multiverse
4 | deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-updates main restricted universe multiverse
5 | # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-updates main restricted universe multiverse
6 | deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-backports main restricted universe multiverse
7 | # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-backports main restricted universe multiverse
8 | deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-security main restricted universe multiverse
9 | # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-security main restricted universe multiverse
10 |
11 | # Pre-release (proposed) sources; enabling them is not recommended
12 | # deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-proposed main restricted universe multiverse
13 | # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-proposed main restricted universe multiverse
14 |
--------------------------------------------------------------------------------
/wenet/compute_ctc_att_bnf.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/compute_ctc_att_bnf.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/examples/aishell/aidata/conf/train_conformer_multi_cn.yaml:
--------------------------------------------------------------------------------
1 | # network architecture
2 | # encoder related
3 | encoder: conformer
4 | encoder_conf:
5 |     output_size: 256    # dimension of attention
6 |     attention_heads: 4
7 |     linear_units: 2048  # the number of units of position-wise feed forward
8 |     num_blocks: 12      # the number of encoder blocks
9 |     dropout_rate: 0.1
10 |     positional_dropout_rate: 0.1
11 |     attention_dropout_rate: 0.0
12 |     input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
13 |     normalize_before: true
14 |     cnn_module_kernel: 15
15 |     use_cnn_module: True
16 |     activation_type: 'swish'
17 |     pos_enc_layer_type: 'rel_pos'
18 |     selfattention_layer_type: 'rel_selfattn'
19 |
20 | # decoder related
21 | decoder: transformer
22 | decoder_conf:
23 |     attention_heads: 4
24 |     linear_units: 2048
25 |     num_blocks: 6
26 |     dropout_rate: 0.1
27 |     positional_dropout_rate: 0.1
28 |     self_attention_dropout_rate: 0.0
29 |     src_attention_dropout_rate: 0.0
30 |
31 | # hybrid CTC/attention
32 | model_conf:
33 |     ctc_weight: 0.3
34 |     lsm_weight: 0.1     # label smoothing option
35 |     length_normalized_loss: false
36 |
37 | # use raw_wav or kaldi feature
38 | raw_wav: false
39 |
40 | # feature extraction
41 | collate_conf:
42 |     # waveform level config
43 |     wav_distortion_conf:
44 |         wav_dither: 0.1
45 |         wav_distortion_rate: 0.0
46 |         distortion_methods: []
47 |     speed_perturb: true
48 |     feature_extraction_conf:
49 |         feature_type: 'fbank'
50 |         mel_bins: 80
51 |         frame_shift: 10
52 |         frame_length: 25
53 |         using_pitch: false
54 |     # spec level config
55 |     # spec_swap: false
56 |     feature_dither: 0.0 # add dither [-feature_dither,feature_dither] on fbank feature
57 |     spec_aug: true
58 |     spec_aug_conf:
59 |         warp_for_time: False
60 |         num_t_mask: 2
61 |         num_f_mask: 2
62 |         max_t: 50
63 |         max_f: 10
64 |         max_w: 80
65 |
66 |
67 | # dataset related
68 | dataset_conf:
69 |     max_length: 1300 #40960
70 |     min_length: 0
71 |     batch_type: 'static' # static or dynamic
72 |     batch_size: 40
73 |     sort: true
74 |
75 | grad_clip: 5
76 | accum_grad: 4
77 | max_epoch: 240
78 | log_interval: 100
79 |
80 | optim: adam
81 | optim_conf:
82 |     lr: 0.0025 #0.0025
83 | scheduler: warmuplr # pytorch v1.1.0+ required
84 | scheduler_conf:
85 |     warmup_steps: 100000
86 |
--------------------------------------------------------------------------------
/wenet/examples/aishell/aidata/conf/train_conformer_multi_cn_linear.yaml:
--------------------------------------------------------------------------------
1 | # network architecture
2 | # encoder related
3 | encoder: conformer
4 | encoder_conf:
5 |     output_size: 256    # dimension of attention
6 |     attention_heads: 4
7 |     linear_units: 1024  # the number of units of position-wise feed forward
8 |     num_blocks: 6       # the number of encoder blocks
9 |     dropout_rate: 0.1
10 |     positional_dropout_rate: 0.1
11 |     attention_dropout_rate: 0.0
12 |     input_layer: linear # encoder input type, you can choose linear, conv2d, conv2d6 and conv2d8
13 |     normalize_before: true
14 |     cnn_module_kernel: 15
15 |     use_cnn_module: True
16 |     activation_type: 'swish'
17 |     pos_enc_layer_type: 'rel_pos'
18 |     selfattention_layer_type: 'rel_selfattn'
19 |
20 | # decoder related
21 | decoder: transformer
22 | decoder_conf:
23 |     attention_heads: 4
24 |     linear_units: 1024
25 |     num_blocks: 3
26 |     dropout_rate: 0.1
27 |     positional_dropout_rate: 0.1
28 |     self_attention_dropout_rate: 0.0
29 |     src_attention_dropout_rate: 0.0
30 |
31 | # hybrid CTC/attention
32 | model_conf:
33 |     ctc_weight: 0.3
34 |     lsm_weight: 0.1     # label smoothing option
35 |     length_normalized_loss: false
36 |
37 | # use raw_wav or kaldi feature
38 | raw_wav: false
39 |
40 | # feature extraction
41 | collate_conf:
42 |     # waveform level config
43 |     wav_distortion_conf:
44 |         wav_dither: 0.1
45 |         wav_distortion_rate: 0.0
46 |         distortion_methods: []
47 |     speed_perturb: true
48 |     feature_extraction_conf:
49 |         feature_type: 'fbank'
50 |         mel_bins: 80
51 |         frame_shift: 10
52 |         frame_length: 25
53 |         using_pitch: false
54 |     # spec level config
55 |     # spec_swap: false
56 |     feature_dither: 0.0 # add dither [-feature_dither,feature_dither] on fbank feature
57 |     spec_aug: true
58 |     spec_aug_conf:
59 |         warp_for_time: False
60 |         num_t_mask: 2
61 |         num_f_mask: 2
62 |         max_t: 50
63 |         max_f: 10
64 |         max_w: 80
65 |
66 |
67 | # dataset related
68 | dataset_conf:
69 |     max_length: 1300 #40960
70 |     min_length: 0
71 |     batch_type: 'static' # static or dynamic
72 |     batch_size: 40
73 |     sort: true
74 |
75 | grad_clip: 5
76 | accum_grad: 4
77 | max_epoch: 240
78 | log_interval: 100
79 |
80 | optim: adam
81 | optim_conf:
82 |     lr: 0.002
83 | scheduler: warmuplr # pytorch v1.1.0+ required
84 | scheduler_conf:
85 |     warmup_steps: 50000
86 |
--------------------------------------------------------------------------------
/wenet/tools/_extract_feats.py:
--------------------------------------------------------------------------------
1 | import librosa
2 | # import tensorflow as tf
3 | import numpy as np
4 | from scipy.io import wavfile
5 | from scipy import signal
6 |
7 | import torchaudio.compliance.kaldi as kaldi
8 | import torchaudio
9 | # torchaudio.set_audio_backend("sox_io")
10 |
11 |
12 | def _extract_feature(wav_path):
13 |     """ Extract acoustic fbank features from the original waveform.
14 |
15 |     Uses torchaudio's kaldi-compatible fbank with 80 mel bins,
16 |     25 ms frames and a 10 ms frame shift (see the call below).
17 |
18 |     Args:
19 |         wav_path: path to the input wav file.
20 |
21 |
22 |
23 |     Returns:
24 |         mat: fbank feature matrix, a numpy array of shape [num_frames, 80].
25 |     """
26 |     waveform, sample_rate = torchaudio.load_wav(wav_path)  # NOTE: load_wav is removed in newer torchaudio; torchaudio.load is the usual replacement
27 |
28 |     mat = kaldi.fbank(
29 |         waveform,
30 |         num_mel_bins=80,
31 |         frame_length=25,
32 |         frame_shift=10,
33 |         dither=0.1,
34 |         energy_floor=0.0,
35 |         sample_frequency=sample_rate)
36 |     mat = mat.detach().numpy()
37 |
38 |     return mat
39 |
40 | def _extract_feature_norm(wav_path):
41 |     """ Extract acoustic fbank features from the original waveform.
42 |
43 |     Identical to _extract_feature above; kept for callers that
44 |     expect a "norm" variant.
45 |
46 |     Args:
47 |         wav_path: path to the input wav file.
48 |
49 |
50 |
51 |     Returns:
52 |         mat: fbank feature matrix, a numpy array of shape [num_frames, 80].
53 |     """
54 |
55 |     waveform, sample_rate = torchaudio.load_wav(wav_path)
56 |
57 |     mat = kaldi.fbank(
58 |         waveform,
59 |         num_mel_bins=80,
60 |         frame_length=25,
61 |         frame_shift=10,
62 |         dither=0.1,
63 |         energy_floor=0.0,
64 |         sample_frequency=sample_rate)
65 |     mat = mat.detach().numpy()
66 |
67 |     return mat
68 |
69 |
70 | hparams = {
71 |     'sample_rate': 16000,  # 16000 samples per second
72 |     'preemphasis': 0.97,
73 |     'n_fft': 1024,
74 |     'hop_length': 200,  # frame hop in samples (the original note said 80 samples / 5 ms, but 200 samples is 12.5 ms at 16 kHz)
75 |     'win_length': 800,  # frame width in samples (the original note said 400 samples / 25 ms, but 800 samples is 50 ms at 16 kHz)
76 |     'num_mels': 80,
77 |     'n_mfcc': 13,
78 |     'window': 'hann',
79 |     'fmin': 0.,
80 |     'fmax': 8000.,
81 |     'ref_db': 20,  #
82 |     'min_db': -80.0,  # restrict the dynamic range of log power
83 |     'iterations': 100,  # griffin_lim #iterations
84 |     'silence_db': -28.0,
85 |     'center': True,  # whether frames are centered, i.e. each feature vector describes the window centered on the current sample
86 | }
87 |
88 | _mel_basis = None
89 |
90 |
91 | def load_wav(wav_f, sr=None):
92 |     # wav_arr, _ = librosa.load(wav_f, sr=sr)
93 |     # return wav_arr
94 |     if type(wav_f) == str:
95 |         wav_arr, _ = librosa.load(wav_f, sr=sr)
96 |     else:
97 |         wav_arr = wav_f
98 |     return wav_arr
99 |
100 | def write_wav(write_path, wav_arr, sr):
101 |     wav_arr *= 32767 / max(0.01, np.max(np.abs(wav_arr)))
102 |     wavfile.write(write_path, sr, wav_arr.astype(np.int16))
103 |     return
104 |
105 | def preempahsis(wav_arr, pre_param=hparams['preemphasis']):
106 |     return signal.lfilter([1, -pre_param], [1], wav_arr)
107 |
108 | def deemphasis(wav_arr, pre_param=hparams['preemphasis']):
109 |     return signal.lfilter([1], [1, -pre_param], wav_arr)
110 |
111 | def split_wav(wav_arr, top_db=-hparams['silence_db']):
112 |     intervals = librosa.effects.split(wav_arr, top_db=top_db)
113 |     return intervals
114 |
115 | def mulaw_encode(wav_arr, quantization_channels):
116 |     mu = float(quantization_channels - 1)
117 |     safe_wav_abs = np.minimum(np.abs(wav_arr), 1.0)
118 |     encoded = np.sign(wav_arr) * np.log1p(mu * safe_wav_abs) / np.log1p(mu)
119 |     return encoded
120 |
121 | def mulaw_encode_quantize(wav_arr, quantization_channels):
122 |     mu = float(quantization_channels - 1)
123 |     safe_wav_abs = np.minimum(np.abs(wav_arr), 1.0)
124 |     encoded = np.sign(wav_arr) * np.log1p(mu * safe_wav_abs) / np.log1p(mu)
125 |     return ((encoded + 1.) / 2 * mu + 0.5).astype(np.int32)
126 |
127 | def mulaw_decode(encoded, quantization_channels):
128 |     mu = float(quantization_channels - 1)
129 |     magnitude = (1 / mu) * ((1 + mu) ** abs(encoded) - 1.)
130 |     return np.sign(encoded) * magnitude
131 |
132 | def mulaw_decode_quantize(encoded, quantization_channels):
133 |     mu = float(quantization_channels - 1)
134 |     signal = 2 * (encoded.astype(np.float32) / mu) - 1.
135 |     magnitude = (1 / mu) * ((1 + mu) ** abs(signal) - 1.)
136 |     return np.sign(signal) * magnitude
137 |
138 | def mulaw_encode_quantize_tf(wav_batch, quantization_channels):  # NOTE: needs tensorflow, whose import is commented out at the top
139 |     with tf.variable_scope('mulaw_encode'):
140 |         mu = tf.cast(quantization_channels - 1, tf.float32)
141 |         safe_wav_abs = tf.minimum(tf.abs(wav_batch), 1.0)
142 |         encoded = tf.sign(wav_batch) * tf.log1p(mu * safe_wav_abs) / tf.log1p(mu)
143 |         return tf.cast((encoded + 1.) / 2 * mu + 0.5, tf.int32)
144 |
145 | # def mulaw_encode_tf(wav_batch, quantization_channels):
146 | #     with tf.variable_scope('mulaw_encode'):
147 | #         mu = tf.cast(quantization_channels - 1, tf.float32)
148 | #         safe_wav_abs = tf.minimum(tf.abs(wav_batch), 1.0)
149 | #         encoded = tf.sign(wav_batch) * tf.log1p(mu * safe_wav_abs) / tf.log1p(mu)
150 | #         return encoded
151 |
152 | # def mulaw_decode_quantize_tf(encoded, quantization_channels):
153 | #     with tf.variable_scope('mulaw_decode'):
154 | #         mu = tf.cast(quantization_channels - 1, tf.float32)
155 | #         signal = 2 * (tf.cast(encoded, tf.float32) / mu) - 1.
156 | #         magnitude = (1 / mu) * ((1 + mu) ** abs(signal) - 1.)
157 | #         return tf.sign(signal) * magnitude
158 |
159 | # def mulaw_decode_tf(encoded, quantization_channels):
160 | #     with tf.variable_scope('mulaw_decode'):
161 | #         mu = tf.cast(quantization_channels - 1, tf.float32)
162 | #         magnitude = (1 / mu) * ((1 + mu) ** abs(encoded) - 1.)
163 | #         return tf.sign(encoded) * magnitude
164 |
165 | def stft(wav_arr, n_fft=hparams['n_fft'],  # short-time Fourier transform
166 |          hop_len=hparams['hop_length'],
167 |          win_len=hparams['win_length'],
168 |          window=hparams['window'],
169 |          center=hparams['center']):
170 |     # return shape: [n_freqs, time]
171 |     return librosa.core.stft(wav_arr, n_fft=n_fft, hop_length=hop_len,
172 |                              win_length=win_len, window=window, center=center)
173 |
174 | # def stft_tf(wav_arr, n_fft=hparams['n_fft'],
175 | #             hop_len=hparams['hop_length'],
176 | #             win_len=hparams['win_length'],
177 | #             window=hparams['window']):
178 | #     window_f = {'hann': tf.contrib.signal.hann_window,
179 | #                 'hamming': tf.contrib.signal.hamming_window}[window]
180 | #     # returned value is of shape [..., frames, fft_bins] and complex64 value
181 | #     return tf.contrib.signal.stft(signals=wav_arr, frame_length=win_len,
182 | #                                   frame_step=hop_len, fft_length=n_fft,
183 | #                                   window_fn=window_f)
184 |
185 | def istft(stft_matrix, hop_len=hparams['hop_length'],
186 |           win_len=hparams['win_length'], window=hparams['window']):
187 |     # stft_matrix should be complex stft results instead of magnitude spectrogram
188 |     # or power spectrogram, and of shape [n_freqs, time]
189 |     return librosa.core.istft(stft_matrix, hop_length=hop_len,
190 |                               win_length=win_len, window=window)
191 |
192 | # def istft_tf(stft_matrix, hop_len=hparams['hop_length'], n_fft=hparams['n_fft'],
193 | #              win_len=hparams['win_length'], window=hparams['window']):
194 | #     window_f = {'hann': tf.contrib.signal.hann_window,
195 | #                 'hamming': tf.contrib.signal.hamming_window}[window]
196 | #     # stft_matrix should be of shape [..., frames, fft_bins]
197 | #     return tf.contrib.signal.inverse_stft(stft_matrix, frame_length=win_len,
198 | #                                           frame_step=hop_len, fft_length=n_fft,
199 | #                                           window_fn=window_f)
200 |
201 | def spectrogram(wav_arr, n_fft=hparams['n_fft'],
202 |                 hop_len=hparams['hop_length'],
203 |                 win_len=hparams['win_length'],
204 |                 window=hparams['window'],
205 |                 center=hparams['center']):
206 |     # return shape: [time, n_freqs]
207 |     s = stft(wav_arr, n_fft=n_fft, hop_len=hop_len,
208 |              win_len=win_len, window=window, center=center).T
209 |     magnitude = np.abs(s)  # magnitude spectrum
210 |     power = magnitude ** 2  # power spectrum: the squared STFT magnitude (the original note questioned why STFT rather than a plain FFT is used)
211 |     return {'magnitude': magnitude,
212 |             'power': power,
213 |             'stft': s.T}
214 |
215 | def power_spec2mel(power_spec, sr=hparams['sample_rate'], n_fft=hparams['n_fft'],
216 |                    num_mels=hparams['num_mels'], fmin=hparams['fmin'], fmax=hparams['fmax']):
217 |     # power_spec should be of shape [time, 1+n_fft/2]
218 |     power_spec_t = power_spec.T
219 |     global _mel_basis
220 |     _mel_basis = (librosa.filters.mel(sr=sr, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax)
221 |                   if _mel_basis is None else _mel_basis)  # [n_mels, 1+n_fft/2]
222 |     mel_spec = np.dot(_mel_basis, power_spec_t)  # [n_mels, time]
223 |     return mel_spec.T  # mel spectrogram, [time, n_mels]
224 |
225 | def wav2melspec(wav_arr, sr=hparams['sample_rate'], n_fft=hparams['n_fft'],
226 |                 hop_len=hparams['hop_length'], win_len=hparams['win_length'],
227 |                 window=hparams['window'], num_mels=hparams['num_mels'],
228 |                 fmin=hparams['fmin'], fmax=hparams['fmax']):
229 |     power_spec = spectrogram(wav_arr, n_fft, hop_len, win_len, window)['power']
230 |     melspec = power_spec2mel(power_spec.T, sr, n_fft, num_mels, fmin, fmax)
231 |     return melspec  # [time, num_mels]
232 |
233 | def wav2mfcc(wav_arr, sr=hparams['sample_rate'], n_mfcc=hparams['n_mfcc'],
234 |              n_fft=hparams['n_fft'], hop_len=hparams['hop_length'],
235 |              win_len=hparams['win_length'], window=hparams['window'],
236 |              num_mels=hparams['num_mels'], fmin=0.0,
237 |              fmax=None, ref_db=hparams['ref_db']):
238 |     from scipy.fftpack import dct
239 |     print("wav_arr1:", wav_arr.shape)
240 |     wav_arr = preempahsis(wav_arr)
241 |     print("wav_arr2:", wav_arr.shape)
242 |
243 |     mag_spec = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len,
244 |                            win_len=win_len, window=window)['magnitude']
245 |     mel_spec = power_spec2mel(mag_spec, sr=sr, n_fft=n_fft, num_mels=num_mels,
246 |                               fmin=fmin, fmax=fmax)
247 |     # log_melspec = power2db(mel_spec, ref_db=ref_db)
248 |     log_melspec = librosa.amplitude_to_db(mel_spec)
249 |     mfcc = dct(x=log_melspec.T, axis=0, type=2, norm='ortho')[:n_mfcc]
250 |     # mfcc = np.dot(librosa.filters.dct(n_mfcc, log_melspec.shape[1]), log_melspec.T)
251 |     deltas = librosa.feature.delta(mfcc)
252 |     delta_deltas = librosa.feature.delta(mfcc, order=2)
253 |     mfcc_feature = np.concatenate((mfcc, deltas, delta_deltas), axis=0)
254 |
255 |     return mfcc_feature.T
256 |
257 | def wav2mfcc_v2(wav_arr, sr=hparams['sample_rate'], n_mfcc=hparams['n_mfcc'],  # this is the variant actually used
258 |                 n_fft=hparams['n_fft'], hop_len=hparams['hop_length'],
259 |                 win_len=hparams['win_length'], window=hparams['window'],
260 |                 num_mels=hparams['num_mels'], fmin=0.0,
261 |                 fmax=None, ref_db=hparams['ref_db'],
262 |                 center=hparams['center']):
263 |     from scipy.fftpack import dct
264 |     wav_arr = preempahsis(wav_arr)
265 |     # pre-emphasis filtering applied above
266 |     power_spec = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len,
267 |                              win_len=win_len, window=window, center=center)['power']
268 |     mel_spec = power_spec2mel(power_spec, sr=sr, n_fft=n_fft, num_mels=num_mels,
269 |                               fmin=fmin, fmax=fmax)  # mel spectrogram
270 |     log_melspec = power2db(mel_spec, ref_db=ref_db)  # log-mel spectrogram
271 |
272 |
273 |     """MFCC computation below (currently commented out)"""
274 |     # mfcc = dct(x=log_melspec.T, axis=0, type=2, norm='ortho')[:n_mfcc]
275 |     # deltas = librosa.feature.delta(mfcc)
276 |     # delta_deltas = librosa.feature.delta(mfcc, order=2)
277 |     # mfcc_feature = np.concatenate((mfcc, deltas, delta_deltas), axis=0)
278 |     # return mfcc_feature.T
279 |     x_stft = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len,
280 |                          win_len=win_len, window=window, center=center)['stft']
281 |     # print("log_melspec:", x_stft.shape)
282 |     return log_melspec, x_stft  # note: returns a (features, stft) tuple
283 |
284 |
285 | def wav2linear_v2(wav_arr, sr=hparams['sample_rate'], n_mfcc=hparams['n_mfcc'],
286 |                   n_fft=hparams['n_fft'], hop_len=hparams['hop_length'],
287 |                   win_len=hparams['win_length'], window=hparams['window'],
288 |                   num_mels=hparams['num_mels'], fmin=0.0,
289 |                   fmax=None, ref_db=hparams['ref_db'],
290 |                   center=hparams['center']):
291 |     from scipy.fftpack import dct
292 |     wav_arr = preempahsis(wav_arr)
293 |     # pre-emphasis filtering applied above
294 |     power_spec = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len,
295 |                              win_len=win_len, window=window, center=center)['power']
296 |     linear = _amp_to_db(power_spec, ref_db=ref_db)  # log power (linear-frequency) spectrogram, despite the original "log-mel" note
297 |     normalized_linear = _db_normalize(linear, min_db=hparams['min_db'])
298 |     x_stft = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len,
299 |                          win_len=win_len, window=window, center=center)['stft']
300 |
301 |
302 |     return normalized_linear, x_stft
303 |
304 | def _amp_to_db(x, ref_db=20):
305 |     return 20 * np.log10(np.maximum(1e-5, x)) + ref_db
306 |
307 |
308 | def mel2log_mel(mel_spec, ref_db=hparams['ref_db'], min_db=hparams['min_db']):
309 |     log_mel = power2db(mel_spec, ref_db)
310 |     normalized = log_power_normalize(log_mel, min_db)
311 |     return normalized
312 |
313 | def power2db(power_spec, ref_db=hparams['ref_db'], tol=1e-5):
314 |     # power spectrogram is stft ** 2
315 |     # returned value: (10. * log10(power_spec) - ref_db)
316 |     return 10. * np.log10(power_spec + tol) - ref_db
317 |
318 | def db2power(power_db, ref_db=hparams['ref_db']):
319 |     return np.power(10.0, 0.1 * (power_db + ref_db))
320 | #
321 | # def db2power_tf(power_db, ref_db=hparams['ref_db']):
322 | #     return tf.pow(10.0, 0.1 * (power_db + ref_db))
323 |
324 | def log_power_normalize(log_power, min_db=hparams['min_db']):
325 |     """
326 |     :param log_power: in db, computed by power2db(spectrogram(wav_arr)['power'])
327 |     :param min_db: minimum value of log_power in db
328 |     :return: log_power normalized to [0., 1.]
329 |     """
330 |     assert min_db < 0., "min_db should be a negative value like -80.0 or -100.0"
331 |     return np.clip((log_power - min_db) / -min_db, 0., 1.)
332 |
333 | def log_power_denormalize(normalized_logpower, min_db=hparams['min_db']):
334 |     return np.clip(normalized_logpower, 0., 1.) * -min_db + min_db
335 |
336 | # def log_power_denormalize_tf(normalized_logpower, min_db=hparams['min_db']):
337 | #     return tf.clip_by_value(normalized_logpower, 0., 1.) * -min_db + min_db
338 |
339 | def griffin_lim(magnitude_spec, iterations=hparams['iterations']):
340 |     """
341 |     :param magnitude_spec: magnitude spectrogram of shape [time, n_freqs]
342 |                            obtained from spectrogram(wav_arr)['magnitude']
343 |     :param iterations: number of iterations to estimate phase
344 |     :return: waveform array
345 |     """
346 |     mag = magnitude_spec.T  # transpose to [n_freqs, time]
347 |     angles = np.exp(2j * np.pi * np.random.rand(*mag.shape))
348 |     complex_mag = np.abs(mag).astype(np.complex128)  # the bare np.complex alias is gone from modern NumPy
349 |     stft_0 = complex_mag * angles
350 |     y = istft(stft_0)
351 |     for i in range(iterations):
352 |         angles = np.exp(1j * np.angle(stft(y)))
353 |         y = istft(complex_mag * angles)
354 |     return y
355 |
356 | # def grinffin_lim_tf(magnitude_spec, iterations=hparams['iterations']):
357 | #     # magnitude_spec: [frames, fft_bins], of type tf.float32
358 | #     angles = tf.cast(
359 | #         tf.exp(2j * np.pi * tf.cast(
360 | #             tf.random_uniform(
361 | #                 tf.shape(magnitude_spec)),
362 | #             dtype=tf.complex64)),
363 | #         dtype=tf.complex64)
364 | #     complex_mag = tf.cast(tf.abs(magnitude_spec), tf.complex64)
365 | #     stft_0 = complex_mag * angles
366 | #     y = istft_tf(stft_0)
367 | #     for i in range(iterations):
368 | #         angles = tf.exp(1j * tf.cast(tf.angle(stft_tf(y)), tf.complex64))
369 | #         y = istft_tf(complex_mag * angles)
370 | #     return y
371 |
372 | def griffin_lim_test(wav_f, n_fft=hparams['n_fft'],
373 |                      hop_len=hparams['hop_length'],
374 |                      win_len=hparams['win_length'],
375 |                      window=hparams['window']):
376 |     wav_arr = load_wav(wav_f)
377 |     spec_dict = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len,
378 |                             win_len=win_len, window=window)
379 |     mag_spec = spec_dict['magnitude']
380 |     y = griffin_lim(mag_spec)
381 |     write_wav('reconstructed1.wav', y, sr=16000)
382 |
383 | def stft2wav_test(stft_f, mean_f, std_f):
384 |     spec = np.load(stft_f)
385 |     mean = np.load(mean_f)
386 |     std = np.load(std_f)
387 |     spec = spec * std + mean
388 |     spec = log_power_denormalize(spec)
389 |     power_spec = db2power(spec)
390 |     mag_spec = power_spec ** 0.5
391 |     y = griffin_lim(mag_spec)
392 |     y = deemphasis(y)
393 |     write_wav('reconstructed2.wav', y, sr=16000)
394 |     return y
395 | #
396 | # def stft2wav_tf_test(stft_f, mean_f, std_f):
397 | #     # get inputs
398 | #     spec = np.load(stft_f)
399 | #     mean = np.load(mean_f)
400 | #     std = np.load(std_f)
401 | #     spec = spec * std + mean
402 | #     # build graph
403 | #     spec_pl = tf.placeholder(tf.float32, [None, None, 513])
404 | #     denormalized = log_power_denormalize_tf(spec_pl)
405 | #     mag_spec = tf.pow(db2power_tf(denormalized), 0.5)
406 | #     wav = grinffin_lim_tf(mag_spec)
407 | #     # set session and run
408 | #     config = tf.ConfigProto()
409 | #     config.gpu_options.allow_growth = True
410 | #     sess = tf.Session(config=config)
411 | #     wav_arr = sess.run(wav, feed_dict={spec_pl: np.expand_dims(spec, axis=0)})
412 | #     sess.close()
413 | #     y = deemphasis(np.squeeze(wav_arr))
414 | #     write_wav('reconstructed_tf.wav', y, sr=16000)
415 | #     return y
416 |
417 | # number of hyperparameters: 1
418 | # return: db normalized to [0., 1.]
419 | def _db_normalize(db, min_db):
420 |     return np.clip((db - min_db) / -min_db, 0., 1.)
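# A minimal worked example (not part of the original file) of the normalize /
# denormalize pair with the default min_db of -80.0:
#
#     >>> _db_normalize(np.array([-40.0]), min_db=-80.0)        # ((-40) - (-80)) / 80
#     array([0.5])
#     >>> log_power_denormalize(np.array([0.5]), min_db=-80.0)  # 0.5 * 80 - 80
#     array([-40.])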
421 |
422 |
423 |
424 |
425 | def mfcc_test():
426 |     wav_f = './test.wav'
427 |     wav_arr = load_wav(wav_f)
428 |
429 |
430 |     mfcc, _ = wav2mfcc_v2(wav_arr)  # wav2mfcc_v2 returns a (log_melspec, x_stft) tuple
431 |     mfcc1 = np.load('test.npy')
432 |     print(mfcc.min(), mfcc1.min())
433 |     print(mfcc.max(), mfcc1.max())
434 |     print(mfcc.mean(), mfcc1.mean())
435 |     print(np.abs(mfcc - mfcc1))
436 |     print(np.mean(np.abs(mfcc - mfcc1)))
437 |     import matplotlib.pyplot as plt
438 |     plt.figure()
439 |     plt.subplot(211)
440 |     plt.imshow(mfcc.T, origin='lower')
441 |     # plt.colorbar()
442 |     plt.subplot(212)
443 |     plt.imshow(mfcc1.T, origin='lower')
444 |     # plt.colorbar()
445 |     plt.tight_layout()
446 |     plt.show()
447 |     return
448 |
449 |
450 |
451 | if __name__ == '__main__':
452 |     mfcc_test()
453 |
--------------------------------------------------------------------------------
/wenet/transformer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/__init__.py
--------------------------------------------------------------------------------
/wenet/transformer/asr_model.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/asr_model.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/transformer/attention.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/attention.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/transformer/cmvn.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/cmvn.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/transformer/convolution.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/convolution.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/transformer/ctc.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/ctc.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/transformer/decoder.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/decoder.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/transformer/decoder_layer.cpython-38-x86_64-linux-gnu.so:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/decoder_layer.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /wenet/transformer/embedding.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/embedding.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /wenet/transformer/encoder.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/encoder.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /wenet/transformer/encoder_layer.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/encoder_layer.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /wenet/transformer/label_smoothing_loss.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/label_smoothing_loss.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /wenet/transformer/positionwise_feed_forward.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/positionwise_feed_forward.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /wenet/transformer/subsampling.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/subsampling.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /wenet/transformer/swish.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/swish.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /wenet/utils/checkpoint.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/utils/checkpoint.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /wenet/utils/cmvn.py: 
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (c) 2020 Mobvoi Inc (Binbin Zhang)
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | import json
17 | import logging
18 | import math
19 | import sys
20 |
21 | import numpy as np
22 |
23 |
24 | def _load_json_cmvn(json_cmvn_file):
25 |     """ Load the json format cmvn stats file and calculate cmvn
26 |
27 |     Args:
28 |         json_cmvn_file: cmvn stats file in json format
29 |
30 |     Returns:
31 |         a numpy array of [means, vars]
32 |     """
33 |     with open(json_cmvn_file) as f:
34 |         cmvn_stats = json.load(f)
35 |
36 |     means = cmvn_stats['mean_stat']
37 |     variance = cmvn_stats['var_stat']
38 |     count = cmvn_stats['frame_num']
39 |     for i in range(len(means)):
40 |         means[i] /= count
41 |         variance[i] = variance[i] / count - means[i] * means[i]
42 |         if variance[i] < 1.0e-20:
43 |             variance[i] = 1.0e-20
44 |         variance[i] = 1.0 / math.sqrt(variance[i])
45 |     cmvn = np.array([means, variance])
46 |     return cmvn
47 |
48 |
49 | def _load_kaldi_cmvn(kaldi_cmvn_file):
50 |     """ Load the kaldi format cmvn stats file and calculate cmvn
51 |
52 |     Args:
53 |         kaldi_cmvn_file: kaldi text style global cmvn file, which
54 |             is generated by:
55 |             compute-cmvn-stats --binary=false scp:feats.scp global_cmvn
56 |
57 |     Returns:
58 |         a numpy array of [means, vars]
59 |     """
60 |     means = []
61 |     variance = []
62 |     with open(kaldi_cmvn_file, 'r') as fid:
63 |         # kaldi binary file start with '\0B'
64 |         if fid.read(2) == '\0B':
65 |             logging.error('kaldi cmvn binary file is not supported, please '
66 |                           'recompute it by: compute-cmvn-stats --binary=false '
67 |                           ' scp:feats.scp global_cmvn')
68 |             sys.exit(1)
69 |         fid.seek(0)
70 |         arr = fid.read().split()
71 |         assert (arr[0] == '[')
72 |         assert (arr[-2] == '0')
73 |         assert (arr[-1] == ']')
74 |         feat_dim = int((len(arr) - 2 - 2) / 2)
75 |         for i in range(1, feat_dim + 1):
76 |             means.append(float(arr[i]))
77 |         count = float(arr[feat_dim + 1])
78 |         for i in range(feat_dim + 2, 2 * feat_dim + 2):
79 |             variance.append(float(arr[i]))
80 |
81 |     for i in range(len(means)):
82 |         means[i] /= count
83 |         variance[i] = variance[i] / count - means[i] * means[i]
84 |         if variance[i] < 1.0e-20:
85 |             variance[i] = 1.0e-20
86 |         variance[i] = 1.0 / math.sqrt(variance[i])
87 |     cmvn = np.array([means, variance])
88 |     return cmvn
89 |
90 |
91 | def load_cmvn(cmvn_file, is_json):
92 |     if is_json:
93 |         cmvn = _load_json_cmvn(cmvn_file)
94 |     else:
95 |         cmvn = _load_kaldi_cmvn(cmvn_file)
96 |     return cmvn[0], cmvn[1]  # (means, inverse standard deviations)
97 |
--------------------------------------------------------------------------------
/wenet/utils/common.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/utils/common.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
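A minimal sketch (not from the repository) of how the statistics returned by load_cmvn above are typically applied to an fbank feature matrix; the stats-file path is hypothetical:

    import numpy as np
    from wenet.utils.cmvn import load_cmvn

    means, istd = load_cmvn("global_cmvn.json", is_json=True)     # hypothetical stats file
    feats = np.random.randn(120, len(means)).astype(np.float32)   # [num_frames, feat_dim] fbank
    normalized = (feats - np.asarray(means)) * np.asarray(istd)   # per-dimension mean/variance normalization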
/wenet/utils/ctc_util.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/utils/ctc_util.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/utils/executor.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/utils/executor.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/utils/mask.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/utils/mask.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/utils/scheduler.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/utils/scheduler.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/xseg/dfl_xseg_api.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/xseg/dfl_xseg_api.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/y_utils/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 | """
4 | @project : dhp-service
5 | @author : huyi
6 | @file : __init__.py.py
7 | @ide : PyCharm
8 | @time : 2021-08-18 16:29:13
9 | """
--------------------------------------------------------------------------------
/y_utils/config.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/y_utils/config.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/y_utils/lcr.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/y_utils/lcr.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/y_utils/liblcr.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/y_utils/liblcr.so
--------------------------------------------------------------------------------
/y_utils/logger.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/y_utils/logger.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /y_utils/md5.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/y_utils/md5.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /y_utils/time_utils.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/y_utils/time_utils.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /y_utils/tools.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/y_utils/tools.cpython-38-x86_64-linux-gnu.so --------------------------------------------------------------------------------
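As a closing pointer, the very last step of run.py's pipeline is a plain ffmpeg mux of the silent render with the driving audio (the command template is visible in write_video above and in log/dh.log). A minimal standalone sketch of that step, with hypothetical paths:

    import subprocess

    def mux_audio(audio_path: str, silent_mp4: str, result_mp4: str) -> None:
        # same template as the no-watermark branch of write_video() in run.py
        command = (
            "ffmpeg -loglevel warning -y -i {} -i {} "
            "-c:a aac -c:v libx264 -crf 15 -strict -2 {}"
        ).format(audio_path, silent_mp4, result_mp4)
        subprocess.call(command, shell=True)

    mux_audio("example/audio.wav", "1004-t.mp4", "1004-r.mp4")  # hypothetical work id 1004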