├── .DS_Store
├── 1.jpeg
├── README.md
├── README_en.md
├── README_tts_f2f.MD
├── app.py
├── check_env
│   └── check_onnx_cuda.py
├── config
│   └── config.ini
├── download.sh
├── example
│   ├── audio.wav
│   └── video.mp4
├── face_attr_detect
│   ├── .DS_Store
│   ├── __init__.py
│   └── face_attr.cpython-38-x86_64-linux-gnu.so
├── face_detect_utils
│   ├── __init__.py
│   ├── face_detect.cpython-38-x86_64-linux-gnu.so
│   ├── head_pose.cpython-38-x86_64-linux-gnu.so
│   └── scrfd.cpython-38-x86_64-linux-gnu.so
├── face_lib
│   ├── __init__.py
│   ├── face_detect_and_align
│   │   ├── __init__.py
│   │   ├── face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so
│   │   ├── face_align_utils.cpython-38-x86_64-linux-gnu.so
│   │   └── scrfd_insightface
│   │       ├── __init__.py
│   │       └── scrfd.cpython-38-x86_64-linux-gnu.so
│   ├── face_parsing
│   │   ├── __init__.py
│   │   └── face_parsing_api.cpython-38-x86_64-linux-gnu.so
│   └── face_restore
│       ├── __init__.py
│       └── gfpgan_onnx
│           └── gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so
├── h_utils
│   ├── __init__.py
│   ├── custom.cpython-38-x86_64-linux-gnu.so
│   ├── obs_client.cpython-38-x86_64-linux-gnu.so
│   ├── request_utils.cpython-38-x86_64-linux-gnu.so
│   ├── sweep_bot.cpython-38-x86_64-linux-gnu.so
│   └── zip_utils.cpython-38-x86_64-linux-gnu.so
├── inference_from_text.sh
├── landmark2face_wy
│   ├── audio_handler.cpython-38-x86_64-linux-gnu.so
│   ├── checkpoints
│   │   └── test
│   │       └── opt.txt
│   ├── data
│   │   ├── Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so
│   │   ├── Facereala3dmmexp512_dataset.py
│   │   ├── Facereala3dmmexpwenet512_dataset.py
│   │   ├── __init__.py
│   │   ├── base_dataset.cpython-38-x86_64-linux-gnu.so
│   │   ├── image_folder.cpython-38-x86_64-linux-gnu.so
│   │   ├── l2faceaudio512_dataset.py
│   │   └── l2faceaudio_dataset.py
│   ├── digitalhuman_interface.cpython-38-x86_64-linux-gnu.so
│   ├── loss
│   │   ├── __init__.py
│   │   └── perceptual.cpython-38-x86_64-linux-gnu.so
│   ├── models
│   │   ├── DINet.cpython-38-x86_64-linux-gnu.so
│   │   ├── __init__.py
│   │   ├── base_function.cpython-38-x86_64-linux-gnu.so
│   │   ├── base_model.cpython-38-x86_64-linux-gnu.so
│   │   ├── face3d2face_model.cpython-38-x86_64-linux-gnu.so
│   │   ├── face_model.cpython-38-x86_64-linux-gnu.so
│   │   ├── l2faceaudio_model.cpython-38-x86_64-linux-gnu.so
│   │   ├── networks.cpython-38-x86_64-linux-gnu.so
│   │   ├── networks_HD.cpython-38-x86_64-linux-gnu.so
│   │   ├── networks_pix2pixHD.cpython-38-x86_64-linux-gnu.so
│   │   ├── pirender_3dmm_mouth_hd_model.cpython-38-x86_64-linux-gnu.so
│   │   └── pirender_3dmm_mouth_hdv2_model.cpython-38-x86_64-linux-gnu.so
│   ├── options
│   │   ├── __init__.py
│   │   ├── base_options.cpython-38-x86_64-linux-gnu.so
│   │   ├── test_options.cpython-38-x86_64-linux-gnu.so
│   │   └── train_options.cpython-38-x86_64-linux-gnu.so
│   ├── sync_batchnorm
│   │   ├── __init__.py
│   │   ├── batchnorm.cpython-38-x86_64-linux-gnu.so
│   │   ├── batchnorm_reimpl.cpython-38-x86_64-linux-gnu.so
│   │   ├── comm.cpython-38-x86_64-linux-gnu.so
│   │   ├── replicate.cpython-38-x86_64-linux-gnu.so
│   │   └── unittest.cpython-38-x86_64-linux-gnu.so
│   ├── test_3dmm_multi_exp_wenet.cpython-38-x86_64-linux-gnu.so
│   ├── test_3dmm_multi_exp_wenet0.cpython-38-x86_64-linux-gnu.so
│   └── util
│       ├── __init__.py
│       ├── flow_util.cpython-38-x86_64-linux-gnu.so
│       ├── get_data.cpython-38-x86_64-linux-gnu.so
│       ├── html.cpython-38-x86_64-linux-gnu.so
│       ├── image_pool.cpython-38-x86_64-linux-gnu.so
│       ├── util.cpython-38-x86_64-linux-gnu.so
│       └── visualizer.cpython-38-x86_64-linux-gnu.so
├── license.txt
├── log
│   └── dh.log
├── model_lib
│   ├── __init__.py
│   ├── base_wrapper
│   │   ├── __init__.py
│   │   └── onnx_model.cpython-38-x86_64-linux-gnu.so
│   └── model_base.py
├── preprocess_audio_and_3dmm.cpython-38-x86_64-linux-gnu.so
├── requirements.txt
├── requirements_0.txt
├── run.py
├── service
│   ├── __init__.py
│   ├── server.cpython-38-x86_64-linux-gnu.so
│   └── trans_dh_service.cpython-38-x86_64-linux-gnu.so
├── sources.list
├── wenet
│   ├── compute_ctc_att_bnf.cpython-38-x86_64-linux-gnu.so
│   ├── examples
│   │   └── aishell
│   │       └── aidata
│   │           └── conf
│   │               ├── train_conformer_multi_cn.yaml
│   │               └── train_conformer_multi_cn_linear.yaml
│   ├── tools
│   │   └── _extract_feats.py
│   ├── transformer
│   │   ├── __init__.py
│   │   ├── asr_model.cpython-38-x86_64-linux-gnu.so
│   │   ├── attention.cpython-38-x86_64-linux-gnu.so
│   │   ├── cmvn.cpython-38-x86_64-linux-gnu.so
│   │   ├── convolution.cpython-38-x86_64-linux-gnu.so
│   │   ├── ctc.cpython-38-x86_64-linux-gnu.so
│   │   ├── decoder.cpython-38-x86_64-linux-gnu.so
│   │   ├── decoder_layer.cpython-38-x86_64-linux-gnu.so
│   │   ├── embedding.cpython-38-x86_64-linux-gnu.so
│   │   ├── encoder.cpython-38-x86_64-linux-gnu.so
│   │   ├── encoder_layer.cpython-38-x86_64-linux-gnu.so
│   │   ├── label_smoothing_loss.cpython-38-x86_64-linux-gnu.so
│   │   ├── positionwise_feed_forward.cpython-38-x86_64-linux-gnu.so
│   │   ├── subsampling.cpython-38-x86_64-linux-gnu.so
│   │   └── swish.cpython-38-x86_64-linux-gnu.so
│   └── utils
│       ├── checkpoint.cpython-38-x86_64-linux-gnu.so
│       ├── cmvn.py
│       ├── common.cpython-38-x86_64-linux-gnu.so
│       ├── ctc_util.cpython-38-x86_64-linux-gnu.so
│       ├── executor.cpython-38-x86_64-linux-gnu.so
│       ├── mask.cpython-38-x86_64-linux-gnu.so
│       └── scheduler.cpython-38-x86_64-linux-gnu.so
├── xseg
│   └── dfl_xseg_api.cpython-38-x86_64-linux-gnu.so
└── y_utils
    ├── __init__.py
    ├── config.cpython-38-x86_64-linux-gnu.so
    ├── lcr.cpython-38-x86_64-linux-gnu.so
    ├── liblcr.so
    ├── logger.cpython-38-x86_64-linux-gnu.so
    ├── md5.cpython-38-x86_64-linux-gnu.so
    ├── time_utils.cpython-38-x86_64-linux-gnu.so
    └── tools.cpython-38-x86_64-linux-gnu.so
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/.DS_Store
--------------------------------------------------------------------------------
/1.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/1.jpeg
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

[](https://github.com/GuijiAI/HeyGem.ai/blob/main/LICENSE)



**[中文](#chinese-version)** | **[English](README_en.md)**

---




# HeyGem-Linux-Python-Hack

## Introduction

HeyGem-Linux-Python-Hack is a Python-based digital human project extracted from [HeyGem.ai](https://github.com/GuijiAI/HeyGem.ai). It runs directly on Linux, removing the dependency on Docker and Windows. Our goal is to provide a digital human solution that is easier to deploy and use.

[The RTX 50 version has been released, click here](https://github.com/Holasyb918/HeyGem-Linux-Python-Hack-RTX-50)
[Text To Face] If you need a more complete HeyGem pipeline, i.e. from TTS all the way to the digital human, see [here](README_tts_f2f.MD)

**If you find this project helpful, feel free to give us a Star!**
**If you run into problems, please check the existing Issues and search Google/Baidu/AI first, then feel free to open an Issue!**
**All .so files in this project were compiled by Guiji (硅基) and are unrelated to the developer.**
**All models in this project are provided by Guiji (硅基) and are unrelated to the developer.**

## Key Features

* No Docker required: runs directly on Linux, simplifying deployment.
* No Windows required: fully developed and tested on Linux.
* Python powered: written in Python, easy to understand and extend.
* Developer friendly: easy to use and extend.
* Fully offline.

WeChat group


## Getting Started

### Installation
#### Environment
This project **supports Linux & Python 3.8 only**.
Make sure **Python 3.8** is installed on your Linux system, then install the project dependencies with pip. An environment probe sketch follows the code block below.
**Fallback**: an alternative environment spec, [requirements_0.txt](requirements_0.txt), is also provided; if you hit problems, you can use it as a reference to build a new environment.
**The exact onnxruntime-gpu / torch versions depend on the CUDA version on your machine, so you may need to try a few combinations; otherwise you may still run into problems.**
**Please try not to ask pip-related questions. Thanks for your cooperation.**
**If the environment is hard to set up, consider the [autodl environment](https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/issues/43). Note: the developer has no affiliation with autodl.**


```bash
# Installing the whole requirements.txt in one go does not always succeed. It is better to run the code, read the error messages, and install packages from requirements accordingly. Good luck.
# pip install -r requirements.txt
```
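
As mentioned above, a quick probe of what your current environment already provides can guide which combination to try. A minimal sketch using only standard APIs (torch and onnxruntime may or may not be installed yet; nothing here is project code):

```python
# Environment probe: print Python/torch/onnxruntime versions and CUDA visibility.
import sys

print("python:", sys.version.split()[0])  # must be 3.8.x for this project

try:
    import torch
    print("torch:", torch.__version__, "| cuda available:", torch.cuda.is_available())
except ImportError:
    print("torch: not installed")

try:
    import onnxruntime
    print("onnxruntime:", onnxruntime.__version__)
    print("providers:", onnxruntime.get_available_providers())
except ImportError:
    print("onnxruntime: not installed")
```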

### Usage
Clone the project:
```bash
git clone https://github.com/Holasyb918/HeyGem-Linux-Python-Hack
cd HeyGem-Linux-Python-Hack
bash download.sh
```
#### Getting started
* Audio and video samples for the demo are already included in the repo, so the code can run as-is.
#### Command:
```bash
python run.py
```

* To use your own data, pass the paths in from the command line. Please note that **each path must be a local file, and only relative paths are supported** (a scripted variant is sketched after the command below).

#### Command:
```bash
python run.py --audio_path example/audio.wav --video_path example/video.mp4
```
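
Because only relative paths work, one option is to copy your files into the repo first. A hedged sketch (the `example/` destination and the run.py flags come from this README; the source paths are placeholders):

```python
# Copy user files into the repo so run.py only sees relative paths,
# then invoke run.py with the documented flags.
import shutil
import subprocess

shutil.copy("/path/to/my_voice.wav", "example/my_voice.wav")  # placeholder sources
shutil.copy("/path/to/my_video.mp4", "example/my_video.mp4")

subprocess.run(
    ["python", "run.py",
     "--audio_path", "example/my_voice.wav",
     "--video_path", "example/my_video.mp4"],
    check=True,
)
```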
#### Gradio:
```bash
python app.py
# Please wait for model initialization to finish before submitting a task
```

## QA
### 1. Errors when multiple faces are present
Downloading a newer face detection model and swapping it in for the original one may fix this:
```bash
wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/scrfd_10g_kps.onnx
mv face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx.bak
mv scrfd_10g_kps.onnx face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx
```
### 2. Initialization errors

Most likely caused by an onnxruntime-gpu version mismatch.
```bash
python check_env/check_onnx_cuda.py
```
Check whether the output contains "successfully".
If you run into problems, you can try the following:
1. Try different versions that match your CUDA environment.
2. If that is hard to resolve, uninstall onnxruntime-gpu and onnxruntime first, install cudatoolkit via conda, and then try pip installing onnxruntime-gpu again.

Verified working combinations (a provider check sketch follows the table):
| cudatoolkit | onnxruntime-gpu | Notes |
| --- | --- | --- |
| 11.8.0 | 1.16.0 | |

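To check your installed combination against this table, a one-off snippet like the following (standard onnxruntime API, not project code) shows whether the CUDA provider is actually available:

```python
import onnxruntime

print("onnxruntime:", onnxruntime.__version__)
providers = onnxruntime.get_available_providers()
print("providers:", providers)
# A working GPU setup must list CUDAExecutionProvider here; if only
# CPUExecutionProvider appears, the version combination is wrong.
assert "CUDAExecutionProvider" in providers
```
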
### 3. ImportError: cannot import name check_argument_types
A missing package:
```bash
pip install typeguard
```

### 4. library.so not found
The error usually looks like: Could not load library libcublasLt.so.11. Error: libcublasLt.so.11: cannot open shared object file: No such file or directory

Run the following command to check whether the file exists:
```
sudo find /usr -name "libcublasLt.so.11"
```
If it does not exist, you probably need to install the matching CUDA version.
If it does, add the directory found in the step above to the environment variable:
```
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
```
To make this permanent, add the line to ~/.bashrc and then run source ~/.bashrc. A Python load check is sketched below.

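You can also test loadability directly from Python using only the standard library (the library name comes from the error message above):

```python
# Try to dlopen the missing CUDA library; failure raises OSError with the
# same underlying message the runtime would produce.
import ctypes

try:
    ctypes.CDLL("libcublasLt.so.11")
    print("libcublasLt.so.11 loaded OK")
except OSError as e:
    print("load failed:", e)
    print("check LD_LIBRARY_PATH / CUDA installation")
```
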
## Contributing
Contributions are welcome!

## License
See the HeyGem.ai license.

--------------------------------------------------------------------------------
/README_en.md:
--------------------------------------------------------------------------------

[](https://github.com/GuijiAI/HeyGem.ai/blob/main/LICENSE)



**[中文](README.md)** | **[English](#english-version)**

---



# HeyGem-Linux-Python-Hack

## Introduction

HeyGem-Linux-Python-Hack is a Python-based digital human project extracted from HeyGem.ai. It is designed to run directly on Linux systems, eliminating the need for Docker and Windows. Our goal is to provide an easier-to-deploy, user-friendly digital human solution.

**Feel free to Star us if you find this project useful!**
**Please submit an Issue if you run into any problems!**

## Key Features

* No Docker required: runs directly on Linux systems, simplifying the deployment process.
* No Windows required: fully developed and tested on Linux.
* Python powered: developed in Python, making it easy to understand and extend.
* Developer-friendly: easy to use, and easy to extend.

## Getting Started

### Installation

Please ensure that **Python 3.8** is installed on your Linux system. Then, you can install the project dependencies using pip:

```bash
pip install -r requirements.txt
```

### Usage
Clone this repository to your local machine:
```bash
git clone https://github.com/Holasyb918/HeyGem-Linux-Python-Hack
cd HeyGem-Linux-Python-Hack
bash download.sh
```
#### Getting Started
* Audio and video examples for the demo are already provided in the repo, and the code can be run directly.
#### Command:
```bash
python run.py
```
* If you want to use your own data, you can pass parameters externally. **Please note that each path must be a local file, and only relative paths are supported.**
#### Command:
```bash
python run.py --audio_path example/audio.wav --video_path example/video.mp4
```
#### Gradio:
```bash
python app.py
# Please wait until the processor has finished initializing.
```

## Contributing
Contributions are welcome!

## License
This project is licensed under the HeyGem.ai License.

--------------------------------------------------------------------------------
/README_tts_f2f.MD:
--------------------------------------------------------------------------------

[](https://github.com/GuijiAI/HeyGem.ai/blob/main/LICENSE)



**[中文](#chinese-version)** | **[English](README_en.md)**

---



# HeyGem-Linux-Python-Hack

## Introduction

HeyGem-Linux-Python-Hack is a Python-based digital human project extracted from [HeyGem.ai](https://github.com/GuijiAI/HeyGem.ai). It runs directly on Linux, removing the dependency on Docker and Windows. Our goal is to provide a digital human solution that is easier to deploy and use.

**If you find this project helpful, feel free to give us a Star!**
**If you run into problems, please check the existing Issues and search Google/Baidu/AI first, then feel free to open an Issue!**

## Key Features

* No Docker required: runs directly on Linux, simplifying deployment.
* No Windows required: fully developed and tested on Linux.
* Python powered: written in Python, easy to understand and extend.
* Developer friendly: easy to use and extend.
* Fully offline.

## Getting Started

### Environment
This project has two parts: tts and face2face.
* The tts part supports Python 3.8, and in fact newer versions work even better;
* The face2face part supports Python 3.8 and only 3.8 (see the version-guard sketch below).
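
Because the two halves disagree on Python versions, a fail-fast guard at the top of your f2f entry script can prevent confusing errors later. A minimal illustrative sketch, not part of the repo:

```python
# Guard: the face2face half of this project runs only under Python 3.8.
import sys

if sys.version_info[:2] != (3, 8):
    raise SystemExit(
        "face2face requires Python 3.8, got " + sys.version.split()[0]
    )
```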


### Usage
Clone the projects:
```bash
# f2f
git clone https://github.com/Holasyb918/HeyGem-Linux-Python-Hack
cd HeyGem-Linux-Python-Hack
# download the f2f models
bash download.sh

# tts
git clone https://github.com/Holasyb918/tts-fish-speech
cd tts-fish-speech
# download the tts model
huggingface-cli download fishaudio/fish-speech-1.5 --local-dir checkpoints/fish-speech-1.5/
```

### Setting up the environment
Please refer to [requirements.txt](https://github.com/Holasyb918/tts-fish-speech/blob/main/requirements.txt) and build the environment around your actual setup. If a single environment cannot satisfy both parts, tts can live in a regular environment (3.8 is not required), but you may then need to run the text-to-digital-human pipeline in separate steps (a sketch follows the command below).

#### Getting started
* Audio and video samples for the demo are already included in the repo, so the code can run as-is.
Put the text you want to generate in [example/text.txt](example/text.txt), put the voice to clone in [example/audio.wav](example/audio.wav), and then run:
#### Command:
```bash
bash inference_from_text.sh example/audio.wav example/text.txt example/video.mp4
# voice wav                  TTS text          video
```
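
If tts and face2face end up in separate environments (see the environment section above), the two stages of inference_from_text.sh can also be driven one at a time. A hedged sketch of the same flow in Python; the stage commands mirror inference_from_text.sh, and each subprocess uses whatever interpreter/environment you launch it from:

```python
# Run the TTS stage, move its output, then run the face2face stage,
# mirroring what inference_from_text.sh does.
import os
import shutil
import subprocess

audio = os.path.abspath("example/audio.wav")  # voice to clone
text = os.path.abspath("example/text.txt")    # text to synthesize

# Stage 1: TTS (any suitable environment) - produces tts-fish-speech/fake.wav
subprocess.run(["bash", "run.sh", audio, text], cwd="tts-fish-speech", check=True)

# Stage 2: face2face (must run under the Python 3.8 environment)
shutil.move("tts-fish-speech/fake.wav", "example/fake.wav")
subprocess.run(
    ["python", "run.py", "--audio_path", "example/fake.wav",
     "--video_path", "example/video.mp4"],
    check=True,
)
```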


## QA
### 1. Errors when multiple faces are present
Downloading a newer face detection model and swapping it in for the original one may fix this:
```bash
wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/scrfd_10g_kps.onnx
mv face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx.bak
mv scrfd_10g_kps.onnx face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx
```
### 2. Initialization errors

Most likely caused by an onnxruntime-gpu version mismatch.
```bash
python check_env/check_onnx_cuda.py
```
Check whether the output contains "successfully".
If you run into problems, you can try the following:
1. Try different versions that match your CUDA environment.
2. If that is hard to resolve, uninstall onnxruntime-gpu and onnxruntime first, install cudatoolkit via conda, and then try pip installing onnxruntime-gpu again.

Verified working combinations:
| cudatoolkit | onnxruntime-gpu | Notes |
| --- | --- | --- |
| 11.8.0 | 1.16.0 | |

### 3. ImportError: cannot import name check_argument_types
A missing package:
```bash
pip install typeguard
```

## Contributing
Contributions are welcome!

## License
See the HeyGem.ai license.

--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
import argparse
import gc
import json
import os

os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
import subprocess
import threading
import time
import traceback
import uuid
from enum import Enum
import queue
import shutil
from functools import partial

import cv2
import gradio as gr
from flask import Flask, request

import service.trans_dh_service
from h_utils.custom import CustomError
from y_utils.config import GlobalConfig
from y_utils.logger import logger


def write_video_gradio(
    output_imgs_queue,
    temp_dir,
    result_dir,
    work_id,
    audio_path,
    result_queue,
    width,
    height,
    fps,
    watermark_switch=0,
    digital_auth=0,
    temp_queue=None,
):
    output_mp4 = os.path.join(temp_dir, "{}-t.mp4".format(work_id))
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    result_path = os.path.join(result_dir, "{}-r.mp4".format(work_id))
    video_write = cv2.VideoWriter(output_mp4, fourcc, fps, (width, height))
    print("Custom VideoWriter init done")
    try:
        while True:
            state, reason, value_ = output_imgs_queue.get()
            if isinstance(state, bool) and state:
                logger.info(
                    "Custom VideoWriter [{}] frame queue processing finished".format(work_id)
                )
                logger.info(
                    "Custom VideoWriter Silence Video saved in {}".format(
                        os.path.realpath(output_mp4)
                    )
                )
                video_write.release()
                break
            else:
                if isinstance(state, bool) and not state:
                    logger.error(
                        "Custom VideoWriter [{}] frame queue -> error reason: [{}]".format(
                            work_id, reason
                        )
                    )
                    raise CustomError(reason)
                for result_img in value_:
                    video_write.write(result_img)
        if video_write is not None:
            video_write.release()
        if watermark_switch == 1 and digital_auth == 1:
            logger.info(
                "Custom VideoWriter [{}] task needs watermark and digital-human badge".format(work_id)
            )
            if width > height:
                command = 'ffmpeg -y -i {} -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10,overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
                    audio_path,
                    output_mp4,
                    GlobalConfig.instance().watermark_path,
                    GlobalConfig.instance().digital_auth_path,
                    result_path,
                )
                logger.info("command:{}".format(command))
            else:
                command = 'ffmpeg -y -i {} -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10,overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
                    audio_path,
                    output_mp4,
                    GlobalConfig.instance().watermark_path,
                    GlobalConfig.instance().digital_auth_path,
                    result_path,
                )
                logger.info("command:{}".format(command))
        elif watermark_switch == 1 and digital_auth == 0:
            logger.info("Custom VideoWriter [{}] task needs watermark".format(work_id))
            command = 'ffmpeg -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10" -c:a aac -crf 15 -strict -2 {}'.format(
                audio_path,
                output_mp4,
                GlobalConfig.instance().watermark_path,
                result_path,
            )
            logger.info("command:{}".format(command))
        elif watermark_switch == 0 and digital_auth == 1:
            logger.info("Custom VideoWriter [{}] task needs digital-human badge".format(work_id))
            if width > height:
                command = 'ffmpeg -loglevel warning -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
                    audio_path,
                    output_mp4,
                    GlobalConfig.instance().digital_auth_path,
                    result_path,
                )
                logger.info("command:{}".format(command))
            else:
                command = 'ffmpeg -loglevel warning -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
                    audio_path,
                    output_mp4,
                    GlobalConfig.instance().digital_auth_path,
                    result_path,
                )
                logger.info("command:{}".format(command))
        else:
            command = "ffmpeg -loglevel warning -y -i {} -i {} -c:a aac -c:v libx264 -crf 15 -strict -2 {}".format(
                audio_path, output_mp4, result_path
            )
            logger.info("Custom command:{}".format(command))
        subprocess.call(command, shell=True)
        print("###### Custom Video Writer write over")
        print(f"###### Video result saved in {os.path.realpath(result_path)}")
        result_queue.put([True, result_path])
        # temp_queue.put([True, result_path])
    except Exception as e:
        logger.error(
            "Custom VideoWriter [{}] frame queue processing aborted, reason: [{}]".format(
                work_id, e.__str__()
            )
        )
        result_queue.put(
            [
                False,
                "[{}] frame queue processing aborted, reason: [{}]".format(
                    work_id, e.__str__()
                ),
            ]
        )
    logger.info("Custom VideoWriter post-processing finished")


# Replace the service's stock video writer with the Gradio-aware version above.
service.trans_dh_service.write_video = write_video_gradio


class VideoProcessor:
    def __init__(self):
        self.task = service.trans_dh_service.TransDhTask()
        self.basedir = GlobalConfig.instance().result_dir
        self.is_initialized = False
        self._initialize_service()
        print("VideoProcessor init done")

    def _initialize_service(self):
        logger.info("Initializing trans_dh_service...")
        try:
            # Fixed wait as a crude stand-in for a real readiness signal.
            time.sleep(5)
            logger.info("trans_dh_service initialized.")
            self.is_initialized = True
        except Exception as e:
            logger.error(f"Failed to initialize trans_dh_service: {e}")

    def process_video(
        self, audio_file, video_file, watermark=False, digital_auth=False
    ):
        while not self.is_initialized:
            logger.info("Service not initialized yet, waiting 1 second...")
            time.sleep(1)
        work_id = str(uuid.uuid1())
        code = work_id
        temp_dir = os.path.join(GlobalConfig.instance().temp_dir, work_id)
        result_dir = GlobalConfig.instance().result_dir
        video_writer_thread = None
        final_result = None

        try:
            cap = cv2.VideoCapture(video_file)
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS)
            cap.release()

            audio_path = audio_file
            video_path = video_file

            self.task.task_dic[code] = ""
            self.task.work(audio_path, video_path, code, 0, 0, 0, 0)

            result_path = self.task.task_dic[code][2]
            final_result_dir = os.path.join("result", code)
            os.makedirs(final_result_dir, exist_ok=True)
            os.system(f"mv {result_path} {final_result_dir}")
            os.system(
                f"rm -rf {os.path.join(os.path.dirname(result_path), code + '*.*')}"
            )
            result_path = os.path.realpath(
                os.path.join(final_result_dir, os.path.basename(result_path))
            )
            return result_path

        except Exception as e:
            logger.error(f"Error while processing video: {e}")
            raise gr.Error(str(e))


if __name__ == "__main__":
    processor = VideoProcessor()

    inputs = [
        gr.File(label="上传音频文件/upload audio file"),
        gr.File(label="上传视频文件/upload video file"),
    ]
    outputs = gr.Video(label="生成的视频/Generated video")

    title = "数字人视频生成/Digital Human Video Generation"
    description = "上传音频和视频文件,即可生成数字人视频。/Upload audio and video files to generate digital human videos."

    demo = gr.Interface(
        fn=processor.process_video,
        inputs=inputs,
        outputs=outputs,
        title=title,
        description=description,
    )
    demo.queue().launch()
--------------------------------------------------------------------------------
/check_env/check_onnx_cuda.py:
--------------------------------------------------------------------------------
import onnxruntime
import numpy as np

def check_gpu_usage():
    """
    Checks if ONNX Runtime can use the GPU by attempting to create an InferenceSession
    with the CUDAExecutionProvider.

    Returns:
        A (bool, InferenceSession) tuple; the bool is True if the GPU is likely being used.
    """
    providers = ("CUDAExecutionProvider",
                 {"device_id": 0})
    session_options = onnxruntime.SessionOptions()
    session_options.log_severity_level = 3
    onnx_path = "./face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx"
    onnx_session = onnxruntime.InferenceSession(onnx_path, session_options, providers=[providers])
    print(onnx_session.get_providers())
    return "CUDAExecutionProvider" in onnx_session.get_providers(), onnx_session

if __name__ == "__main__":
    is_cuda, onnx_session = check_gpu_usage()
    if is_cuda:
        print("ONNX Runtime is successfully using the GPU.")
        # Run one dummy inference to confirm the session actually executes.
        inp = np.random.randn(1, 3, 640, 640).astype(np.float32)
        ort_inputs = {onnx_session.get_inputs()[0].name: inp}
        ort_outs = onnx_session.run(None, ort_inputs)
        print(ort_outs[0].shape)
    else:
        print("ONNX Runtime is NOT using the GPU or there was an error initializing the CUDA provider.")
        print("Please ensure that:")
        print("- You have installed the 'onnxruntime-gpu' package.")
        print("- You have a compatible NVIDIA GPU with appropriate drivers installed.")
        print("- CUDA and cuDNN are installed and correctly configured in your system.")
        print("- The versions of CUDA, cuDNN, and the NVIDIA drivers are compatible with the 'onnxruntime-gpu' version you have installed.")
        print("- The ONNX Runtime build you are using supports CUDA.")
--------------------------------------------------------------------------------
/config/config.ini:
--------------------------------------------------------------------------------
[log]
log_dir = ./log
log_file = dh.log

[http_server]
server_ip = 0.0.0.0
server_port = 8383

[temp]
temp_dir = ./
clean_switch = 1

[result]
result_dir = ./result
clean_switch = 0

[digital]
batch_size = 4

[register]
url = http://172.16.160.51:12120
report_interval = 10
enable = 0

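The rest of the codebase reads these values through the compiled GlobalConfig class in y_utils/config (shipped only as a .so, so its exact API is not visible here). For reference, a minimal configparser sketch that reads the same file; the section and key names are taken from the file above:

```python
# Read config/config.ini with the standard library.
import configparser

cfg = configparser.ConfigParser()
cfg.read("config/config.ini")

server_ip = cfg.get("http_server", "server_ip")         # "0.0.0.0"
server_port = cfg.getint("http_server", "server_port")  # 8383
result_dir = cfg.get("result", "result_dir")            # "./result"
batch_size = cfg.getint("digital", "batch_size")        # 4
print(server_ip, server_port, result_dir, batch_size)
```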
--------------------------------------------------------------------------------
/download.sh:
--------------------------------------------------------------------------------
set -e
set -u

# face attr
mkdir -p face_attr_detect
wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/face_attr_epoch_12_220318.onnx -O face_attr_detect/face_attr_epoch_12_220318.onnx

# face detect
mkdir -p face_detect_utils/resources
wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/pfpld_robust_sim_bs1_8003.onnx -O face_detect_utils/resources/pfpld_robust_sim_bs1_8003.onnx
wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/scrfd_500m_bnkps_shape640x640.onnx -O face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx
wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/model_float32.onnx -O face_detect_utils/resources/model_float32.onnx

# dh model
mkdir -p landmark2face_wy/checkpoints/anylang
wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/dinet_v1_20240131.pth -O landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth

# face parsing
mkdir -p pretrain_models/face_lib/face_parsing
wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/79999_iter.onnx -O pretrain_models/face_lib/face_parsing/79999_iter.onnx

# gfpgan
mkdir -p pretrain_models/face_lib/face_restore/gfpgan
wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/GFPGANv1.4.onnx -O pretrain_models/face_lib/face_restore/gfpgan/GFPGANv1.4.onnx

# xseg
mkdir -p xseg
wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/xseg_211104_4790000.onnx -O xseg/xseg_211104_4790000.onnx

# wenet
mkdir -p wenet/examples/aishell/aidata/exp/conformer
wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/wenetmodel.pt -O wenet/examples/aishell/aidata/exp/conformer/wenetmodel.pt
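
After the script finishes, a quick sanity pass can confirm every model landed where the loaders expect it. A minimal sketch; the paths are copied from the wget targets above:

```python
# Verify that every checkpoint download.sh fetches exists and is non-empty.
import os

expected = [
    "face_attr_detect/face_attr_epoch_12_220318.onnx",
    "face_detect_utils/resources/pfpld_robust_sim_bs1_8003.onnx",
    "face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx",
    "face_detect_utils/resources/model_float32.onnx",
    "landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth",
    "pretrain_models/face_lib/face_parsing/79999_iter.onnx",
    "pretrain_models/face_lib/face_restore/gfpgan/GFPGANv1.4.onnx",
    "xseg/xseg_211104_4790000.onnx",
    "wenet/examples/aishell/aidata/exp/conformer/wenetmodel.pt",
]
for path in expected:
    ok = os.path.isfile(path) and os.path.getsize(path) > 0
    print(("OK      " if ok else "MISSING ") + path)
```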
--------------------------------------------------------------------------------
/example/audio.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/example/audio.wav
--------------------------------------------------------------------------------
/example/video.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/example/video.mp4
--------------------------------------------------------------------------------
/face_attr_detect/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_attr_detect/.DS_Store
--------------------------------------------------------------------------------
/face_attr_detect/__init__.py:
--------------------------------------------------------------------------------
from .face_attr import FaceAttr
--------------------------------------------------------------------------------
/face_attr_detect/face_attr.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_attr_detect/face_attr.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/face_detect_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_detect_utils/__init__.py
--------------------------------------------------------------------------------
/face_detect_utils/face_detect.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_detect_utils/face_detect.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/face_detect_utils/head_pose.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_detect_utils/head_pose.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/face_detect_utils/scrfd.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_detect_utils/scrfd.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/face_lib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_lib/__init__.py
--------------------------------------------------------------------------------
/face_lib/face_detect_and_align/__init__.py:
--------------------------------------------------------------------------------
from .face_align_5_landmarks import FaceDetect5Landmarks
from .face_align_utils import estimate_norm
--------------------------------------------------------------------------------
/face_lib/face_detect_and_align/face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_lib/face_detect_and_align/face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/face_lib/face_detect_and_align/face_align_utils.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_lib/face_detect_and_align/face_align_utils.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/face_lib/face_detect_and_align/scrfd_insightface/__init__.py:
--------------------------------------------------------------------------------
# -- coding: utf-8 --
# @Time : 2021/11/10


from .scrfd import SCRFD
--------------------------------------------------------------------------------
/face_lib/face_detect_and_align/scrfd_insightface/scrfd.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_lib/face_detect_and_align/scrfd_insightface/scrfd.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/face_lib/face_parsing/__init__.py:
--------------------------------------------------------------------------------
# -- coding: utf-8 --
# @Time : 2022/3/29


from .face_parsing_api import FaceParsing
# from .dfl_xseg_net import XsegNet
--------------------------------------------------------------------------------
/face_lib/face_parsing/face_parsing_api.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_lib/face_parsing/face_parsing_api.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/face_lib/face_restore/__init__.py:
--------------------------------------------------------------------------------

from .gfpgan_onnx.gfpgan_onnx_api import GFPGAN
--------------------------------------------------------------------------------
/face_lib/face_restore/gfpgan_onnx/gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/face_lib/face_restore/gfpgan_onnx/gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/h_utils/__init__.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding=utf-8
"""
@project : dhp-service
@author : huyi
@file : __init__.py.py
@ide : PyCharm
@time : 2021-08-18 15:45:13
"""
--------------------------------------------------------------------------------
/h_utils/custom.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/h_utils/custom.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/h_utils/obs_client.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/h_utils/obs_client.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/h_utils/request_utils.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/h_utils/request_utils.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/h_utils/sweep_bot.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/h_utils/sweep_bot.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/h_utils/zip_utils.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/h_utils/zip_utils.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/inference_from_text.sh:
--------------------------------------------------------------------------------
set -e
set -u

ref_audio=$1
text_path=$2
ref_mp4=$3

pwd=$(pwd)
echo "ref_audio: ${ref_audio}"
echo "text_path: ${text_path}"
echo "ref_mp4: ${ref_mp4}"
echo "pwd: ${pwd}"

real_ref_audio=$(realpath ${ref_audio})
real_text_path=$(realpath ${text_path})
real_ref_mp4=$(realpath ${ref_mp4})

echo "real_ref_audio: ${real_ref_audio}"
echo "real_text_path: ${real_text_path}"
echo "real_ref_mp4: ${real_ref_mp4}"

# tts
cd tts-fish-speech
echo bash run.sh ${real_ref_audio} ${real_text_path}
bash run.sh ${real_ref_audio} ${real_text_path}

# f2f
cd ${pwd}
mv tts-fish-speech/fake.wav example/fake.wav

python run.py --audio_path example/fake.wav --video_path ${ref_mp4}
32 |
--------------------------------------------------------------------------------
/landmark2face_wy/audio_handler.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/audio_handler.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/checkpoints/test/opt.txt:
--------------------------------------------------------------------------------
----------------- Options ---------------
aspect_ratio: 1.0
audio_feature: 3dmm
batch_size: 16
checkpoints_dir: ./landmark2face_wy/checkpoints
crop_size: 256
dataroot: ./data
dataset_mode: Facereala3dmm
direction: AtoB
display_winsize: 256
distributed: False
epoch: latest
eval: False
feat_num: 3
feature_path: ../AnnI_deep3dface_256_contains_id/
fp16: False
gpu_ids: 0
img_size: 256
init_gain: 0.02
init_type: normal
input_nc: 3
instance_feat: False
isTrain: False [default: None]
label_feat: False
lan_size: 1
load_features: False
load_iter: 0 [default: 0]
load_size: 286
local_rank: -1
max_dataset_size: inf
mfcc0_rate: 0.2
model: pirender_3dmm_mouth_hd
model_path: ./landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth
n_blocks: 9
n_blocks_global: 9
n_blocks_local: 3
n_clusters: 10
n_downsample_E: 4
n_downsample_global: 4
n_layers_D: 3
n_local_enhancers: 1
name: test
ndf: 64
nef: 16
netD: basic
netG: pirender
ngf: 64
niter_fix_global: 0
no_dropout: True
no_flip: False
no_ganFeat_loss: False
no_instance: False
norm: instance
ntest: inf
num_D: 2
num_test: 50
num_threads: 4
output_nc: 3
perceptual_layers: ['relu_1_1', 'relu_2_1', 'relu_3_1', 'relu_4_1', 'relu_5_1']
perceptual_network: vgg19
perceptual_num_scales: 4
perceptual_use_style_loss: True
perceptual_weights: [4, 4, 4, 4, 4]
phase: test
preprocess: resize_and_crop
resize_size: 512
results_dir: ./results/
serial_batches: False
suffix:
test_audio_path: None
test_muban: None
verbose: False
weight_style_to_perceptual: 250
----------------- End -------------------
--------------------------------------------------------------------------------
/landmark2face_wy/data/Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/data/Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/data/Facereala3dmmexp512_dataset.py:
--------------------------------------------------------------------------------
import os.path
import random
from data.base_dataset import BaseDataset, get_params, get_transform
import torchvision.transforms as transforms
from data.image_folder import make_dataset
from PIL import Image, ImageEnhance
import numpy as np
import cv2
import torch
import time

def get_idts(config_name):
    # Read the identity/video names from ../config/<config_name>.txt
    idts = list()
    with open(os.path.join('../config', config_name + '.txt')) as f:
        for line in f:
            line = line.strip()
            video_name = line.split(':')[0]
            idts.append(video_name)
    return idts


def obtain_seq_index(index, num_frames):
    # 27-frame temporal window centered on `index`, clamped to [0, num_frames - 1]
    seq = list(range(index - 13, index + 13 + 1))
    seq = [min(max(item, 0), num_frames - 1) for item in seq]
    return seq

def get_3dmm_feature(img_path, idx, new_dict):
    # Take columns 80:144 of the 3DMM feature matrix over the temporal window
    # around frame `idx`, returned as (feature_dim, window_len).
    id = img_path.split('/')[-3]
    features = new_dict[id]
    idx_list = obtain_seq_index(idx, features.shape[0])
    feature = features[idx_list, 80:144]
    # feature[:, -1] = 50
    return np.transpose(feature, (1, 0))



class Facereala3dmmexp512Dataset(BaseDataset):
    def __init__(self, opt, mode=None):
        BaseDataset.__init__(self, opt)
        img_size = opt.img_size
        idts = get_idts(opt.name.split('_')[0])
        print("---------load data list--------: ", idts)
        self.new_dict = {}
        if mode == 'train':
            self.labels = []
            self.label_starts = []
            self.label_ends = []
            count = 0
            for idt_name in idts:
                # root = '../AnnVI/feature/{}'.format(idt_name)
                root = os.path.join(opt.feature_path, idt_name)
                feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature))
                self.new_dict[idt_name] = feature
                if opt.audio_feature == "3dmm":
                    training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
                else:
                    training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
                training_data = torch.load(training_data_path)
                img_paths = training_data['img_paths']
                features_3dmm = training_data['features_3dmm']
                index = [i[0].split('/')[-1] for i in img_paths]

                image_dir = '{}/{}_dlib_crop'.format(root, img_size)
                self.label_starts.append(count)
                for img in range(len(index)):
                    img_path = os.path.join(image_dir, index[img])
                    # idx_list = obtain_seq_index(img, feature.shape[0])
                    # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))])
                    self.labels.append([img_path, features_3dmm[img]])
                    count = count + 1
                self.label_ends.append(count)

            self.label_starts = np.array(self.label_starts)
            self.label_ends = np.array(self.label_ends)
            self.transforms_image = transforms.Compose([transforms.ToTensor(),
                                                        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

            self.transforms_label = transforms.Compose([transforms.ToTensor(),
                                                        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
            self.shuffle()
        elif mode == 'test':
            self.labels = []
            self.label_starts = []
            self.label_ends = []
            count = 0
            for idt_name in idts:
                # root = '../AnnVI/feature/{}'.format(idt_name)
                root = os.path.join(opt.feature_path, idt_name)
                feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature))
                self.new_dict[idt_name] = feature
                if opt.audio_feature == "3dmm":
                    training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
                else:
                    training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
                training_data = torch.load(training_data_path)
                img_paths = training_data['img_paths']
                features_3dmm = training_data['features_3dmm']
                index = [i[0].split('/')[-1] for i in img_paths]

                image_dir = '{}/{}_dlib_crop'.format(root, img_size)
                self.label_starts.append(count)
                for img in range(len(index)):
                    img_path = os.path.join(image_dir, index[img])
                    # idx_list = obtain_seq_index(img, feature.shape[0])
                    # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))])
                    self.labels.append([img_path, features_3dmm[img]])
                    count = count + 1
                self.label_ends.append(count)

            self.label_starts = np.array(self.label_starts)
            self.label_ends = np.array(self.label_ends)
            self.transforms_image = transforms.Compose([transforms.ToTensor(),
                                                        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

            self.transforms_label = transforms.Compose([transforms.ToTensor(),
                                                        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
            self.shuffle()

    def shuffle(self):
        self.labels_index = list(range(len(self.labels)))
        random.shuffle(self.labels_index)

    def add_mouth_mask2(self, img):
        # Zero out a triangular mouth region inside a randomly sized rectangle.
        mask = np.ones_like(img)
        rect_area = [img.shape[1] // 2 - 60, np.random.randint(226, 246), 30, 256 - 30]
        mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]]
        x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2]))
        x = np.flip(x, 0)
        y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose()
        zz1 = -y - x + 88 > 0
        zz2 = np.flip(zz1, 1)
        zz = (zz1 + zz2) > 0
        mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1
        imgm = img * mask
        return imgm

    def __getitem__(self, index):
        # s1 = time.time()
        idx = self.labels_index[index]
        img_path, feature_3dmm_idx = self.labels[idx]
        # print(img_path, feature_3dmm_idx)
        feature_3dmm = get_3dmm_feature(img_path, feature_3dmm_idx, self.new_dict)
        # print(img_path, feature_3dmm_idx, feature_3dmm.shape)

        img = np.array(Image.open(img_path).convert('RGB'))
        img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
        cut_pad1 = np.random.randint(0, 20)
        cut_pad2 = np.random.randint(0, 20)
        img = img[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]
        # s2 = time.time()
        # print('get data and read data ', s2-s1)
        mask_B = img.copy()
        # mask_end = np.random.randint(236*2, 250*2)
        # index = np.random.randint(80, 90)
        # mask_B[mask_B.shape[1] // 2 - index:mask_end, 30:-30] = 0
        mask_end = np.random.randint(480, 500)
        index = np.random.randint(15, 30)
        mask_B[index:mask_end, 70:-70] = 0
        img = Image.fromarray(img)

        mask_B = Image.fromarray(mask_B)
        img = self.transforms_image(img)
        mask_B = self.transforms_image(mask_B)

        x = np.where((idx >= self.label_starts) * (idx < self.label_ends))[0]

        audio = torch.tensor(feature_3dmm)
        # s3 = time.time()
        # print('get 3dmm and mask ', s3 - s2)
        # make sure real_A_index is not idx
        max_i = 0
        real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1)
        while real_A_index == idx:
            max_i += 1
            real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1)
            if max_i > 5:
                break

        imgA_path, _ = self.labels[real_A_index]
        imgA = np.array(Image.open(imgA_path).convert('RGB'))
        cut_pad1 = np.random.randint(0, 20)
        cut_pad2 = np.random.randint(0, 20)
        imgA = imgA[cut_pad1:256*2 + cut_pad1, cut_pad2:256*2 + cut_pad2]

        ######## ellipse ##########
        # mask = np.zeros(imgA.shape, dtype=np.uint8)
        # cv2.ellipse(mask, (imgA.shape[1] // 2, imgA.shape[0] // 2 - 165 - cut_pad1),
        #             (imgA.shape[1] // 2 + 25, imgA.shape[0]), 0, 0, 360, (255, 255, 255), -1)
        # ROI = cv2.bitwise_and(imgA, mask)
        # imgA = Image.fromarray(ROI)
        #############################
        # imgA[:imgA.shape[1] // 2 - 40 - index2, :] = 0
        imgA = Image.fromarray(imgA)
        imgA = self.transforms_image(imgA)
        # s4 = time.time()
        # print('end time reala ', s4 - s3)
        return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}

    def __len__(self):
        """Return the total number of images in the dataset."""
        return len(self.labels)


if __name__ == '__main__':
    from options.train_options import TrainOptions

    opt = TrainOptions().parse()
    # was Facereala3dmmDataset, which is not defined in this file
    dataset = Facereala3dmmexp512Dataset(opt, mode='train')
    dataset_size = len(dataset)
    print(dataset_size)
    for i, data in enumerate(dataset):
        print(data)
--------------------------------------------------------------------------------
/landmark2face_wy/data/Facereala3dmmexpwenet512_dataset.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | import random
3 | from data.base_dataset import BaseDataset, get_params, get_transform
4 | import torchvision.transforms as transforms
5 | from data.image_folder import make_dataset
6 | from PIL import Image, ImageEnhance
7 | import numpy as np
8 | import cv2
9 | import torch
10 | import time
11 |
12 | def get_idts(config_name):
13 | idts = list()
14 | with open(os.path.join('../config', config_name + '.txt')) as f:
15 | for line in f:
16 | line = line.strip()
17 | video_name = line.split(':')[0]
18 | idts.append(video_name)
19 | return idts
20 |
21 |
22 | def obtain_seq_index(index, num_frames):
23 | seq = list(range(index - 10, index + 9 + 1))
24 | seq = [min(max(item, 0), num_frames - 1) for item in seq]
25 | return seq
26 |
27 | def get_3dmm_feature(img_path, idx, audio_feature, new_dict):
28 | id = img_path.split('/')[-3]
29 | features, features1, features1 = new_dict[id]
30 | idx_list = obtain_seq_index(idx, features.shape[0])
31 | feature = features[idx_list, 80:144]
32 | feature1 = features1[:,audio_feature[0]:audio_feature[1]]
33 | feature = np.concatenate([feature, features[idx_list, -3:], np.transpose(feature1, (1, 0))], 1)
34 | # print(feature.shape)
35 | return np.transpose(feature, (1, 0))
36 | # return feature
37 |
38 |
39 |
40 | class Facereala3dmmexpwenet512Dataset(BaseDataset):
41 | def __init__(self, opt, mode=None):
42 | BaseDataset.__init__(self, opt)
43 | img_size = opt.img_size
44 | idts = get_idts(opt.name.split('_')[0])
45 | print("---------load data list--------: ", idts)
46 | self.new_dict = {}
47 | if mode == 'train':
48 | self.labels = []
49 | self.label_starts = []
50 | self.label_ends = []
51 | count = 0
52 | for idt_name in idts:
53 | # root = '../AnnVI/feature/{}'.format(idt_name)
54 | root = os.path.join(opt.feature_path, idt_name)
55 | feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature))
56 | feature1 = np.load(os.path.join(root,'audio_wenet_feature.npy'))
57 | self.new_dict[idt_name] = [feature, feature1, feature1]
58 | if opt.audio_feature == "3dmm":
59 | training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
60 | else:
61 | training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
62 | training_data = torch.load(training_data_path)
63 | img_paths = training_data['img_paths']
64 | features_3dmm = training_data['features_3dmm']
65 | audio_features = np.load(os.path.join(root, 'audio_data.npy'), allow_pickle=True)
66 | audio_features = audio_features.tolist()
67 | index = [i[0].split('/')[-1] for i in img_paths]
68 |
69 | image_dir = '{}/{}_dlib_crop'.format(root, img_size)
70 | self.label_starts.append(count)
71 | for img in range(len(index)):
72 | img_path = os.path.join(image_dir, index[img])
73 | # idx_list = obtain_seq_index(img, feature.shape[0])
74 | # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))])
75 | if type(features_3dmm[img]) != int:
76 | print(img_path)
77 | audio_feature = audio_features[img]
78 | self.labels.append([img_path, features_3dmm[img], audio_feature])
79 | count = count + 1
80 | self.label_ends.append(count)
81 |
82 | self.label_starts = np.array(self.label_starts)
83 | self.label_ends = np.array(self.label_ends)
84 | self.transforms_image = transforms.Compose([transforms.ToTensor(),
85 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
86 |
87 | self.transforms_label = transforms.Compose([transforms.ToTensor(),
88 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
89 | self.shuffle()
90 | elif mode == 'test':
91 | self.labels = []
92 | self.label_starts = []
93 | self.label_ends = []
94 | count = 0
95 | for idt_name in idts:
96 | # root = '../AnnVI/feature/{}'.format(idt_name)
97 | root = os.path.join(opt.feature_path, idt_name)
98 | feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature))
99 | self.new_dict[idt_name] = feature
100 | if opt.audio_feature == "3dmm":
101 | training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
102 | else:
103 | training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
104 | training_data = torch.load(training_data_path)
105 | img_paths = training_data['img_paths']
106 | features_3dmm = training_data['features_3dmm']
107 | index = [i[0].split('/')[-1] for i in img_paths]
108 |
109 | image_dir = '{}/{}_dlib_crop'.format(root, img_size)
110 | self.label_starts.append(count)
111 | for img in range(len(index)):
112 | img_path = os.path.join(image_dir, index[img])
113 | # idx_list = obtain_seq_index(img, feature.shape[0])
114 | # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))])
115 | self.labels.append([img_path, features_3dmm[img]])
116 | count = count + 1
117 | self.label_ends.append(count)
118 |
119 | self.label_starts = np.array(self.label_starts)
120 | self.label_ends = np.array(self.label_ends)
121 | self.transforms_image = transforms.Compose([transforms.ToTensor(),
122 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
123 |
124 | self.transforms_label = transforms.Compose([transforms.ToTensor(),
125 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
126 | self.shuffle()
127 |
128 | def shuffle(self):
129 | self.labels_index = list(range(len(self.labels)))
130 | random.shuffle(self.labels_index)
131 |
132 | def add_mouth_mask2(self, img):
133 | mask = np.ones_like(img)
134 | rect_area = [img.shape[1] // 2 - 60, np.random.randint(226, 246), 30, 256 - 30]
135 | mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]]
136 | x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2]))
137 | x = np.flip(x, 0)
138 | y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose()
139 | zz1 = -y - x + 88 > 0
140 | zz2 = np.flip(zz1, 1)
141 | zz = (zz1 + zz2) > 0
142 | mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1
143 | imgm = img * mask
144 | return imgm
145 |
146 | def __getitem__(self, index):
147 | # s1= time.time()
148 | idx = self.labels_index[index]
149 | img_path, feature_3dmm_idx, audio_feature= self.labels[idx]
150 | # print(img_path, feature_3dmm_idx)
151 | feature_3dmm = get_3dmm_feature(img_path, feature_3dmm_idx, audio_feature, self.new_dict)
152 | #print(img_path, feature_3dmm_idx, feature_3dmm.shape)
153 |
154 | img = np.array(Image.open(img_path).convert('RGB'))
155 | img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
156 | cut_pad1 = np.random.randint(0, 20)
157 | cut_pad2 = np.random.randint(0, 20)
158 | img = img[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]
159 | # s2 =time.time()
160 | # print('get data and read data ', s2-s1)
161 | mask_B = img.copy()
162 | # mask_end = np.random.randint(236*2, 250*2)
163 | # index = np.random.randint(80, 90)
164 | # mask_B[mask_B.shape[1] // 2 - index:mask_end, 30:-30] = 0
165 | mask_end = np.random.randint(480, 500)
166 | index = np.random.randint(15, 30)
167 | # index = np.random.randint(90, 100)
168 | mask_B[index:mask_end, 70:-70] = 0
169 | img = Image.fromarray(img)
170 |
171 | mask_B = Image.fromarray(mask_B)
172 | img = self.transforms_image(img)
173 | mask_B = self.transforms_image(mask_B)
174 |
175 | x = np.where((idx >= self.label_starts) * (idx < self.label_ends))[0]
176 |
177 | audio = torch.tensor(feature_3dmm)
178 | # s3 = time.time()
179 | # print('get 3dmm and mask ', s3 - s2)
180 | # make sure real_A_index differs from idx (give up after a few retries)
181 | max_i = 0
182 | real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1)
183 | while real_A_index == idx:
184 | max_i += 1
185 | real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1)
186 | if max_i > 5:
187 | break
188 |
189 | imgA_path, _, _ = self.labels[real_A_index]
190 | imgA = np.array(Image.open(imgA_path).convert('RGB'))
191 | cut_pad1 = np.random.randint(0, 20)
192 | cut_pad2 = np.random.randint(0, 20)
193 | imgA = imgA[cut_pad1:256*2 + cut_pad1, cut_pad2:256*2 + cut_pad2]
194 |
195 | ######## ellipse mask (disabled) ##########
196 | # mask = np.zeros(imgA.shape, dtype=np.uint8)
197 | # cv2.ellipse(mask, (imgA.shape[1] // 2, imgA.shape[0] // 2 - 165 - cut_pad1),
198 | # (imgA.shape[1] // 2 + 25, imgA.shape[0]), 0, 0, 360, (255, 255, 255), -1)
199 | # ROI = cv2.bitwise_and(imgA, mask)
200 | # imgA = Image.fromarray(ROI)
201 | #############################
202 | # imgA[:imgA.shape[1] // 2 - 40 - index2, :] = 0
203 | imgA = Image.fromarray(imgA)
204 | imgA = self.transforms_image(imgA)
205 | # s4 = time.time()
206 | # print('end time reala ', s4 - s3)
207 | return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}
208 |
209 | def __len__(self):
210 | """Return the total number of images in the dataset."""
211 | return len(self.labels)
212 |
213 |
214 | if __name__ == '__main__':
215 | from options.train_options import TrainOptions
216 |
217 | opt = TrainOptions().parse()
218 | dataset = Facereala3dmmDataset(opt)
219 | dataset_size = len(dataset)
220 | print(dataset_size)
221 | for i, data in enumerate(dataset):
222 | print(data)
223 |
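
Editor's note: the trapezoid-shaped mouth blanking in add_mouth_mask2 above is easier to see in isolation. Below is a minimal standalone sketch (NumPy only; the helper name and the printed diagnostic are ours, the geometry and the magic constant 88 are copied from the method):

import numpy as np

def triangular_mouth_mask(h0, h1, w0, w1, shape=(256, 256, 3)):
    # Rebuild the keep/blank mask the way add_mouth_mask2 does:
    # x counts rows from the bottom of the rectangle, y counts columns,
    # and a pixel is KEPT wherever either mirrored diagonal test passes.
    mask = np.ones(shape, dtype=np.uint8)
    rows, cols = h1 - h0, w1 - w0
    x = np.flip(np.tile(np.arange(rows)[:, np.newaxis], (1, cols)), 0)
    y = np.tile(np.arange(cols)[:, np.newaxis], (1, rows)).transpose()
    zz1 = -y - x + 88 > 0             # left diagonal half-plane test
    zz = zz1 | np.flip(zz1, 1)        # OR with its horizontal mirror
    mask[h0:h1, w0:w1] = np.tile(zz[:, :, np.newaxis], (1, 1, 3))
    return mask

mask = triangular_mouth_mask(256 // 2 - 60, 236, 30, 256 - 30)
print("pixels blanked:", int((mask[..., 0] == 0).sum()))

Multiplying an image by this mask zeroes the central mouth region while keeping the two triangular corners, which is the shape the training masks aim for.
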
--------------------------------------------------------------------------------
/landmark2face_wy/data/__init__.py:
--------------------------------------------------------------------------------
1 | """This package includes all the modules related to data loading and preprocessing
2 |
3 | To add a custom dataset class called 'dummy', you need to add a file called 'dummy_dataset.py' and define a subclass 'DummyDataset' inherited from BaseDataset.
4 | You need to implement four functions:
5 | -- <__init__>: initialize the class, first call BaseDataset.__init__(self, opt).
6 | -- <__len__>: return the size of dataset.
7 | -- <__getitem__>: get a data point from data loader.
8 | -- <modify_commandline_options>: (optionally) add dataset-specific options and set default options.
9 |
10 | Now you can use the dataset class by specifying flag '--dataset_mode dummy'.
11 | See our template dataset class 'template_dataset.py' for more details.
12 | """
13 | import importlib
14 | import torch.utils.data
15 | from landmark2face_wy.data.base_dataset import BaseDataset
16 |
17 |
18 | def find_dataset_using_name(dataset_name):
19 | """Import the module "data/[dataset_name]_dataset.py".
20 |
21 | In the file, the class called DatasetNameDataset() will
22 | be instantiated. It has to be a subclass of BaseDataset,
23 | and it is case-insensitive.
24 | """
25 | dataset_filename = "landmark2face_wy.data." + dataset_name + "_dataset"
26 | datasetlib = importlib.import_module(dataset_filename)
27 |
28 | dataset = None
29 | target_dataset_name = dataset_name.replace('_', '') + 'dataset'
30 | for name, cls in datasetlib.__dict__.items():
31 | if name.lower() == target_dataset_name.lower() \
32 | and issubclass(cls, BaseDataset):
33 | dataset = cls
34 |
35 | if dataset is None:
36 | raise NotImplementedError("In %s.py, there should be a subclass of BaseDataset with class name that matches %s in lowercase." % (dataset_filename, target_dataset_name))
37 |
38 | return dataset
39 |
40 |
41 | def get_option_setter(dataset_name):
42 | """Return the static method of the dataset class."""
43 | dataset_class = find_dataset_using_name(dataset_name)
44 | return dataset_class.modify_commandline_options
45 |
46 |
47 | def create_dataset(opt, mode='train'):
48 | """Create a dataset given the option.
49 |
50 | This function wraps the class CustomDatasetDataLoader.
51 | This is the main interface between this package and 'train.py'/'test.py'
52 |
53 | Example:
54 | >>> from landmark2face_wy.data import create_dataset
55 | >>> dataset = create_dataset(opt)
56 | """
57 | data_loader = CustomDatasetDataLoader(opt, mode)
58 | dataset = data_loader.load_data()
59 | return dataset
60 |
61 |
62 | class CustomDatasetDataLoader():
63 | """Wrapper class of Dataset class that performs multi-threaded data loading"""
64 |
65 | def __init__(self, opt, mode):
66 | """Initialize this class
67 |
68 | Step 1: create a dataset instance given the name [dataset_mode]
69 | Step 2: create a multi-threaded data loader.
70 | """
71 | self.opt = opt
72 | dataset_class = find_dataset_using_name(opt.dataset_mode)
73 | self.dataset = dataset_class(opt, mode)
74 | print("dataset [%s] was created" % type(self.dataset).__name__)
75 | if mode == 'test':
76 | batchsize = opt.batch_size // 2
77 | else:
78 | batchsize = opt.batch_size
79 | print(opt.batch_size)
80 | if not opt.distributed:
81 | self.dataloader = torch.utils.data.DataLoader(self.dataset, batch_size=batchsize,
82 | shuffle=not opt.serial_batches, num_workers=int(opt.num_threads))
83 | else:
84 | self.train_sampler = torch.utils.data.distributed.DistributedSampler(self.dataset)  ### shard the dataset across processes
85 | self.dataloader = torch.utils.data.DataLoader(self.dataset, batch_size=batchsize, sampler=self.train_sampler, num_workers=int(opt.num_threads), pin_memory=True)
86 |
87 | def load_data(self):
88 | return self
89 |
90 | def __len__(self):
91 | """Return the number of data in the dataset"""
92 | return min(len(self.dataset), self.opt.max_dataset_size)
93 |
94 | def __iter__(self):
95 | """Return a batch of data"""
96 | for i, data in enumerate(self.dataloader):
97 | if i * self.opt.batch_size >= self.opt.max_dataset_size:
98 | break
99 | yield data
100 |
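
Editor's note: the lookup in find_dataset_using_name drops underscores from the '--dataset_mode' value, appends 'dataset', and matches class names case-insensitively; that rule is what ties option values to the files in this directory. A self-contained sketch of just that rule (class names assumed to follow the file-name convention used here):

def resolves(dataset_mode: str, class_name: str) -> bool:
    # Mirror of the matching rule in find_dataset_using_name.
    target = dataset_mode.replace('_', '') + 'dataset'
    return class_name.lower() == target.lower()

# '--dataset_mode l2faceaudio512' -> l2faceaudio512_dataset.py / L2FaceAudio512Dataset
assert resolves('l2faceaudio512', 'L2FaceAudio512Dataset')
assert resolves('facereala3dmmexp512', 'Facereala3dmmexp512Dataset')
assert not resolves('l2faceaudio', 'L2FaceAudio512Dataset')
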
--------------------------------------------------------------------------------
/landmark2face_wy/data/base_dataset.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/data/base_dataset.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/data/image_folder.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/data/image_folder.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/data/l2faceaudio512_dataset.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | import random
3 | from landmark2face_wy.data.base_dataset import BaseDataset, get_params, get_transform
4 | import torchvision.transforms as transforms
5 | from landmark2face_wy.data.image_folder import make_dataset
6 | from PIL import Image, ImageEnhance
7 | import numpy as np
8 | import cv2
9 | import torch
10 |
11 |
12 | def get_idts(config_name):
13 | idts = list()
14 | with open(os.path.join('../config', config_name + '.txt')) as f:
15 | for line in f:
16 | line = line.strip()
17 | idts.append(line)
18 | return idts
19 |
20 |
21 | class L2FaceAudio512Dataset(BaseDataset):
22 | def __init__(self, opt, mode=None):
23 | BaseDataset.__init__(self, opt)
24 | img_size = opt.img_size
25 | idts = get_idts(opt.name.split('_')[0])
26 | print("---------load data list--------: ", idts)
27 | if mode == 'train':
28 | self.labels = []
29 | for idt_name in idts:
30 | # root = '../AnnVI/feature/{}'.format(idt_name)
31 | root = os.path.join(opt.feature_path, idt_name)
32 | if opt.audio_feature == "mfcc":
33 | training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
34 | else:
35 | training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
36 | training_data = torch.load(training_data_path)
37 | img_paths = training_data['img_paths']
38 | audio_features = training_data['audio_features']
39 | index = [i[0].split('/')[-1] for i in img_paths]
40 |
41 | image_dir = '{}/{}_dlib_crop'.format(root, img_size)
42 | # label_dir = '{}/512_landmark_crop'.format(root)
43 |
44 | # if 'man' in opt.name:
45 | # imgs.sort(key=lambda x:int(x.split('.')[0]))
46 | # else:
47 | # imgs.sort(key=lambda x: (int(x.split('.')[0].split('-')[0]), int(x.split('.')[0].split('-')[1])))
48 | for img in range(len(index)):
49 | img_path = os.path.join(image_dir, index[img])
50 | audio_feature = audio_features[img]
51 | self.labels.append([img_path, audio_feature])
52 | # transforms.Resize([img_size, img_size], Image.BICUBIC),
53 | self.transforms_image = transforms.Compose([transforms.ToTensor(),
54 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
55 | # transforms.Resize([img_size, img_size], Image.BICUBIC),
56 | self.transforms_label = transforms.Compose([transforms.ToTensor(),
57 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
58 | self.shuffle()
59 | elif mode == 'test':
60 | self.labels = []
61 | for idt_name in idts:
62 | # root = '../AnnVI/feature/{}'.format(idt_name)
63 | root = os.path.join(opt.feature_path, idt_name)
64 | if opt.audio_feature == "mfcc":
65 | training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
66 | else:
67 | training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
68 | training_data = torch.load(training_data_path)
69 | img_paths = training_data['img_paths']
70 | audio_features = training_data['audio_features']
71 | index = [i[0].split('/')[-1] for i in img_paths]
72 |
73 | image_dir = '{}/{}_dlib_crop'.format(root, img_size)
74 | # label_dir = '{}/512_landmark_crop'.format(root)
75 |
76 | # if 'man' in opt.name:
77 | # imgs.sort(key=lambda x:int(x.split('.')[0]))
78 | # else:
79 | # imgs.sort(key=lambda x: (int(x.split('.')[0].split('-')[0]), int(x.split('.')[0].split('-')[1])))
80 | for img in range(len(index)):
81 | img_path = os.path.join(image_dir, index[img])
82 | audio_feature = audio_features[img]
83 | self.labels.append([img_path, audio_feature])
84 | # transforms.Resize([img_size, img_size], Image.BICUBIC),
85 | self.transforms_image = transforms.Compose([transforms.ToTensor(),
86 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
87 | # transforms.Resize([img_size, img_size], Image.BICUBIC),
88 | self.transforms_label = transforms.Compose([transforms.ToTensor(),
89 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
90 | self.shuffle()
91 |
92 | def shuffle(self):
93 | random.shuffle(self.labels)
94 |
95 | def add_mouth_mask2(self, img):
96 | mask = np.ones_like(img)
97 | rect_area = [img.shape[1] // 2 - np.random.randint(50, 60), np.random.randint(226, 246), 30, 256 - 30]
98 | mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]]
99 | x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2]))
100 | x = np.flip(x, 0)
101 | y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose()
102 | zz1 = -y - x + 88 > 0
103 | zz2 = np.flip(zz1, 1)
104 | zz = (zz1 + zz2) > 0
105 | mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1
106 | imgm = img * mask
107 | return imgm
108 |
109 | def __getitem__(self, index):
110 | cv2.setNumThreads(0)
111 | img_path, audio_feature = self.labels[index]
112 | img = np.array(Image.open(img_path).convert('RGB'))
113 | img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
114 | cut_pad1 = np.random.randint(0, 20)
115 | cut_pad2 = np.random.randint(0, 20)
116 | img = img[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]
117 |
118 | #### elliptical mask to hide the collar (disabled) #####
119 | '''
120 | mask = np.zeros(img.shape, dtype=np.uint8)
121 | cv2.ellipse(mask, (img.shape[1] // 2, img.shape[0] // 2 - 160 - cut_pad1), (img.shape[1] // 2 + 10, img.shape[0]), 0, 0, 360, (255, 255, 255), -1)
122 | '''
123 | #### mask covering the eyes #####
124 | mask = np.ones(img.shape, dtype=np.uint8) * 255
125 | mask[40 - cut_pad1:140 - cut_pad1, 110 - cut_pad2:-110 - cut_pad2] = 0
126 | img = cv2.bitwise_and(img, mask)
127 |
128 | mask_B = img.copy()
129 | mask_B = cv2.resize(mask_B, (256, 256))
130 | ########## neck-segmentation mask (disabled) #############
131 | # img_edge = cv2.imread(img_path.replace("dlib_crop", "dlib_crop_neck"))
132 | # img_edge = img_edge[cut_pad1:256 + cut_pad1, cut_pad2:256 + cut_pad2]
133 | # mask_B = cv2.bitwise_and(img, 255 - img_edge)
134 | # img_edge[:128, :, :] = img[:128, :, :]
135 |
136 | ########## extra elliptical neck mask (disabled) #############
137 | '''
138 | maske = np.zeros(img.shape, dtype=np.uint8)
139 | cv2.ellipse(maske, (img.shape[1] // 2, img.shape[0] // 2 + 50),
140 | (img.shape[1] // 4 + np.random.randint(-5, 5), img.shape[0] // 3 + np.random.randint(-10, 10)),
141 | 0, 0, 360, (255, 255, 255), -1)
142 | maske[:img.shape[0] // 2, :, :] = 0
143 | mask_B = cv2.bitwise_and(mask_B, 255-maske)
144 | '''
145 | ########## old rectangular mask #############
146 | mask_end = np.random.randint(236, 256)
147 | mask_B[mask_B.shape[1] // 2 - np.random.randint(40, 50):mask_end, 30:-30] = 0
148 | ########## old rectangular mask #############
149 | ########## Cai Xingyu's triangular mask (disabled) #############
150 | # mask_B = self.add_mouth_mask2(mask_B)
151 | ########## Cai Xingyu's triangular mask (disabled) #############
152 | # mask_B[mask_B.shape[1] // 2 - 50:, 30:-30] = 0
153 | img = Image.fromarray(img)
154 | mask_B = Image.fromarray(mask_B)
155 | img = self.transforms_image(img)
156 | mask_B = self.transforms_image(mask_B)
157 | # lab = Image.open(lab_path).convert('RGB')
158 | # lab = self.transforms_label(lab)
159 | audio = np.zeros((256, 256), dtype=np.float32)
160 | audio_feature = np.array(audio_feature)
161 | audio[:audio_feature.shape[0], :audio_feature.shape[1]] = audio_feature
162 | audio = torch.tensor([audio])
163 |
164 | imgA_path, _ = random.sample(self.labels, 1)[0]
165 | imgA = np.array(Image.open(imgA_path).convert('RGB'))
166 | cut_pad1 = np.random.randint(0, 20)
167 | cut_pad2 = np.random.randint(0, 20)
168 | imgA = imgA[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]
169 | # mask = np.ones(imgA.shape, dtype=np.uint8) * 255
170 | # mask[40 - cut_pad1:140 - cut_pad1, 110 - cut_pad2:-110 - cut_pad2] = 0
171 | imgA = cv2.bitwise_and(imgA, mask)
172 | imgA = Image.fromarray(imgA)
173 | imgA = self.transforms_image(imgA)
174 | return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}
175 |
176 | def __len__(self):
177 | """Return the total number of images in the dataset."""
178 | return len(self.labels)
179 |
180 |
181 | if __name__ == '__main__':
182 | from options.train_options import TrainOptions
183 |
184 | opt = TrainOptions().parse()
185 | dataset = L2FaceAudio512Dataset(opt, mode='train')
186 | dataset_size = len(dataset)
187 | print(dataset_size)
188 | for i, data in enumerate(dataset):
189 | print(data)
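
Editor's note: the label packing in __getitem__ above zero-pads a variable-length audio feature into a fixed 256x256 plane, so every sample in a batch carries the same label shape regardless of window length. A standalone sketch (the 20x28 feature size is a made-up example, not the real extractor output):

import numpy as np
import torch

audio_feature = np.random.randn(20, 28).astype(np.float32)  # hypothetical audio window
audio = np.zeros((256, 256), dtype=np.float32)               # fixed-size label plane
audio[:audio_feature.shape[0], :audio_feature.shape[1]] = audio_feature
audio = torch.tensor([audio])   # -> (1, 256, 256): one-channel label tensor
print(audio.shape)              # torch.Size([1, 256, 256])
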
--------------------------------------------------------------------------------
/landmark2face_wy/data/l2faceaudio_dataset.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | import random
3 | from landmark2face_wy.data.base_dataset import BaseDataset, get_params, get_transform
4 | import torchvision.transforms as transforms
5 | from landmark2face_wy.data.image_folder import make_dataset
6 | from PIL import Image, ImageEnhance
7 | import numpy as np
8 | import cv2
9 | import torch
10 |
11 |
12 | def get_idts(config_name):
13 | idts = list()
14 | with open(os.path.join('../config', config_name + '.txt')) as f:
15 | for line in f:
16 | line = line.strip()
17 | idts.append(line)
18 | return idts
19 |
20 |
21 | class L2FaceAudioDataset(BaseDataset):
22 | def __init__(self, opt, mode=None):
23 | BaseDataset.__init__(self, opt)
24 | img_size = opt.img_size
25 | idts = get_idts(opt.name.split('_')[0])
26 | print("---------load data list--------: ", idts)
27 | if mode == 'train':
28 | self.labels = []
29 | for idt_name in idts:
30 | # root = '../AnnVI/feature/{}'.format(idt_name)
31 | root = os.path.join(opt.feature_path, idt_name)
32 | if opt.audio_feature == "mfcc":
33 | training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
34 | else:
35 | training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
36 | training_data = torch.load(training_data_path)
37 | img_paths = training_data['img_paths']
38 | audio_features = training_data['audio_features']
39 | index = [i[0].split('/')[-1] for i in img_paths]
40 |
41 | image_dir = '{}/{}_dlib_crop'.format(root, img_size)
42 | # label_dir = '{}/512_landmark_crop'.format(root)
43 |
44 | # if 'man' in opt.name:
45 | # imgs.sort(key=lambda x:int(x.split('.')[0]))
46 | # else:
47 | # imgs.sort(key=lambda x: (int(x.split('.')[0].split('-')[0]), int(x.split('.')[0].split('-')[1])))
48 | for img in range(len(index)):
49 | img_path = os.path.join(image_dir, index[img])
50 | audio_feature = audio_features[img]
51 | self.labels.append([img_path, audio_feature])
52 | # transforms.Resize([img_size, img_size], Image.BICUBIC),
53 | self.transforms_image = transforms.Compose([transforms.ToTensor(),
54 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
55 | # transforms.Resize([img_size, img_size], Image.BICUBIC),
56 | self.transforms_label = transforms.Compose([transforms.ToTensor(),
57 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
58 | self.shuffle()
59 | elif mode == 'test':
60 | self.labels = []
61 | for idt_name in idts:
62 | # root = '../AnnVI/feature/{}'.format(idt_name)
63 | root = os.path.join(opt.feature_path, idt_name)
64 | if opt.audio_feature == "mfcc":
65 | training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
66 | else:
67 | training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
68 | training_data = torch.load(training_data_path)
69 | img_paths = training_data['img_paths']
70 | audio_features = training_data['audio_features']
71 | index = [i[0].split('/')[-1] for i in img_paths]
72 |
73 | image_dir = '{}/{}_dlib_crop'.format(root, img_size)
74 | # label_dir = '{}/512_landmark_crop'.format(root)
75 |
76 | # if 'man' in opt.name:
77 | # imgs.sort(key=lambda x:int(x.split('.')[0]))
78 | # else:
79 | # imgs.sort(key=lambda x: (int(x.split('.')[0].split('-')[0]), int(x.split('.')[0].split('-')[1])))
80 | for img in range(len(index)):
81 | img_path = os.path.join(image_dir, index[img])
82 | audio_feature = audio_features[img]
83 | self.labels.append([img_path, audio_feature])
84 | # transforms.Resize([img_size, img_size], Image.BICUBIC),
85 | self.transforms_image = transforms.Compose([transforms.ToTensor(),
86 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
87 | # transforms.Resize([img_size, img_size], Image.BICUBIC),
88 | self.transforms_label = transforms.Compose([transforms.ToTensor(),
89 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
90 | self.shuffle()
91 |
92 | def shuffle(self):
93 | random.shuffle(self.labels)
94 |
95 | def add_mouth_mask2(self, img):
96 | mask = np.ones_like(img)
97 | rect_area = [img.shape[1] // 2 - 60, np.random.randint(226, 246), 30, 256 - 30]
98 | mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]]
99 | x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2]))
100 | x = np.flip(x, 0)
101 | y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose()
102 | zz1 = -y - x + 88 > 0
103 | zz2 = np.flip(zz1, 1)
104 | zz = (zz1 + zz2) > 0
105 | mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1
106 | imgm = img * mask
107 | return imgm
108 |
109 | def __getitem__(self, index):
110 | cv2.setNumThreads(0)
111 | img_path, audio_feature = self.labels[index]
112 | img = np.array(Image.open(img_path).convert('RGB'))
113 | img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
114 | cut_pad1 = np.random.randint(0, 10)
115 | cut_pad2 = np.random.randint(0, 10)
116 | img = img[cut_pad1:256 + cut_pad1, cut_pad2:256 + cut_pad2]
117 |
118 | #### mask covering the eyes #####
119 | mask = np.ones(img.shape, dtype=np.uint8) * 255
120 | mask[20 - cut_pad1:70 - cut_pad1, 55 - cut_pad2:-55 - cut_pad2] = 0
121 | img = cv2.bitwise_and(img, mask)
122 |
123 | mask_B = img.copy()
124 | mask_end = np.random.randint(236, 256)
125 | ########## old rectangular mask #############
126 | mask_B[mask_B.shape[1] // 2 - np.random.randint(40, 50):mask_end, 30:-30] = 0
127 | ########## old rectangular mask #############
128 | ########## Cai Xingyu's triangular mask (disabled) #############
129 | # mask_B = self.add_mouth_mask2(mask_B)
130 | ########## Cai Xingyu's triangular mask (disabled) #############
131 | # mask_B[mask_B.shape[1] // 2 - 50:, 30:-30] = 0
132 | img = Image.fromarray(img)
133 | mask_B = Image.fromarray(mask_B)
134 | img = self.transforms_image(img)
135 | mask_B = self.transforms_image(mask_B)
136 | # lab = Image.open(lab_path).convert('RGB')
137 | # lab = self.transforms_label(lab)
138 | audio = np.zeros((256, 256), dtype=np.float32)
139 | audio_feature = np.array(audio_feature)
140 | audio[:audio_feature.shape[0], :audio_feature.shape[1]] = audio_feature
141 | audio = torch.tensor([audio])
142 |
143 | imgA_path, _ = random.sample(self.labels, 1)[0]
144 | imgA = np.array(Image.open(imgA_path).convert('RGB'))
145 | cut_pad1 = np.random.randint(0, 10)
146 | cut_pad2 = np.random.randint(0, 10)
147 | imgA = imgA[cut_pad1:256 + cut_pad1, cut_pad2:256 + cut_pad2]
148 | imgA = cv2.bitwise_and(imgA, mask)
149 | imgA = Image.fromarray(imgA)
150 | imgA = self.transforms_image(imgA)
151 | return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}
152 |
153 | def __len__(self):
154 | """Return the total number of images in the dataset."""
155 | return len(self.labels)
156 |
157 |
158 | if __name__ == '__main__':
159 | from options.train_options import TrainOptions
160 |
161 | opt = TrainOptions().parse()
162 | dataset = L2FaceAudioDataset(opt, mode='train')
163 | dataset_size = len(dataset)
164 | print(dataset_size)
165 | for i, data in enumerate(dataset):
166 | print(data)
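
Editor's note: both __getitem__ implementations above open with the same photometric augmentation, a per-channel brightness shift of +/-20 followed by a random crop. A standalone sketch (the 266x266 input size is an assumption about how much margin the stored crops carry beyond the 256 target):

import numpy as np

img = np.random.randint(0, 256, (266, 266, 3), dtype=np.uint8)  # stand-in frame
jitter = np.random.randint(-20, 20, size=3, dtype='int8')       # per-channel shift
img = np.clip(img.astype(np.int16) + jitter, 0, 255).astype('uint8')
dy, dx = np.random.randint(0, 10), np.random.randint(0, 10)     # random crop offset
img = img[dy:256 + dy, dx:256 + dx]
print(img.shape)   # (256, 256, 3)
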
--------------------------------------------------------------------------------
/landmark2face_wy/digitalhuman_interface.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/digitalhuman_interface.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/loss/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/loss/__init__.py
--------------------------------------------------------------------------------
/landmark2face_wy/loss/perceptual.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/loss/perceptual.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/models/DINet.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/DINet.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/models/__init__.py:
--------------------------------------------------------------------------------
1 | """This package contains modules related to objective functions, optimizations, and network architectures.
2 |
3 | To add a custom model class called 'dummy', you need to add a file called 'dummy_model.py' and define a subclass DummyModel inherited from BaseModel.
4 | You need to implement the following five functions:
5 | -- <__init__>: initialize the class; first call BaseModel.__init__(self, opt).
6 | -- <set_input>: unpack data from dataset and apply preprocessing.
7 | -- <forward>: produce intermediate results.
8 | -- <optimize_parameters>: calculate loss, gradients, and update network weights.
9 | -- <modify_commandline_options>: (optionally) add model-specific options and set default options.
10 |
11 | In the function <__init__>, you need to define four lists:
12 | -- self.loss_names (str list): specify the training losses that you want to plot and save.
13 | -- self.model_names (str list): define networks used in our training.
14 | -- self.visual_names (str list): specify the images that you want to display and save.
15 | -- self.optimizers (optimizer list): define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for a usage example.
16 |
17 | Now you can use the model class by specifying flag '--model dummy'.
18 | See our template model class 'template_model.py' for more details.
19 | """
20 |
21 | import importlib
22 | from landmark2face_wy.models.base_model import BaseModel
23 |
24 |
25 | def find_model_using_name(model_name):
26 | """Import the module "models/[model_name]_model.py".
27 |
28 | In the file, the class called ModelNameModel() will
29 | be instantiated. It has to be a subclass of BaseModel,
30 | and it is case-insensitive.
31 | """
32 | model_filename = "landmark2face_wy.models." + model_name + "_model"
33 | modellib = importlib.import_module(model_filename)
34 | model = None
35 | target_model_name = model_name.replace('_', '') + 'model'
36 | for name, cls in modellib.__dict__.items():
37 | if name.lower() == target_model_name.lower() \
38 | and issubclass(cls, BaseModel):
39 | model = cls
40 |
41 | if model is None:
42 | # exit(0) would signal success on failure; raise instead, mirroring data/__init__.py
43 | raise NotImplementedError("In %s.py, there should be a subclass of BaseModel with class name that matches %s in lowercase." % (model_filename, target_model_name))
44 |
45 | return model
46 |
47 |
48 | def get_option_setter(model_name):
49 | """Return the static method of the model class."""
50 | model_class = find_model_using_name(model_name)
51 | return model_class.modify_commandline_options
52 |
53 |
54 | def create_model(opt):
55 | """Create a model given the option.
56 |
57 | This function instantiates the model class selected by opt.model.
58 | This is the main interface between this package and 'train.py'/'test.py'
59 |
60 | Example:
61 | >>> from landmark2face_wy.models import create_model
62 | >>> model = create_model(opt)
63 | """
64 | model = find_model_using_name(opt.model)
65 | instance = model(opt)
66 | print("model [%s] was created" % type(instance).__name__)
67 | return instance
68 |
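
Editor's note: find_model_using_name scans a freshly imported module's namespace, and it relies on the name test running before issubclass, so non-class attributes (the module's dunders, helper functions, etc.) are never passed to issubclass. A toy, self-contained reproduction of that scan (BaseModel and DummyModel here are stand-ins, not the real compiled models):

import types

class BaseModel: ...
class DummyModel(BaseModel): ...

# Fake module standing in for landmark2face_wy.models.dummy_model
modellib = types.ModuleType("dummy_model")
modellib.DummyModel = DummyModel

target = "dummy".replace("_", "") + "model"
model = None
for name, cls in modellib.__dict__.items():
    # Name check first: issubclass() would raise on non-class attributes.
    if name.lower() == target and issubclass(cls, BaseModel):
        model = cls
assert model is DummyModel
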
--------------------------------------------------------------------------------
/landmark2face_wy/models/base_function.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/base_function.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/models/base_model.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/base_model.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/models/face3d2face_model.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/face3d2face_model.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/models/face_model.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/face_model.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/models/l2faceaudio_model.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/l2faceaudio_model.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/models/networks.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/networks.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/models/networks_HD.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/networks_HD.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/models/networks_pix2pixHD.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/networks_pix2pixHD.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/models/pirender_3dmm_mouth_hd_model.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/pirender_3dmm_mouth_hd_model.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/models/pirender_3dmm_mouth_hdv2_model.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/models/pirender_3dmm_mouth_hdv2_model.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/options/__init__.py:
--------------------------------------------------------------------------------
1 | """This package options includes option modules: training options, test options, and basic options (used in both training and test)."""
2 |
--------------------------------------------------------------------------------
/landmark2face_wy/options/base_options.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/options/base_options.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/options/test_options.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/options/test_options.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/options/train_options.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/options/train_options.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/sync_batchnorm/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : __init__.py
3 | # Author : Jiayuan Mao
4 | # Email : maojiayuan@gmail.com
5 | # Date : 27/01/2018
6 | #
7 | # This file is part of Synchronized-BatchNorm-PyTorch.
8 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
9 | # Distributed under MIT License.
10 |
11 | from .batchnorm import set_sbn_eps_mode
12 | from .batchnorm import SynchronizedBatchNorm1d, SynchronizedBatchNorm2d, SynchronizedBatchNorm3d
13 | from .batchnorm import patch_sync_batchnorm, convert_model
14 | from .replicate import DataParallelWithCallback, patch_replication_callback
15 |
--------------------------------------------------------------------------------
/landmark2face_wy/sync_batchnorm/batchnorm.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/sync_batchnorm/batchnorm.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/sync_batchnorm/batchnorm_reimpl.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/sync_batchnorm/batchnorm_reimpl.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/sync_batchnorm/comm.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/sync_batchnorm/comm.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/sync_batchnorm/replicate.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/sync_batchnorm/replicate.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/sync_batchnorm/unittest.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/sync_batchnorm/unittest.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/test_3dmm_multi_exp_wenet.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/test_3dmm_multi_exp_wenet.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/test_3dmm_multi_exp_wenet0.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/test_3dmm_multi_exp_wenet0.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/util/__init__.py:
--------------------------------------------------------------------------------
1 | """This package includes a miscellaneous collection of useful helper functions."""
2 |
--------------------------------------------------------------------------------
/landmark2face_wy/util/flow_util.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/util/flow_util.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/util/get_data.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/util/get_data.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/util/html.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/util/html.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/util/image_pool.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/util/image_pool.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/util/util.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/util/util.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/landmark2face_wy/util/visualizer.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/landmark2face_wy/util/visualizer.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/license.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/license.txt
--------------------------------------------------------------------------------
/log/dh.log:
--------------------------------------------------------------------------------
1 | [2025-03-18 12:50:40,644] [run.py[line:153]] [INFO] [TransDhTask init]
2 | [2025-03-18 12:50:41,729] [run.py[line:158]] [INFO] [Task:1002 -> audio_url:./temp/example/audio.wav video_url:./temp/example/video.mp4]
3 | [2025-03-18 12:50:41,732] [run.py[line:158]] [INFO] [[1002] -> ffmpeg video: ffmpeg -loglevel warning -i ./temp/example/video.mp4 -c:v libx264 -crf 15 -an -y ./temp/1002_format.mp4]
4 | [2025-03-18 12:50:41,790] [run.py[line:158]] [ERROR] [[1002] preprocessing failed, exception: [format video error]]
5 | [2025-03-18 12:50:41,790] [run.py[line:158]] [ERROR] [[1002] task execution failed, exception: [[1002] preprocessing failed, exception: [format video error]]]
6 | [2025-03-18 12:50:41,791] [run.py[line:158]] [INFO] [>>> Task:1002 elapsed:0.06167912483215332 ]
7 | [2025-03-18 12:50:57,817] [run.py[line:143]] [INFO] [TransDhTask init]
8 | [2025-03-18 12:50:58,906] [run.py[line:147]] [INFO] [Task:1002 -> audio_url:./temp/example/audio.wav video_url:./temp/example/video.mp4]
9 | [2025-03-18 12:50:58,908] [run.py[line:147]] [INFO] [[1002] -> ffmpeg video: ffmpeg -loglevel warning -i ./temp/example/video.mp4 -c:v libx264 -crf 15 -an -y ./temp/1002_format.mp4]
10 | [2025-03-18 12:50:58,964] [run.py[line:147]] [ERROR] [[1002] preprocessing failed, exception: [format video error]]
11 | [2025-03-18 12:50:58,965] [run.py[line:147]] [ERROR] [[1002] task execution failed, exception: [[1002] preprocessing failed, exception: [format video error]]]
12 | [2025-03-18 12:50:58,966] [run.py[line:147]] [INFO] [>>> Task:1002 elapsed:0.059505462646484375 ]
13 | [2025-03-18 12:52:06,385] [run.py[line:143]] [INFO] [TransDhTask init]
14 | [2025-03-18 12:52:07,560] [run.py[line:147]] [INFO] [Task:1002 -> audio_url:./example/audio.wav video_url:./example/video.mp4]
15 | [2025-03-18 12:52:07,646] [run.py[line:147]] [INFO] [[1002] -> ffmpeg video: ffmpeg -loglevel warning -i ./example/video.mp4 -crf 15 -vcodec copy -an -y ./1002_format.mp4]
16 | [2025-03-18 12:52:07,801] [run.py[line:147]] [INFO] [[1002] -> ffmpeg audio: ffmpeg -loglevel warning -i ./example/audio.wav -ac 1 -ar 16000 -acodec pcm_s16le -y ./1002_format.wav]
17 | [2025-03-18 12:52:07,922] [run.py[line:147]] [INFO] [[1002] -> preprocessing took:0.35927414894104004s]
18 | [2025-03-18 12:52:10,169] [run.py[line:147]] [INFO] [[1002] -> get_aud_feat1 cost:2.245649576187134s]
19 | [2025-03-18 12:52:11,702] [process.py[line:108]] [INFO] [>>> init_wh_process process started]
20 | [2025-03-18 12:52:20,087] [process.py[line:108]] [INFO] [[1002]init_wh result :[0.8809176216714891], cost: 8.382684469223022 s]
21 | [2025-03-18 12:52:20,090] [run.py[line:147]] [INFO] [[1002] -> wh: [0.8809176216714891]]
22 | [2025-03-18 12:52:21,453] [process.py[line:108]] [INFO] [>>> digital-human image processing process started]
23 | [2025-03-18 12:52:24,015] [process.py[line:108]] [INFO] [[1002] task video-driving queue started batch_size:4, len:150]
24 | [2025-03-18 12:52:24,050] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> loop started]
25 | [2025-03-18 12:52:24,085] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:4]
26 | [2025-03-18 12:52:24,112] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:8]
27 | [2025-03-18 12:52:24,122] [process.py[line:108]] [INFO] [>>> audio_transfer get message:4]
28 | [2025-03-18 12:52:24,139] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:12]
29 | [2025-03-18 12:52:24,148] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:16]
30 | [2025-03-18 12:52:24,161] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:20]
31 | [2025-03-18 12:52:24,173] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:24]
32 | [2025-03-18 12:52:24,185] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:28]
33 | [2025-03-18 12:52:24,197] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:32]
34 | [2025-03-18 12:52:24,208] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:36]
35 | [2025-03-18 12:52:24,222] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:40]
36 | [2025-03-18 12:52:24,232] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:44]
37 | [2025-03-18 12:52:25,722] [process.py[line:108]] [INFO] [[1002] -> frame_id:[4] blur confidence:[0.969]]
38 | [2025-03-18 12:52:25,723] [process.py[line:108]] [INFO] [[1002] -> need chaofen .]
39 | [2025-03-18 12:52:25,905] [utils.py[line:145]] [INFO] [Note: detected 72 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable.]
40 | [2025-03-18 12:52:25,906] [utils.py[line:148]] [INFO] [Note: NumExpr detected 72 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.]
41 | [2025-03-18 12:52:25,907] [utils.py[line:160]] [INFO] [NumExpr defaulting to 8 threads.]
42 | [2025-03-18 12:52:26,083] [process.py[line:108]] [INFO] [[4] -> chaofen cost:1.9595112800598145s]
43 | [2025-03-18 12:52:31,071] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:4, cost:6.948575258255005s]
44 | [2025-03-18 12:52:31,116] [process.py[line:108]] [INFO] [>>> audio_transfer get message:8]
45 | [2025-03-18 12:52:31,126] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:48]
46 | [2025-03-18 12:52:31,347] [process.py[line:108]] [INFO] [[8] -> chaofen cost:0.2294461727142334s]
47 | [2025-03-18 12:52:31,576] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:8, cost:0.45979762077331543s]
48 | [2025-03-18 12:52:31,605] [process.py[line:108]] [INFO] [>>> audio_transfer get message:12]
49 | [2025-03-18 12:52:31,615] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:52]
50 | [2025-03-18 12:52:31,818] [process.py[line:108]] [INFO] [[12] -> chaofen cost:0.21271824836730957s]
51 | [2025-03-18 12:52:32,036] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:12, cost:0.43187427520751953s]
52 | [2025-03-18 12:52:32,060] [process.py[line:108]] [INFO] [>>> audio_transfer get message:16]
53 | [2025-03-18 12:52:32,072] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:56]
54 | [2025-03-18 12:52:32,279] [process.py[line:108]] [INFO] [[16] -> chaofen cost:0.21899199485778809s]
55 | [2025-03-18 12:52:32,530] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:16, cost:0.47049522399902344s]
56 | [2025-03-18 12:52:32,552] [process.py[line:108]] [INFO] [>>> audio_transfer get message:20]
57 | [2025-03-18 12:52:32,567] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:60]
58 | [2025-03-18 12:52:32,766] [process.py[line:108]] [INFO] [[20] -> chaofen cost:0.21334147453308105s]
59 | [2025-03-18 12:52:32,993] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:20, cost:0.4411466121673584s]
60 | [2025-03-18 12:52:33,015] [process.py[line:108]] [INFO] [>>> audio_transfer get message:24]
61 | [2025-03-18 12:52:33,028] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:64]
62 | [2025-03-18 12:52:33,229] [process.py[line:108]] [INFO] [[24] -> chaofen cost:0.21344351768493652s]
63 | [2025-03-18 12:52:33,457] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:24, cost:0.44205546379089355s]
64 | [2025-03-18 12:52:33,479] [process.py[line:108]] [INFO] [>>> audio_transfer get message:28]
65 | [2025-03-18 12:52:33,493] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:68]
66 | [2025-03-18 12:52:33,697] [process.py[line:108]] [INFO] [[28] -> chaofen cost:0.21679949760437012s]
67 | [2025-03-18 12:52:33,924] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:28, cost:0.4448537826538086s]
68 | [2025-03-18 12:52:33,946] [process.py[line:108]] [INFO] [>>> audio_transfer get message:32]
69 | [2025-03-18 12:52:33,960] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:72]
70 | [2025-03-18 12:52:34,159] [process.py[line:108]] [INFO] [[32] -> chaofen cost:0.21156740188598633s]
71 | [2025-03-18 12:52:34,381] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:32, cost:0.43474769592285156s]
72 | [2025-03-18 12:52:34,403] [process.py[line:108]] [INFO] [>>> audio_transfer get message:36]
73 | [2025-03-18 12:52:34,417] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:76]
74 | [2025-03-18 12:52:34,618] [process.py[line:108]] [INFO] [[36] -> chaofen cost:0.21408891677856445s]
75 | [2025-03-18 12:52:34,844] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:36, cost:0.4406392574310303s]
76 | [2025-03-18 12:52:34,867] [process.py[line:108]] [INFO] [>>> audio_transfer get message:40]
77 | [2025-03-18 12:52:34,881] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:80]
78 | [2025-03-18 12:52:35,099] [process.py[line:108]] [INFO] [[40] -> chaofen cost:0.23105645179748535s]
79 | [2025-03-18 12:52:35,328] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:40, cost:0.46161866188049316s]
80 | [2025-03-18 12:52:35,350] [process.py[line:108]] [INFO] [>>> audio_transfer get message:44]
81 | [2025-03-18 12:52:35,363] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:84]
82 | [2025-03-18 12:52:35,577] [process.py[line:108]] [INFO] [[44] -> chaofen cost:0.22576594352722168s]
83 | [2025-03-18 12:52:35,808] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:44, cost:0.4577639102935791s]
84 | [2025-03-18 12:52:35,832] [process.py[line:108]] [INFO] [>>> audio_transfer get message:48]
85 | [2025-03-18 12:52:35,846] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:88]
86 | [2025-03-18 12:52:36,047] [process.py[line:108]] [INFO] [[48] -> chaofen cost:0.21441864967346191s]
87 | [2025-03-18 12:52:36,278] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:48, cost:0.4459846019744873s]
88 | [2025-03-18 12:52:36,301] [process.py[line:108]] [INFO] [>>> audio_transfer get message:52]
89 | [2025-03-18 12:52:36,315] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:92]
90 | [2025-03-18 12:52:36,521] [process.py[line:108]] [INFO] [[52] -> chaofen cost:0.2181704044342041s]
91 | [2025-03-18 12:52:36,777] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:52, cost:0.47586750984191895s]
92 | [2025-03-18 12:52:36,798] [process.py[line:108]] [INFO] [>>> audio_transfer get message:56]
93 | [2025-03-18 12:52:36,817] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:96]
94 | [2025-03-18 12:52:37,014] [process.py[line:108]] [INFO] [[56] -> chaofen cost:0.2147221565246582s]
95 | [2025-03-18 12:52:37,247] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:56, cost:0.4486660957336426s]
96 | [2025-03-18 12:52:37,266] [process.py[line:108]] [INFO] [>>> audio_transfer get message:60]
97 | [2025-03-18 12:52:37,281] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:100]
98 | [2025-03-18 12:52:37,483] [process.py[line:108]] [INFO] [[60] -> chaofen cost:0.21598410606384277s]
99 | [2025-03-18 12:52:37,703] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:60, cost:0.43683695793151855s]
100 | [2025-03-18 12:52:37,722] [process.py[line:108]] [INFO] [>>> audio_transfer get message:64]
101 | [2025-03-18 12:52:37,736] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:104]
102 | [2025-03-18 12:52:37,941] [process.py[line:108]] [INFO] [[64] -> chaofen cost:0.2180624008178711s]
103 | [2025-03-18 12:52:38,163] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:64, cost:0.4412345886230469s]
104 | [2025-03-18 12:52:38,183] [process.py[line:108]] [INFO] [>>> audio_transfer get message:68]
105 | [2025-03-18 12:52:38,197] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:108]
106 | [2025-03-18 12:52:38,397] [process.py[line:108]] [INFO] [[68] -> chaofen cost:0.21321654319763184s]
107 | [2025-03-18 12:52:38,637] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:68, cost:0.45404863357543945s]
108 | [2025-03-18 12:52:38,656] [process.py[line:108]] [INFO] [>>> audio_transfer get message:72]
109 | [2025-03-18 12:52:38,670] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:112]
110 | [2025-03-18 12:52:38,877] [process.py[line:108]] [INFO] [[72] -> chaofen cost:0.21999263763427734s]
111 | [2025-03-18 12:52:39,100] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:72, cost:0.4440436363220215s]
112 | [2025-03-18 12:52:39,119] [process.py[line:108]] [INFO] [>>> audio_transfer get message:76]
113 | [2025-03-18 12:52:39,133] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:116]
114 | [2025-03-18 12:52:39,347] [process.py[line:108]] [INFO] [[76] -> chaofen cost:0.22693967819213867s]
115 | [2025-03-18 12:52:39,568] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:76, cost:0.4492220878601074s]
116 | [2025-03-18 12:52:39,586] [process.py[line:108]] [INFO] [>>> audio_transfer get message:80]
117 | [2025-03-18 12:52:39,601] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:120]
118 | [2025-03-18 12:52:39,801] [process.py[line:108]] [INFO] [[80] -> chaofen cost:0.21407222747802734s]
119 | [2025-03-18 12:52:40,024] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:80, cost:0.4377562999725342s]
120 | [2025-03-18 12:52:40,052] [process.py[line:108]] [INFO] [>>> audio_transfer get message:84]
121 | [2025-03-18 12:52:40,068] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:124]
122 | [2025-03-18 12:52:40,270] [process.py[line:108]] [INFO] [[84] -> chaofen cost:0.21637320518493652s]
123 | [2025-03-18 12:52:40,494] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:84, cost:0.44118523597717285s]
124 | [2025-03-18 12:52:40,513] [process.py[line:108]] [INFO] [>>> audio_transfer get message:88]
125 | [2025-03-18 12:52:40,527] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:128]
126 | [2025-03-18 12:52:40,731] [process.py[line:108]] [INFO] [[88] -> chaofen cost:0.2170412540435791s]
127 | [2025-03-18 12:52:40,951] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:88, cost:0.4383111000061035s]
128 | [2025-03-18 12:52:40,971] [process.py[line:108]] [INFO] [>>> audio_transfer get message:92]
129 | [2025-03-18 12:52:40,984] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:132]
130 | [2025-03-18 12:52:41,187] [process.py[line:108]] [INFO] [[92] -> chaofen cost:0.2148122787475586s]
131 | [2025-03-18 12:52:41,416] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:92, cost:0.4454326629638672s]
132 | [2025-03-18 12:52:41,439] [process.py[line:108]] [INFO] [>>> audio_transfer get message:96]
133 | [2025-03-18 12:52:41,451] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:136]
134 | [2025-03-18 12:52:41,663] [process.py[line:108]] [INFO] [[96] -> chaofen cost:0.222761869430542s]
135 | [2025-03-18 12:52:41,887] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:96, cost:0.4477369785308838s]
136 | [2025-03-18 12:52:41,906] [process.py[line:108]] [INFO] [>>> audio_transfer get message:100]
137 | [2025-03-18 12:52:41,920] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:140]
138 | [2025-03-18 12:52:42,123] [process.py[line:108]] [INFO] [[100] -> chaofen cost:0.21576929092407227s]
139 | [2025-03-18 12:52:42,359] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:100, cost:0.4525878429412842s]
140 | [2025-03-18 12:52:42,379] [process.py[line:108]] [INFO] [>>> audio_transfer get message:104]
141 | [2025-03-18 12:52:42,394] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:144]
142 | [2025-03-18 12:52:42,596] [process.py[line:108]] [INFO] [[104] -> chaofen cost:0.21553897857666016s]
143 | [2025-03-18 12:52:42,836] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> finished sending data size:4, frameId:104, cost:0.45633435249328613s]
144 | [2025-03-18 12:52:42,855] [process.py[line:108]] [INFO] [>>> audio_transfer get message:108]
145 | [2025-03-18 12:52:42,870] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> sent data size:[4], current_idx:148]
146 | [2025-03-18 12:52:42,873] [process.py[line:108]] [INFO] [append imgs over]
147 | [2025-03-18 12:52:42,879] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> data sending finished]
148 | [2025-03-18 12:52:43,073] [process.py[line:108]] [INFO] [[108] -> chaofen cost:0.21662592887878418s]
149 | [2025-03-18 12:52:43,297] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:108, cost:0.4421381950378418s]
150 | [2025-03-18 12:52:43,318] [process.py[line:108]] [INFO] [>>> audio_transfer get message:112]
151 | [2025-03-18 12:52:43,332] [process.py[line:108]] [INFO] [[1002]任务预处理进程结束]
152 | [2025-03-18 12:52:43,531] [process.py[line:108]] [INFO] [[112] -> chaofen cost:0.21228814125061035s]
153 | [2025-03-18 12:52:43,791] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:112, cost:0.47336626052856445s]
154 | [2025-03-18 12:52:43,811] [process.py[line:108]] [INFO] [>>> audio_transfer get message:116]
155 | [2025-03-18 12:52:44,034] [process.py[line:108]] [INFO] [[116] -> chaofen cost:0.2223985195159912s]
156 | [2025-03-18 12:52:44,262] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:116, cost:0.4509873390197754s]
157 | [2025-03-18 12:52:44,281] [process.py[line:108]] [INFO] [>>> audio_transfer get message:120]
158 | [2025-03-18 12:52:44,499] [process.py[line:108]] [INFO] [[120] -> chaofen cost:0.21637916564941406s]
159 | [2025-03-18 12:52:44,742] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:120, cost:0.46120476722717285s]
160 | [2025-03-18 12:52:44,762] [process.py[line:108]] [INFO] [>>> audio_transfer get message:124]
161 | [2025-03-18 12:52:44,981] [process.py[line:108]] [INFO] [[124] -> chaofen cost:0.21886157989501953s]
162 | [2025-03-18 12:52:45,240] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:124, cost:0.4781684875488281s]
163 | [2025-03-18 12:52:45,258] [process.py[line:108]] [INFO] [>>> audio_transfer get message:128]
164 | [2025-03-18 12:52:45,474] [process.py[line:108]] [INFO] [[128] -> chaofen cost:0.21480226516723633s]
165 | [2025-03-18 12:52:45,708] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:128, cost:0.44920992851257324s]
166 | [2025-03-18 12:52:45,726] [process.py[line:108]] [INFO] [>>> audio_transfer get message:132]
167 | [2025-03-18 12:52:45,943] [process.py[line:108]] [INFO] [[132] -> chaofen cost:0.21567535400390625s]
168 | [2025-03-18 12:52:46,181] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:132, cost:0.45519399642944336s]
169 | [2025-03-18 12:52:46,200] [process.py[line:108]] [INFO] [>>> audio_transfer get message:136]
170 | [2025-03-18 12:52:46,418] [process.py[line:108]] [INFO] [[136] -> chaofen cost:0.21763992309570312s]
171 | [2025-03-18 12:52:46,662] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:136, cost:0.4619452953338623s]
172 | [2025-03-18 12:52:46,681] [process.py[line:108]] [INFO] [>>> audio_transfer get message:140]
173 | [2025-03-18 12:52:46,900] [process.py[line:108]] [INFO] [[140] -> chaofen cost:0.21794748306274414s]
174 | [2025-03-18 12:52:47,146] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:140, cost:0.4646177291870117s]
175 | [2025-03-18 12:52:47,166] [process.py[line:108]] [INFO] [>>> audio_transfer get message:144]
176 | [2025-03-18 12:52:47,382] [process.py[line:108]] [INFO] [[144] -> chaofen cost:0.21491503715515137s]
177 | [2025-03-18 12:52:47,619] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:144, cost:0.4536001682281494s]
178 | [2025-03-18 12:52:47,639] [process.py[line:108]] [INFO] [>>> audio_transfer get message:148]
179 | [2025-03-18 12:52:47,857] [process.py[line:108]] [INFO] [[148] -> chaofen cost:0.21780657768249512s]
180 | [2025-03-18 12:52:48,098] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:148, cost:0.459348201751709s]
181 | [2025-03-18 12:52:48,104] [process.py[line:108]] [INFO] [>>> audio_transfer get exception msg:-1]
182 | [2025-03-18 12:52:48,105] [process.py[line:108]] [INFO] [[1002]任务数字人图片处理已完成]
183 | [2025-03-18 12:52:48,146] [run.py[line:43]] [INFO] [Custom VideoWriter [1002]视频帧队列处理已结束]
184 | [2025-03-18 12:52:48,151] [run.py[line:46]] [INFO] [Custom VideoWriter Silence Video saved in /mnt/nfs/bj4-v100-23/data1/yubosun/git_proj/heygem/heygem_ori_so/1002-t.mp4]
185 | [2025-03-18 12:52:48,155] [run.py[line:118]] [INFO] [Custom command:ffmpeg -loglevel warning -y -i ./example/audio.wav -i ./1002-t.mp4 -c:a aac -c:v libx264 -crf 15 -strict -2 ./1002-r.mp4]
186 | [2025-03-18 12:53:06,908] [run.py[line:147]] [INFO] [>>> 任务:1002 耗时:59.3451771736145 ]
187 |
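Reading the tail of this log: frames move through the pipeline in batches of four. drivered_video feeds the source frames, each synthesized batch then runs through the upscaling pass (logged as "chaofen", i.e. 超分, presumably super-resolution; roughly 0.21-0.22 s per batch here) and is handed on by audio_transfer (roughly 0.44-0.48 s per batch). The "get exception msg:-1" entry appears to be the end-of-stream marker; after it the writer releases the silent video and ffmpeg muxes the audio back in, for a total task time of about 59 s.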
--------------------------------------------------------------------------------
/model_lib/__init__.py:
--------------------------------------------------------------------------------
1 | from .base_wrapper import ONNXModel
2 | from .model_base import ModelBase
3 |
4 |
5 |
--------------------------------------------------------------------------------
/model_lib/base_wrapper/__init__.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 | # @Time : 2022/8/26
3 |
4 |
5 | from .onnx_model import ONNXModel
6 |
7 |
--------------------------------------------------------------------------------
/model_lib/base_wrapper/onnx_model.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/model_lib/base_wrapper/onnx_model.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/model_lib/model_base.py:
--------------------------------------------------------------------------------
1 | # -- coding: utf-8 --
2 | # @Time : 2022/7/29
3 |
4 |
5 |
6 | from .base_wrapper import ONNXModel
7 | from pathlib import Path
8 |
9 |
10 | try:
11 | from .base_wrapper import TRTWrapper, TRTWrapperSelf
12 | except ImportError:  # the TensorRT wrappers are optional and may be absent
13 | pass
14 |
15 |
16 | # from cv2box.utils import try_import
17 |
18 | class ModelBase:
19 | def __init__(self, model_info, provider):
20 | self.model_path = model_info['model_path']
21 |
22 | if 'input_dynamic_shape' in model_info.keys():
23 | self.input_dynamic_shape = model_info['input_dynamic_shape']
24 | else:
25 | self.input_dynamic_shape = None
26 |
27 | if 'picklable' in model_info.keys():
28 | picklable = model_info['picklable']
29 | else:
30 | picklable = False
31 |
32 | if 'trt_wrapper_self' in model_info.keys():
33 | TRTWrapper = TRTWrapperSelf
34 |
35 | # init model
36 | if Path(self.model_path).suffix == '.engine':
37 | self.model_type = 'trt'
38 | self.model = TRTWrapper(self.model_path)
39 | elif Path(self.model_path).suffix == '.tjm':
40 | self.model_type = 'tjm'
41 |             self.model = TJMWrapper(self.model_path, provider=provider)  # TJMWrapper is not bundled in this repo
42 | elif Path(self.model_path).suffix in ['.onnx', '.bin']:
43 | self.model_type = 'onnx'
44 | if not picklable:
45 | if 'encrypt' in model_info.keys():
46 |                     self.model_path = load_encrypt_model(self.model_path, key=model_info['encrypt'])  # helper from the closed-source build
47 | self.model = ONNXModel(self.model_path, provider=provider, input_dynamic_shape=self.input_dynamic_shape)
48 | else:
49 |                 self.model = OnnxModelPickable(self.model_path, provider=provider)  # picklable ONNX variant, also not bundled here
50 | else:
51 |             raise ValueError('check model suffix, support engine/tjm/onnx now.')
52 |
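For reference, a minimal sketch of constructing a ModelBase around an ONNX file. The path and provider value below are hypothetical; 'model_path' is the only required key, the other keys are the optional switches read above:

    from model_lib import ModelBase

    model_info = {
        'model_path': 'checkpoints/example.onnx',  # hypothetical path; the suffix picks the wrapper
        'input_dynamic_shape': None,               # optional, forwarded to ONNXModel
        'picklable': False,                        # optional; True selects the picklable ONNX variant
    }
    model = ModelBase(model_info, provider='gpu')  # the provider string is passed straight through to ONNXModel
    print(model.model_type)                        # -> 'onnx'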
--------------------------------------------------------------------------------
/preprocess_audio_and_3dmm.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/preprocess_audio_and_3dmm.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | cv2box==0.5.9
2 | apstone==0.0.8
3 | appdirs==1.4.4
4 | audioread==2.1.9
5 | typeguard==2.13.3
6 | cffi==1.15.0
7 | charset-normalizer==2.0.12
8 | click==8.1.3
9 | colorama==0.4.4
10 | cycler==0.11.0
11 | decorator==5.1.1
12 | filelock==3.7.1
13 | flatbuffers==2.0
14 | fonttools==4.36.0
15 | freetype-py==2.3.0
16 | huggingface-hub==0.0.8
17 | idna==3.3
18 | imageio==2.19.3
19 | importlib-metadata==4.11.4
20 | joblib==1.1.0
21 | kiwisolver==1.4.4
22 | kornia==0.6.6
23 | librosa==0.8.1
24 | matplotlib==3.5.3
25 | networkx==2.6.3
26 | numba==0.55.2
27 | numexpr==2.8.6
28 | numpy==1.21.6
29 | onnxruntime-gpu==1.9.0
30 | opencv-python==4.7.0.72
31 | packaging==21.3
32 | pillow==9.1.1
33 | pooch==1.6.0
34 | protobuf==4.21.5
35 | psutil==5.9.1
36 | pycparser==2.21
37 | pyglet==1.5.26
38 | pyopengl==3.1.0
39 | pyparsing==3.0.9
40 | pyrender==0.1.45
41 | python-dateutil==2.8.2
42 | pywavelets==1.3.0
43 | pyyaml==6.0
44 | regex==2022.6.2
45 | requests==2.27.1
46 | resampy==0.2.2
47 | sacremoses==0.0.53
48 | scikit-image==0.19.3
49 | scikit-learn==1.0.2
50 | scipy==1.7.1
51 | six==1.16.0
52 | soundfile==0.10.3.post1
53 | threadpoolctl==3.1.0
54 | tifffile==2021.11.2
55 | tokenizers==0.10.3
56 | torch==1.11.0+cu113
57 | torchaudio==0.11.0+cu113
58 | torchvision==0.12.0+cu113
59 | tqdm==4.64.0
60 | transformers==4.6.1
61 | trimesh==3.12.7
62 | typing-extensions==4.2.0
63 | urllib3==1.26.9
64 | zipp==3.8.0
66 |
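Note that the torch, torchaudio and torchvision pins carry the +cu113 local-version suffix, so they do not resolve from PyPI alone; installation typically needs the PyTorch wheel index as well, e.g. pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu113. The same caveat applies to the pins in requirements_0.txt below.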
--------------------------------------------------------------------------------
/requirements_0.txt:
--------------------------------------------------------------------------------
1 | aiofiles==23.2.1
2 | annotated-types==0.7.0
3 | anyio==4.5.2
4 | apstone==0.0.8
5 | audioread==3.0.1
6 | blinker==1.8.2
7 | certifi==2025.1.31
8 | cffi==1.17.1
9 | charset-normalizer==3.4.1
10 | click==8.1.8
11 | coloredlogs==15.0.1
12 | contourpy==1.1.1
13 | cv2box==0.5.9
14 | cycler==0.12.1
15 | decorator==5.2.1
16 | einops==0.8.1
17 | exceptiongroup==1.2.2
18 | fastapi==0.115.11
19 | ffmpy==0.5.0
20 | filelock==3.16.1
21 | Flask==3.0.3
22 | flatbuffers==25.2.10
23 | fonttools==4.56.0
24 | fsspec==2025.3.0
25 | gradio==4.44.1
26 | gradio_client==1.3.0
27 | h11==0.14.0
28 | httpcore==1.0.7
29 | httpx==0.28.1
30 | huggingface-hub==0.29.3
31 | humanfriendly==10.0
32 | idna==3.10
33 | imageio==2.35.1
34 | importlib_metadata==8.5.0
35 | importlib_resources==6.4.5
36 | itsdangerous==2.2.0
37 | Jinja2==3.1.6
38 | joblib==1.4.2
39 | kiwisolver==1.4.7
40 | lazy_loader==0.4
41 | librosa==0.11.0
42 | llvmlite==0.41.1
43 | markdown-it-py==3.0.0
44 | MarkupSafe==2.1.5
45 | matplotlib==3.7.5
46 | mdurl==0.1.2
47 | mpmath==1.3.0
48 | msgpack==1.1.0
49 | networkx==3.1
50 | numba==0.58.1
51 | numexpr==2.8.6
52 | numpy==1.24.4
53 | onnxruntime-gpu==1.16.0
54 | opencv-python==4.11.0.86
55 | orjson==3.10.15
56 | packaging==24.2
57 | pandas==2.0.3
58 | pillow==10.4.0
59 | platformdirs==4.3.6
60 | pooch==1.8.2
61 | protobuf==5.29.4
62 | pycparser==2.22
63 | pydantic==2.10.6
64 | pydantic_core==2.27.2
65 | pydub==0.25.1
66 | Pygments==2.19.1
67 | pyparsing==3.1.4
68 | python-dateutil==2.9.0.post0
69 | python-multipart==0.0.20
70 | pytz==2025.1
71 | PyWavelets==1.4.1
72 | PyYAML==6.0.2
73 | requests==2.32.3
74 | rich==13.9.4
75 | ruff==0.11.1
76 | scikit-image==0.21.0
77 | scikit-learn==1.3.2
78 | scipy==1.10.1
79 | semantic-version==2.10.0
80 | shellingham==1.5.4
81 | six==1.17.0
82 | sniffio==1.3.1
83 | soundfile==0.13.1
84 | soxr==0.3.7
85 | spark-parser==1.8.9
86 | starlette==0.44.0
87 | sympy==1.13.3
88 | threadpoolctl==3.5.0
89 | tifffile==2023.7.10
90 | tomlkit==0.12.0
91 | torch==1.11.0+cu113
92 | torchaudio==0.11.0+cu113
93 | torchvision==0.12.0+cu113
94 | tqdm==4.67.1
95 | typeguard==2.13.3
96 | typer==0.15.2
97 | typing_extensions==4.12.2
98 | tzdata==2025.1
99 | urllib3==2.2.3
100 | uvicorn==0.33.0
101 | websockets==12.0
102 | Werkzeug==3.0.6
103 | xdis==6.1.3
104 | zipp==3.20.2
105 |
--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import gc
3 | import json
4 | import os
5 | import subprocess
6 | import sys
7 | import threading
8 | import time
9 | import traceback
10 | import uuid
11 | from enum import Enum
12 |
13 | import queue
14 | import cv2
15 | from flask import Flask, request
16 |
17 | if sys.version_info.major != 3 or sys.version_info.minor != 8:
18 |     print("Please run this script with Python 3.8")
19 | sys.exit(1)
20 |
21 | import service.trans_dh_service
22 |
23 | from h_utils.custom import CustomError
24 | from y_utils.config import GlobalConfig
25 | from y_utils.logger import logger
26 |
27 |
28 | def get_args():
29 | parser = argparse.ArgumentParser(
30 | formatter_class=(argparse.ArgumentDefaultsHelpFormatter)
31 | )
32 |
33 | parser.add_argument(
34 | "--audio_path",
35 | type=str,
36 | default="example/audio.wav",
37 | help="path to local audio file",
38 | )
39 | parser.add_argument(
40 | "--video_path",
41 | type=str,
42 | default="example/video.mp4",
43 | help="path to local video file",
44 | )
45 | opt = parser.parse_args()
46 | return opt
47 |
48 |
49 | def write_video(
50 | output_imgs_queue,
51 | temp_dir,
52 | result_dir,
53 | work_id,
54 | audio_path,
55 | result_queue,
56 | width,
57 | height,
58 | fps,
59 | watermark_switch=0,
60 | digital_auth=0,
61 | ):
62 | output_mp4 = os.path.join(temp_dir, "{}-t.mp4".format(work_id))
63 | fourcc = cv2.VideoWriter_fourcc(*"mp4v")
64 | result_path = os.path.join(result_dir, "{}-r.mp4".format(work_id))
65 | video_write = cv2.VideoWriter(output_mp4, fourcc, fps, (width, height))
66 | print("Custom VideoWriter init done")
67 | try:
68 | while True:
69 | state, reason, value_ = output_imgs_queue.get()
70 |             if state is True:
71 |                 logger.info(
72 |                     "Custom VideoWriter [{}] video frame queue processing finished".format(work_id)
73 |                 )
74 | logger.info(
75 | "Custom VideoWriter Silence Video saved in {}".format(
76 | os.path.realpath(output_mp4)
77 | )
78 | )
79 | video_write.release()
80 | break
81 | else:
82 |                 if state is False:
83 |                     logger.error(
84 |                         "Custom VideoWriter [{}] task video frame queue -> error reason: [{}]".format(
85 | work_id, reason
86 | )
87 | )
88 | raise CustomError(reason)
89 | for result_img in value_:
90 | video_write.write(result_img)
91 | if video_write is not None:
92 | video_write.release()
93 | if watermark_switch == 1 and digital_auth == 1:
94 | logger.info(
95 | "Custom VideoWriter [{}]任务需要水印和数字人标识".format(work_id)
96 | )
97 |             if width > height:  # NOTE: both orientation branches currently build the same command
98 | command = 'ffmpeg -y -i {} -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10,overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
99 | audio_path,
100 | output_mp4,
101 | GlobalConfig.instance().watermark_path,
102 | GlobalConfig.instance().digital_auth_path,
103 | result_path,
104 | )
105 | logger.info("command:{}".format(command))
106 | else:
107 | command = 'ffmpeg -y -i {} -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10,overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
108 | audio_path,
109 | output_mp4,
110 | GlobalConfig.instance().watermark_path,
111 | GlobalConfig.instance().digital_auth_path,
112 | result_path,
113 | )
114 | logger.info("command:{}".format(command))
115 | elif watermark_switch == 1 and digital_auth == 0:
116 |             logger.info("Custom VideoWriter [{}] task needs watermark".format(work_id))
117 | command = 'ffmpeg -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10" -c:a aac -crf 15 -strict -2 {}'.format(
118 | audio_path,
119 | output_mp4,
120 | GlobalConfig.instance().watermark_path,
121 | result_path,
122 | )
123 | logger.info("command:{}".format(command))
124 | elif watermark_switch == 0 and digital_auth == 1:
125 |             logger.info("Custom VideoWriter [{}] task needs digital-human logo".format(work_id))
126 | if width > height:
127 | command = 'ffmpeg -loglevel warning -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
128 | audio_path,
129 | output_mp4,
130 | GlobalConfig.instance().digital_auth_path,
131 | result_path,
132 | )
133 | logger.info("command:{}".format(command))
134 | else:
135 | command = 'ffmpeg -loglevel warning -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
136 | audio_path,
137 | output_mp4,
138 | GlobalConfig.instance().digital_auth_path,
139 | result_path,
140 | )
141 | logger.info("command:{}".format(command))
142 | else:
143 | command = "ffmpeg -loglevel warning -y -i {} -i {} -c:a aac -c:v libx264 -crf 15 -strict -2 {}".format(
144 | audio_path, output_mp4, result_path
145 | )
146 | logger.info("Custom command:{}".format(command))
147 | subprocess.call(command, shell=True)
148 | print("###### Custom Video Writer write over")
149 | print(f"###### Video result saved in {os.path.realpath(result_path)}")
150 |         exit(0)  # NOTE: exits the process here, so the result_queue.put on the next line is never reached
151 | result_queue.put([True, result_path])
152 | except Exception as e:
153 | logger.error(
154 | "Custom VideoWriter [{}]视频帧队列处理异常结束,异常原因:[{}]".format(
155 | work_id, e.__str__()
156 | )
157 | )
158 | result_queue.put(
159 | [
160 | False,
161 | "[{}]视频帧队列处理异常结束,异常原因:[{}]".format(
162 | work_id, e.__str__()
163 | ),
164 | ]
165 | )
166 |     logger.info("Custom VideoWriter post-processing process finished")
167 |
168 |
169 | service.trans_dh_service.write_video = write_video
170 |
171 |
172 | def main():
173 | opt = get_args()
174 | if not os.path.exists(opt.audio_path):
175 | audio_url = "example/audio.wav"
176 | else:
177 | audio_url = opt.audio_path
178 |
179 | if not os.path.exists(opt.video_path):
180 | video_url = "example/video.mp4"
181 | else:
182 | video_url = opt.video_path
183 | sys.argv = [sys.argv[0]]
184 | task = service.trans_dh_service.TransDhTask()
185 |     time.sleep(10)  # give TransDhTask's background initialization time to finish; somehow, this works...
186 |
187 | code = "1004"
188 | task.work(audio_url, video_url, code, 0, 0, 0, 0)
189 |
190 |
191 | if __name__ == "__main__":
192 | main()
193 |
194 | # python run.py
195 | # python run.py --audio_path example/audio.wav --video_path example/video.mp4
196 |
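For reference, write_video consumes [state, reason, frames] triples from output_imgs_queue: a non-bool state carries a batch of BGR frames, state=False aborts with CustomError(reason), and state=True ends the stream and triggers the ffmpeg mux. A minimal sketch of the producer side, with dummy frames and hypothetical dimensions (in the real pipeline the closed-source workers fill this queue):

    import multiprocessing as mp

    import numpy as np

    frames_queue = mp.Queue()

    # one ordinary batch: state is anything non-bool, frames are BGR uint8 arrays
    batch = [np.zeros((1080, 1920, 3), dtype=np.uint8)] * 4
    frames_queue.put([None, None, batch])

    # error path: frames_queue.put([False, "reason", None]) raises CustomError in the writer

    # end of stream: releases the VideoWriter and starts the audio mux
    frames_queue.put([True, None, None])

Note that on the success path write_video exits the process right after the mux, so the result_queue entry it would otherwise publish is only seen on the error path.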
--------------------------------------------------------------------------------
/service/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 | """
4 | @project : face2face_train
5 | @author : huyi
6 | @file    : __init__.py
7 | @ide : PyCharm
8 | @time : 2023-12-06 14:46:40
9 | """
10 |
--------------------------------------------------------------------------------
/service/server.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/service/server.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/service/trans_dh_service.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/service/trans_dh_service.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/sources.list:
--------------------------------------------------------------------------------
1 | # Source-code mirrors are commented out by default to speed up apt update; uncomment them if needed
2 | deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal main restricted universe multiverse
3 | # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal main restricted universe multiverse
4 | deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-updates main restricted universe multiverse
5 | # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-updates main restricted universe multiverse
6 | deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-backports main restricted universe multiverse
7 | # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-backports main restricted universe multiverse
8 | deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-security main restricted universe multiverse
9 | # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-security main restricted universe multiverse
10 |
11 | # Pre-release sources; enabling them is not recommended
12 | # deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-proposed main restricted universe multiverse
13 | # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-proposed main restricted universe multiverse
14 |
--------------------------------------------------------------------------------
/wenet/compute_ctc_att_bnf.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/compute_ctc_att_bnf.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/examples/aishell/aidata/conf/train_conformer_multi_cn.yaml:
--------------------------------------------------------------------------------
1 | # network architecture
2 | # encoder related
3 | encoder: conformer
4 | encoder_conf:
5 | output_size: 256 # dimension of attention
6 | attention_heads: 4
7 | linear_units: 2048 # the number of units of position-wise feed forward
8 | num_blocks: 12 # the number of encoder blocks
9 | dropout_rate: 0.1
10 | positional_dropout_rate: 0.1
11 | attention_dropout_rate: 0.0
12 |     input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
13 | normalize_before: true
14 | cnn_module_kernel: 15
15 | use_cnn_module: True
16 | activation_type: 'swish'
17 | pos_enc_layer_type: 'rel_pos'
18 | selfattention_layer_type: 'rel_selfattn'
19 |
20 | # decoder related
21 | decoder: transformer
22 | decoder_conf:
23 | attention_heads: 4
24 | linear_units: 2048
25 | num_blocks: 6
26 | dropout_rate: 0.1
27 | positional_dropout_rate: 0.1
28 | self_attention_dropout_rate: 0.0
29 | src_attention_dropout_rate: 0.0
30 |
31 | # hybrid CTC/attention
32 | model_conf:
33 | ctc_weight: 0.3
34 | lsm_weight: 0.1 # label smoothing option
35 | length_normalized_loss: false
36 |
37 | # use raw_wav or kaldi feature
38 | raw_wav: false
39 |
40 | # feature extraction
41 | collate_conf:
42 | # waveform level config
43 | wav_distortion_conf:
44 | wav_dither: 0.1
45 | wav_distortion_rate: 0.0
46 | distortion_methods: []
47 | speed_perturb: true
48 | feature_extraction_conf:
49 | feature_type: 'fbank'
50 | mel_bins: 80
51 | frame_shift: 10
52 | frame_length: 25
53 | using_pitch: false
54 | # spec level config
55 | # spec_swap: false
56 | feature_dither: 0.0 # add dither [-feature_dither,feature_dither] on fbank feature
57 | spec_aug: true
58 | spec_aug_conf:
59 | warp_for_time: False
60 | num_t_mask: 2
61 | num_f_mask: 2
62 | max_t: 50
63 | max_f: 10
64 | max_w: 80
65 |
66 |
67 | # dataset related
68 | dataset_conf:
69 | max_length: 1300 #40960
70 | min_length: 0
71 | batch_type: 'static' # static or dynamic
72 | batch_size: 40
73 | sort: true
74 |
75 | grad_clip: 5
76 | accum_grad: 4
77 | max_epoch: 240
78 | log_interval: 100
79 |
80 | optim: adam
81 | optim_conf:
82 | lr: 0.0025 #0.0025
83 | scheduler: warmuplr # pytorch v1.1.0+ required
84 | scheduler_conf:
85 | warmup_steps: 100000
86 |
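For reference, a minimal sketch of loading this config (PyYAML is already pinned in requirements.txt; both conformer configs under conf/ share this layout):

    import yaml

    with open('wenet/examples/aishell/aidata/conf/train_conformer_multi_cn.yaml') as f:
        conf = yaml.safe_load(f)

    print(conf['encoder'], conf['encoder_conf']['num_blocks'], conf['encoder_conf']['output_size'])
    # -> conformer 12 256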
--------------------------------------------------------------------------------
/wenet/examples/aishell/aidata/conf/train_conformer_multi_cn_linear.yaml:
--------------------------------------------------------------------------------
1 | # network architecture
2 | # encoder related
3 | encoder: conformer
4 | encoder_conf:
5 | output_size: 256 # dimension of attention
6 | attention_heads: 4
7 | linear_units: 1024 # the number of units of position-wise feed forward
8 | num_blocks: 6 # the number of encoder blocks
9 | dropout_rate: 0.1
10 | positional_dropout_rate: 0.1
11 | attention_dropout_rate: 0.0
12 |     input_layer: linear # encoder input type, you can choose linear, conv2d, conv2d6 and conv2d8
13 | normalize_before: true
14 | cnn_module_kernel: 15
15 | use_cnn_module: True
16 | activation_type: 'swish'
17 | pos_enc_layer_type: 'rel_pos'
18 | selfattention_layer_type: 'rel_selfattn'
19 |
20 | # decoder related
21 | decoder: transformer
22 | decoder_conf:
23 | attention_heads: 4
24 | linear_units: 1024
25 | num_blocks: 3
26 | dropout_rate: 0.1
27 | positional_dropout_rate: 0.1
28 | self_attention_dropout_rate: 0.0
29 | src_attention_dropout_rate: 0.0
30 |
31 | # hybrid CTC/attention
32 | model_conf:
33 | ctc_weight: 0.3
34 | lsm_weight: 0.1 # label smoothing option
35 | length_normalized_loss: false
36 |
37 | # use raw_wav or kaldi feature
38 | raw_wav: false
39 |
40 | # feature extraction
41 | collate_conf:
42 | # waveform level config
43 | wav_distortion_conf:
44 | wav_dither: 0.1
45 | wav_distortion_rate: 0.0
46 | distortion_methods: []
47 | speed_perturb: true
48 | feature_extraction_conf:
49 | feature_type: 'fbank'
50 | mel_bins: 80
51 | frame_shift: 10
52 | frame_length: 25
53 | using_pitch: false
54 | # spec level config
55 | # spec_swap: false
56 | feature_dither: 0.0 # add dither [-feature_dither,feature_dither] on fbank feature
57 | spec_aug: true
58 | spec_aug_conf:
59 | warp_for_time: False
60 | num_t_mask: 2
61 | num_f_mask: 2
62 | max_t: 50
63 | max_f: 10
64 | max_w: 80
65 |
66 |
67 | # dataset related
68 | dataset_conf:
69 | max_length: 1300 #40960
70 | min_length: 0
71 | batch_type: 'static' # static or dynamic
72 | batch_size: 40
73 | sort: true
74 |
75 | grad_clip: 5
76 | accum_grad: 4
77 | max_epoch: 240
78 | log_interval: 100
79 |
80 | optim: adam
81 | optim_conf:
82 | lr: 0.002
83 | scheduler: warmuplr # pytorch v1.1.0+ required
84 | scheduler_conf:
85 | warmup_steps: 50000
86 |
--------------------------------------------------------------------------------
/wenet/tools/_extract_feats.py:
--------------------------------------------------------------------------------
1 | import librosa
2 | # import tensorflow as tf  # required by the *_tf helpers below if they are re-enabled
3 | import numpy as np
4 | from scipy.io import wavfile
5 | from scipy import signal
6 |
7 | import torchaudio.compliance.kaldi as kaldi
8 | import torchaudio
9 | # torchaudio.set_audio_backend("sox_io")
10 |
11 |
12 | def _extract_feature(wav_path):
13 | """ Extract acoustic fbank feature from origin waveform.
14 |
15 | Speed perturbation and wave amplitude distortion is optional.
16 |
17 | Args:
18 | batch: a list of tuple (wav id , wave path).
19 | speed_perturb: bool, whether or not to use speed pertubation.
20 | wav_distortion_conf: a dict , the config of wave amplitude distortion.
21 | feature_extraction_conf:a dict , the config of fbank extraction.
22 |
23 | Returns:
24 | (keys, feats, labels)
25 | """
26 |     waveform, sample_rate = torchaudio.load_wav(wav_path)  # NOTE: newer torchaudio replaces this with torchaudio.load(wav_path, normalize=False)
27 |
28 | mat = kaldi.fbank(
29 | waveform,
30 | num_mel_bins=80,
31 | frame_length=25,
32 | frame_shift=10,
33 | dither=0.1,
34 | energy_floor=0.0,
35 | sample_frequency=sample_rate)
36 | mat = mat.detach().numpy()
37 |
38 | return mat
39 |
40 | def _extract_feature_norm(wav_path):
41 | """ Extract acoustic fbank feature from origin waveform.
42 |
43 | Speed perturbation and wave amplitude distortion is optional.
44 |
45 | Args:
46 | batch: a list of tuple (wav id , wave path).
47 | speed_perturb: bool, whether or not to use speed pertubation.
48 | wav_distortion_conf: a dict , the config of wave amplitude distortion.
49 | feature_extraction_conf:a dict , the config of fbank extraction.
50 |
51 | Returns:
52 | (keys, feats, labels)
53 | """
54 |
55 |     waveform, sample_rate = torchaudio.load_wav(wav_path)  # NOTE: newer torchaudio replaces this with torchaudio.load(wav_path, normalize=False)
56 |
57 | mat = kaldi.fbank(
58 | waveform,
59 | num_mel_bins=80,
60 | frame_length=25,
61 | frame_shift=10,
62 | dither=0.1,
63 | energy_floor=0.0,
64 | sample_frequency=sample_rate)
65 | mat = mat.detach().numpy()
66 |
67 | return mat
68 |
69 |
70 | hparams = {
71 |     'sample_rate': 16000,  # 16000 samples per second
72 |     'preemphasis': 0.97,
73 |     'n_fft': 1024,
74 |     'hop_length': 200,  # frame shift of 200 samples = 12.5 ms at 16 kHz
75 |     'win_length': 800,  # window of 800 samples = 50 ms at 16 kHz
76 |     'num_mels': 80,
77 |     'n_mfcc': 13,
78 |     'window': 'hann',
79 |     'fmin': 0.,
80 |     'fmax': 8000.,
81 |     'ref_db': 20,
82 |     'min_db': -80.0,  # restrict the dynamic range of log power
83 |     'iterations': 100,  # griffin_lim #iterations
84 |     'silence_db': -28.0,
85 |     'center': True,  # whether each analysis frame is centered on its timestamp
86 | }
87 |
88 | _mel_basis = None
89 |
90 |
91 | def load_wav(wav_f, sr=None):
92 | # wav_arr, _ = librosa.load(wav_f, sr=sr)
93 | # return wav_arr
94 |     if isinstance(wav_f, str):
95 | wav_arr, _ = librosa.load(wav_f, sr=sr)
96 | else:
97 | wav_arr = wav_f
98 | return wav_arr
99 |
100 | def write_wav(write_path, wav_arr, sr):
101 | wav_arr *= 32767 / max(0.01, np.max(np.abs(wav_arr)))
102 | wavfile.write(write_path, sr, wav_arr.astype(np.int16))
103 | return
104 |
105 | def preempahsis(wav_arr, pre_param=hparams['preemphasis']):
106 | return signal.lfilter([1, -pre_param], [1], wav_arr)
107 |
108 | def deemphasis(wav_arr, pre_param=hparams['preemphasis']):
109 | return signal.lfilter([1], [1, -pre_param], wav_arr)
110 |
111 | def split_wav(wav_arr, top_db=-hparams['silence_db']):
112 | intervals = librosa.effects.split(wav_arr, top_db=top_db)
113 | return intervals
114 |
115 | def mulaw_encode(wav_arr, quantization_channels):
116 | mu = float(quantization_channels - 1)
117 | safe_wav_abs = np.minimum(np.abs(wav_arr), 1.0)
118 | encoded = np.sign(wav_arr) * np.log1p(mu * safe_wav_abs) / np.log1p(mu)
119 | return encoded
120 |
121 | def mulaw_encode_quantize(wav_arr, quantization_channels):
122 | mu = float(quantization_channels - 1)
123 | safe_wav_abs = np.minimum(np.abs(wav_arr), 1.0)
124 | encoded = np.sign(wav_arr) * np.log1p(mu * safe_wav_abs) / np.log1p(mu)
125 | return ((encoded + 1.) / 2 * mu + 0.5).astype(np.int32)
126 |
127 | def mulaw_decode(encoded, quantization_channels):
128 | mu = float(quantization_channels - 1)
129 | magnitude = (1 / mu) * ((1 + mu) ** abs(encoded) - 1.)
130 | return np.sign(encoded) * magnitude
131 |
132 | def mulaw_decode_quantize(encoded, quantization_channels):
133 | mu = float(quantization_channels - 1)
134 | signal = 2 * (encoded.astype(np.float32) / mu) - 1.
135 | magnitude = (1 / mu) * ((1 + mu) ** abs(signal) - 1.)
136 | return np.sign(signal) * magnitude
137 |
138 | def mulaw_encode_quantize_tf(wav_batch, quantization_channels):  # needs TensorFlow; its import is commented out above
139 | with tf.variable_scope('mulaw_encode'):
140 | mu = tf.cast(quantization_channels - 1, tf.float32)
141 | safe_wav_abs = tf.minimum(tf.abs(wav_batch), 1.0)
142 | encoded = tf.sign(wav_batch) * tf.log1p(mu * safe_wav_abs) / tf.log1p(mu)
143 | return tf.cast((encoded + 1.) / 2 * mu + 0.5, tf.int32)
144 |
145 | # def mulaw_encode_tf(wav_batch, quantization_channels):
146 | # with tf.variable_scope('mulaw_encode'):
147 | # mu = tf.cast(quantization_channels - 1, tf.float32)
148 | # safe_wav_abs = tf.minimum(tf.abs(wav_batch), 1.0)
149 | # encoded = tf.sign(wav_batch) * tf.log1p(mu * safe_wav_abs) / tf.log1p(mu)
150 | # return encoded
151 |
152 | # def mulaw_decode_quantize_tf(encoded, quantization_channels):
153 | #     with tf.variable_scope('mulaw_decode'):
154 | #         mu = tf.cast(quantization_channels - 1, tf.float32)
155 | #         signal = 2 * (tf.cast(encoded, tf.float32) / mu) - 1.
156 | #         magnitude = (1 / mu) * ((1 + mu) ** abs(signal) - 1.)
157 | #         return tf.sign(signal) * magnitude
158 | 
159 | # def mulaw_decode_tf(encoded, quantization_channels):
160 | #     with tf.variable_scope('mulaw_decode'):
161 | #         mu = tf.cast(quantization_channels - 1, tf.float32)
162 | #         magnitude = (1 / mu) * ((1 + mu) ** abs(encoded) - 1.)
163 | #         return tf.sign(encoded) * magnitude
164 |
165 | def stft(wav_arr, n_fft=hparams['n_fft'],  # short-time Fourier transform
166 | hop_len=hparams['hop_length'],
167 | win_len=hparams['win_length'],
168 | window=hparams['window'],
169 | center=hparams['center']):
170 | # return shape: [n_freqs, time]
171 | return librosa.core.stft(wav_arr, n_fft=n_fft, hop_length=hop_len,
172 | win_length=win_len, window=window, center=center)
173 |
174 | # def stft_tf(wav_arr, n_fft=hparams['n_fft'],
175 | # hop_len=hparams['hop_length'],
176 | # win_len=hparams['win_length'],
177 | # window=hparams['window']):
178 | # window_f = {'hann': tf.contrib.signal.hann_window,
179 | # 'hamming': tf.contrib.signal.hamming_window}[window]
180 | # # returned value is of shape [..., frames, fft_bins] and complex64 value
181 | # return tf.contrib.signal.stft(signals=wav_arr, frame_length=win_len,
182 | # frame_step=hop_len, fft_length=n_fft,
183 | # window_fn=window_f)
184 |
185 | def istft(stft_matrix, hop_len=hparams['hop_length'],
186 | win_len=hparams['win_length'], window=hparams['window']):
187 | # stft_matrix should be complex stft results instead of magnitude spectrogram
188 | # or power spectrogram, and of shape [n_freqs, time]
189 | return librosa.core.istft(stft_matrix, hop_length=hop_len,
190 | win_length=win_len, window=window)
191 |
192 | # def istft_tf(stft_matrix, hop_len=hparams['hop_length'], n_fft=hparams['n_fft'],
193 | # win_len=hparams['win_length'], window=hparams['window']):
194 | # window_f = {'hann': tf.contrib.signal.hann_window,
195 | # 'hamming': tf.contrib.signal.hamming_window}[window]
196 | # # stft_matrix should be of shape [..., frames, fft_bins]
197 | # return tf.contrib.signal.inverse_stft(stft_matrix, frame_length=win_len,
198 | # frame_step=hop_len, fft_length=n_fft,
199 | # window_fn=window_f)
200 |
201 | def spectrogram(wav_arr, n_fft=hparams['n_fft'],
202 | hop_len=hparams['hop_length'],
203 | win_len=hparams['win_length'],
204 | window=hparams['window'],
205 | center=hparams['center']):
206 | # return shape: [time, n_freqs]
207 | s = stft(wav_arr, n_fft=n_fft, hop_len=hop_len,
208 | win_len=win_len, window=window, center=center).T
209 |     magnitude = np.abs(s)  # magnitude spectrum
210 |     power = magnitude ** 2  # power spectrum (squared STFT magnitude; the STFT itself is computed via FFT)
211 | return {'magnitude': magnitude,
212 | 'power': power,
213 | 'stft':s.T}
214 |
215 | def power_spec2mel(power_spec, sr=hparams['sample_rate'], n_fft=hparams['n_fft'],
216 | num_mels=hparams['num_mels'], fmin=hparams['fmin'], fmax=hparams['fmax']):
217 | # power_spec should be of shape [time, 1+n_fft/2]
218 | power_spec_t = power_spec.T
219 | global _mel_basis
220 | _mel_basis = (librosa.filters.mel(sr=sr, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax)
221 | if _mel_basis is None else _mel_basis) # [n_mels, 1+n_fft/2]
222 | mel_spec = np.dot(_mel_basis, power_spec_t) # [n_mels, time]
223 |     return mel_spec.T  # mel spectrogram
224 |
225 | def wav2melspec(wav_arr, sr=hparams['sample_rate'], n_fft=hparams['n_fft'],
226 | hop_len=hparams['hop_length'], win_len=hparams['win_length'],
227 | window=hparams['window'], num_mels=hparams['num_mels'],
228 | fmin=hparams['fmin'], fmax=hparams['fmax']):
229 | power_spec = spectrogram(wav_arr, n_fft, hop_len, win_len, window)['power']
230 | melspec = power_spec2mel(power_spec.T, sr, n_fft, num_mels, fmin, fmax)
231 | return melspec # [time, num_mels]
232 |
233 | def wav2mfcc(wav_arr, sr=hparams['sample_rate'], n_mfcc=hparams['n_mfcc'],
234 | n_fft=hparams['n_fft'], hop_len=hparams['hop_length'],
235 | win_len=hparams['win_length'], window=hparams['window'],
236 | num_mels=hparams['num_mels'], fmin=0.0,
237 | fmax=None, ref_db=hparams['ref_db']):
238 | from scipy.fftpack import dct
239 | print("wav_arr1:",wav_arr.shape)
240 | wav_arr = preempahsis(wav_arr)
241 | print("wav_arr2:",wav_arr.shape)
242 |
243 | mag_spec = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len,
244 | win_len=win_len, window=window)['magnitude']
245 | mel_spec = power_spec2mel(mag_spec, sr=sr, n_fft=n_fft, num_mels=num_mels,
246 | fmin=fmin, fmax=fmax)
247 | # log_melspec = power2db(mel_spec, ref_db=ref_db)
248 | log_melspec = librosa.amplitude_to_db(mel_spec)
249 | mfcc = dct(x=log_melspec.T, axis=0, type=2, norm='ortho')[:n_mfcc]
250 | # mfcc = np.dot(librosa.filters.dct(n_mfcc, log_melspec.shape[1]), log_melspec.T)
251 | deltas = librosa.feature.delta(mfcc)
252 | delta_deltas = librosa.feature.delta(mfcc, order=2)
253 | mfcc_feature = np.concatenate((mfcc, deltas, delta_deltas), axis=0)
254 |
255 | return mfcc_feature.T
256 |
257 | def wav2mfcc_v2(wav_arr, sr=hparams['sample_rate'], n_mfcc=hparams['n_mfcc'],  # this is the variant actually used
258 | n_fft=hparams['n_fft'], hop_len=hparams['hop_length'],
259 | win_len=hparams['win_length'], window=hparams['window'],
260 | num_mels=hparams['num_mels'], fmin=0.0,
261 | fmax=None, ref_db=hparams['ref_db'],
262 | center=hparams['center']):
263 | from scipy.fftpack import dct
264 | wav_arr = preempahsis(wav_arr)
265 |     # apply the pre-emphasis filter once
266 |     power_spec = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len,
267 |                              win_len=win_len, window=window, center=center)['power']
268 |     mel_spec = power_spec2mel(power_spec, sr=sr, n_fft=n_fft, num_mels=num_mels,
269 |                               fmin=fmin, fmax=fmax)  # mel spectrogram
270 |     log_melspec = power2db(mel_spec, ref_db=ref_db)  # log-mel spectrogram
271 |
272 |
273 | """下面是MFCC"""
274 | # mfcc = dct(x=log_melspec.T, axis=0, type=2, norm='ortho')[:n_mfcc]
275 | # deltas = librosa.feature.delta(mfcc)
276 | # delta_deltas = librosa.feature.delta(mfcc, order=2)
277 | # mfcc_feature = np.concatenate((mfcc, deltas, delta_deltas), axis=0)
278 | # return mfcc_feature.T
279 | x_stft = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len,
280 | win_len=win_len, window=window, center=center)['stft']
281 | # print("log_melspec:", x_stft.shape)
282 | return log_melspec,x_stft
283 |
284 |
285 | def wav2linear_v2(wav_arr, sr=hparams['sample_rate'], n_mfcc=hparams['n_mfcc'],  # this is the variant actually used
286 | n_fft=hparams['n_fft'], hop_len=hparams['hop_length'],
287 | win_len=hparams['win_length'], window=hparams['window'],
288 | num_mels=hparams['num_mels'], fmin=0.0,
289 | fmax=None, ref_db=hparams['ref_db'],
290 | center=hparams['center']):
291 | from scipy.fftpack import dct
292 | wav_arr = preempahsis(wav_arr)
293 |     # apply the pre-emphasis filter once
294 |     power_spec = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len,
295 |                              win_len=win_len, window=window, center=center)['power']
296 |     linear = _amp_to_db(power_spec, ref_db=ref_db)  # log power spectrogram (linear frequency scale, not mel)
297 | normalized_linear = _db_normalize(linear, min_db=hparams['min_db'])
298 | x_stft = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len,
299 | win_len=win_len, window=window, center=center)['stft']
300 |
301 |
302 | return normalized_linear,x_stft
303 |
304 | def _amp_to_db(x,ref_db=20):
305 | return 20 * np.log10(np.maximum(1e-5, x)) + ref_db
306 |
307 |
308 | def mel2log_mel(mel_spec, ref_db=hparams['ref_db'], min_db=hparams['min_db']):
309 | log_mel = power2db(mel_spec, ref_db)
310 | normalized = log_power_normalize(log_mel, min_db)
311 | return normalized
312 |
313 | def power2db(power_spec, ref_db=hparams['ref_db'], tol=1e-5):
314 | # power spectrogram is stft ** 2
315 | # returned value: (10. * log10(power_spec) - ref_db)
316 | return 10. * np.log10(power_spec + tol) - ref_db
317 |
318 | def db2power(power_db, ref_db=hparams['ref_db']):
319 | return np.power(10.0, 0.1 * (power_db + ref_db))
320 | #
321 | # def db2power_tf(power_db, ref_db=hparams['ref_db']):
322 | # return tf.pow(10.0, 0.1 * (power_db + ref_db))
323 |
324 | def log_power_normalize(log_power, min_db=hparams['min_db']):
325 | """
326 | :param log_power: in db, computed by power2db(spectrogram(wav_arr)['power'])
327 | :param min_db: minimum value of log_power in db
328 | :return: log_power normalized to [0., 1.]
329 | """
330 |     assert min_db < 0., "min_db should be a negative value like -80.0 or -100.0"
331 | return np.clip((log_power - min_db) / -min_db, 0., 1.)
332 |
333 | def log_power_denormalize(normalized_logpower, min_db=hparams['min_db']):
334 | return np.clip(normalized_logpower, 0., 1.) * -min_db + min_db
335 |
336 | # def log_power_denormalize_tf(normalized_logpower, min_db=hparams['min_db']):
337 | # return tf.clip_by_value(normalized_logpower, 0., 1.) * -min_db + min_db
338 |
339 | def griffin_lim(magnitude_spec, iterations=hparams['iterations']):
340 | """
341 | :param magnitude_spec: magnitude spectrogram of shape [time, n_freqs]
342 | obtained from spectrogram(wav_arr)['magnitude]
343 | :param iterations: number of iterations to estimate phase
344 | :return: waveform array
345 | """
346 | mag = magnitude_spec.T # transpose to [n_freqs, time]
347 | angles = np.exp(2j * np.pi * np.random.rand(*mag.shape))
348 |     complex_mag = np.abs(mag).astype(complex)  # builtin complex; the np.complex alias was removed in NumPy 1.24
349 | stft_0 = complex_mag * angles
350 | y = istft(stft_0)
351 | for i in range(iterations):
352 | angles = np.exp(1j * np.angle(stft(y)))
353 | y = istft(complex_mag * angles)
354 | return y
355 |
356 | # def grinffin_lim_tf(magnitude_spec, iterations=hparams['iterations']):
357 | # # magnitude_spec: [frames, fft_bins], of type tf.float32
358 | # angles = tf.cast(
359 | # tf.exp(2j * np.pi * tf.cast(
360 | # tf.random_uniform(
361 | # tf.shape(magnitude_spec)),
362 | # dtype=tf.complex64)),
363 | # dtype=tf.complex64)
364 | # complex_mag = tf.cast(tf.abs(magnitude_spec), tf.complex64)
365 | # stft_0 = complex_mag * angles
366 | # y = istft_tf(stft_0)
367 | # for i in range(iterations):
368 | # angles = tf.exp(1j * tf.cast(tf.angle(stft_tf(y)), tf.complex64))
369 | # y = istft_tf(complex_mag * angles)
370 | # return y
371 |
372 | def griffin_lim_test(wav_f, n_fft=hparams['n_fft'],
373 | hop_len=hparams['hop_length'],
374 | win_len=hparams['win_length'],
375 | window=hparams['window']):
376 | wav_arr = load_wav(wav_f)
377 | spec_dict = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len,
378 | win_len=win_len, window=window)
379 | mag_spec = spec_dict['magnitude']
380 | y = griffin_lim(mag_spec)
381 | write_wav('reconstructed1.wav', y, sr=16000)
382 |
383 | def stft2wav_test(stft_f, mean_f, std_f):
384 | spec = np.load(stft_f)
385 | mean = np.load(mean_f)
386 | std = np.load(std_f)
387 | spec = spec * std + mean
388 | spec = log_power_denormalize(spec)
389 | power_spec = db2power(spec)
390 | mag_spec = power_spec ** 0.5
391 | y = griffin_lim(mag_spec)
392 | y = deemphasis(y)
393 | write_wav('reconstructed2.wav', y, sr=16000)
394 | return y
395 | #
396 | # def stft2wav_tf_test(stft_f, mean_f, std_f):
397 | # # get inputs
398 | # spec = np.load(stft_f)
399 | # mean = np.load(mean_f)
400 | # std = np.load(std_f)
401 | # spec = spec * std + mean
402 | # # build graph
403 | # spec_pl = tf.placeholder(tf.float32, [None, None, 513])
404 | # denormalized = log_power_denormalize_tf(spec_pl)
405 | # mag_spec = tf.pow(db2power_tf(denormalized), 0.5)
406 | # wav = grinffin_lim_tf(mag_spec)
407 | # # set session and run
408 | # config = tf.ConfigProto()
409 | # config.gpu_options.allow_growth = True
410 | # sess = tf.Session(config=config)
411 | # wav_arr = sess.run(wav, feed_dict={spec_pl: np.expand_dims(spec, axis=0)})
412 | # sess.close()
413 | # y = deemphasis(np.squeeze(wav_arr))
414 | # write_wav('reconstructed_tf.wav', y, sr=16000)
415 | # return y
416 |
417 | # one hyperparameter: min_db
418 | # return: db normalized to [0., 1.]
419 | def _db_normalize(db, min_db):
420 | return np.clip((db - min_db) / -min_db, 0., 1.)
421 |
422 |
423 |
424 |
425 | def mfcc_test():
426 | wav_f = './test.wav'
427 | wav_arr = load_wav(wav_f)
428 |
429 |
430 | mfcc = wav2mfcc_v2(wav_arr)
431 | mfcc1 = np.load('test.npy')
432 | print(mfcc.min(), mfcc1.min())
433 | print(mfcc.max(), mfcc1.max())
434 | print(mfcc.mean(), mfcc1.mean())
435 | print(np.abs(mfcc - mfcc1))
436 | print(np.mean(np.abs(mfcc - mfcc1)))
437 | import matplotlib.pyplot as plt
438 | plt.figure()
439 | plt.subplot(211)
440 | plt.imshow(mfcc.T, origin='lower')
441 | # plt.colorbar()
442 | plt.subplot(212)
443 | plt.imshow(mfcc1.T, origin='lower')
444 | # plt.colorbar()
445 | plt.tight_layout()
446 | plt.show()
447 | return
448 |
449 |
450 |
451 | if __name__ == '__main__':
452 | mfcc_test()
453 |
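For reference, a minimal sketch of the feature path this module provides (run it from wenet/tools/ or adjust the import path; despite its name, wav2mfcc_v2 returns a log-mel spectrogram plus the complex STFT):

    from _extract_feats import hparams, load_wav, wav2mfcc_v2

    wav = load_wav('example/audio.wav', sr=hparams['sample_rate'])  # resampled to 16 kHz
    log_mel, x_stft = wav2mfcc_v2(wav)
    print(log_mel.shape)  # (num_frames, 80) log-mel features
    print(x_stft.shape)   # (1 + n_fft // 2, num_frames) complex STFT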
--------------------------------------------------------------------------------
/wenet/transformer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/__init__.py
--------------------------------------------------------------------------------
/wenet/transformer/asr_model.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/asr_model.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/transformer/attention.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/attention.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/transformer/cmvn.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/cmvn.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/transformer/convolution.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/convolution.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/transformer/ctc.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/ctc.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/transformer/decoder.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/decoder.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/transformer/decoder_layer.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/decoder_layer.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/transformer/embedding.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/embedding.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/transformer/encoder.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/encoder.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/transformer/encoder_layer.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/encoder_layer.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/transformer/label_smoothing_loss.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/label_smoothing_loss.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/transformer/positionwise_feed_forward.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/positionwise_feed_forward.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/transformer/subsampling.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/subsampling.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/transformer/swish.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/transformer/swish.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/utils/checkpoint.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/utils/checkpoint.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/utils/cmvn.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (c) 2020 Mobvoi Inc (Binbin Zhang)
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | import json
17 | import logging
18 | import math
19 | import sys
20 | 
21 | import numpy as np
22 | def _load_json_cmvn(json_cmvn_file):
23 | """ Load the json format cmvn stats file and calculate cmvn
24 |
25 | Args:
26 | json_cmvn_file: cmvn stats file in json format
27 |
28 | Returns:
29 | a numpy array of [means, vars]
30 | """
31 | with open(json_cmvn_file) as f:
32 | cmvn_stats = json.load(f)
33 |
34 | means = cmvn_stats['mean_stat']
35 | variance = cmvn_stats['var_stat']
36 | count = cmvn_stats['frame_num']
37 | for i in range(len(means)):
38 | means[i] /= count
39 | variance[i] = variance[i] / count - means[i] * means[i]
40 | if variance[i] < 1.0e-20:
41 | variance[i] = 1.0e-20
42 | variance[i] = 1.0 / math.sqrt(variance[i])
43 | cmvn = np.array([means, variance])
44 | return cmvn
45 |
46 |
47 | def _load_kaldi_cmvn(kaldi_cmvn_file):
48 | """ Load the kaldi format cmvn stats file and calculate cmvn
49 |
50 | Args:
51 | kaldi_cmvn_file: kaldi text style global cmvn file, which
52 | is generated by:
53 | compute-cmvn-stats --binary=false scp:feats.scp global_cmvn
54 |
55 | Returns:
56 | a numpy array of [means, vars]
57 | """
58 | means = []
59 | variance = []
60 | with open(kaldi_cmvn_file, 'r') as fid:
61 | # kaldi binary file start with '\0B'
62 | if fid.read(2) == '\0B':
63 | logging.error('kaldi cmvn binary file is not supported, please '
64 | 'recompute it by: compute-cmvn-stats --binary=false '
65 | ' scp:feats.scp global_cmvn')
66 | sys.exit(1)
67 | fid.seek(0)
68 | arr = fid.read().split()
69 | assert (arr[0] == '[')
70 | assert (arr[-2] == '0')
71 | assert (arr[-1] == ']')
72 | feat_dim = int((len(arr) - 2 - 2) / 2)
73 | for i in range(1, feat_dim + 1):
74 | means.append(float(arr[i]))
75 | count = float(arr[feat_dim + 1])
76 | for i in range(feat_dim + 2, 2 * feat_dim + 2):
77 | variance.append(float(arr[i]))
78 |
79 | for i in range(len(means)):
80 | means[i] /= count
81 | variance[i] = variance[i] / count - means[i] * means[i]
82 | if variance[i] < 1.0e-20:
83 | variance[i] = 1.0e-20
84 | variance[i] = 1.0 / math.sqrt(variance[i])
85 | cmvn = np.array([means, variance])
86 | return cmvn
87 |
88 |
89 | def load_cmvn(cmvn_file, is_json):
90 | if is_json:
91 | cmvn = _load_json_cmvn(cmvn_file)
92 | else:
93 | cmvn = _load_kaldi_cmvn(cmvn_file)
94 | return cmvn[0], cmvn[1]
95 |
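For reference, load_cmvn returns per-dimension means and inverse standard deviations (the variance is converted to 1/sqrt(var) above), so applying CMVN is a subtract-and-multiply. A minimal sketch with a hypothetical stats file and dummy features:

    import numpy as np

    from wenet.utils.cmvn import load_cmvn

    mean, istd = load_cmvn('global_cmvn.json', is_json=True)    # hypothetical stats file
    feats = np.random.randn(100, len(mean)).astype(np.float32)  # dummy fbank frames
    normalized = (feats - mean) * istd                          # per-dimension CMVN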
--------------------------------------------------------------------------------
/wenet/utils/common.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/utils/common.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/utils/ctc_util.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/utils/ctc_util.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/utils/executor.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/utils/executor.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/utils/mask.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/utils/mask.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/wenet/utils/scheduler.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/wenet/utils/scheduler.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/xseg/dfl_xseg_api.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/xseg/dfl_xseg_api.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/y_utils/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 | """
4 | @project : dhp-service
5 | @author : huyi
6 | @file    : __init__.py
7 | @ide : PyCharm
8 | @time : 2021-08-18 16:29:13
9 | """
--------------------------------------------------------------------------------
/y_utils/config.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/y_utils/config.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/y_utils/lcr.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/y_utils/lcr.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/y_utils/liblcr.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/y_utils/liblcr.so
--------------------------------------------------------------------------------
/y_utils/logger.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/y_utils/logger.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/y_utils/md5.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/y_utils/md5.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/y_utils/time_utils.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/y_utils/time_utils.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/y_utils/tools.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Holasyb918/HeyGem-Linux-Python-Hack/69c0cff5794c92b68cfea2aef60928055dd43d6c/y_utils/tools.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------