├── .idea
│   ├── .gitignore
│   ├── EasyAIVtuber.iml
│   ├── inspectionProfiles
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   └── vcs.xml
├── README.md
├── __pycache__
│   ├── action_animeV2.cpython-310.pyc
│   ├── alive.cpython-310.pyc
│   ├── args.cpython-310.pyc
│   ├── models.cpython-310.pyc
│   └── utils.cpython-310.pyc
├── action_animeV2.py
├── alive.py
├── args.py
├── assets
│   └── 2347acc3-799f-4913-8035-ae077ba3dc22.gif
├── data
│   ├── images
│   │   ├── lambda_00.png
│   │   └── lambda_01.png
│   ├── models
│   │   └── placeholder.txt
│   ├── music
│   │   └── Noisy Radio - Chipi Chipi Chapa Chapa.flac
│   ├── song
│   │   ├── 不分手的恋爱-汪苏泷.MP3
│   │   └── 不分手的恋爱-汪苏泷_voice.MP3
│   └── speech
│       ├── error-svc.wav
│       ├── uri_speech_0.wav
│       └── uri_speech_1.wav
├── main.bat
├── main.py
├── models.py
├── requirements.txt
├── test.py
├── tha2
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-310.pyc
│   │   └── util.cpython-310.pyc
│   ├── compute
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-310.pyc
│   │   │   ├── cached_computation_func.cpython-310.pyc
│   │   │   └── cached_computation_protocol.cpython-310.pyc
│   │   ├── cached_computation_func.py
│   │   └── cached_computation_protocol.py
│   ├── mocap
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-310.pyc
│   │   │   ├── ifacialmocap_constants.cpython-310.pyc
│   │   │   └── ifacialmocap_pose_converter.cpython-310.pyc
│   │   ├── ifacialmocap_constants.py
│   │   └── ifacialmocap_pose_converter.py
│   ├── nn
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-310.pyc
│   │   │   └── util.cpython-310.pyc
│   │   ├── backbone
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-310.pyc
│   │   │   │   ├── poser_args.cpython-310.pyc
│   │   │   │   └── poser_encoder_decoder_00.cpython-310.pyc
│   │   │   ├── poser_args.py
│   │   │   └── poser_encoder_decoder_00.py
│   │   ├── backcomp
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   └── __init__.cpython-310.pyc
│   │   │   ├── nn
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __pycache__
│   │   │   │   │   ├── __init__.cpython-310.pyc
│   │   │   │   │   ├── conv.cpython-310.pyc
│   │   │   │   │   ├── encoder_decoder_module.cpython-310.pyc
│   │   │   │   │   ├── init_function.cpython-310.pyc
│   │   │   │   │   ├── resnet_block.cpython-310.pyc
│   │   │   │   │   └── u_net_module.cpython-310.pyc
│   │   │   │   ├── conv.py
│   │   │   │   ├── encoder_decoder_module.py
│   │   │   │   ├── init_function.py
│   │   │   │   ├── resnet_block.py
│   │   │   │   └── u_net_module.py
│   │   │   └── tha
│   │   │       ├── __init__.py
│   │   │       ├── __pycache__
│   │   │       │   ├── __init__.cpython-310.pyc
│   │   │       │   ├── combiner.cpython-310.pyc
│   │   │       │   └── two_algo_face_rotator.cpython-310.pyc
│   │   │       ├── combiner.py
│   │   │       ├── face_morpher.py
│   │   │       └── two_algo_face_rotator.py
│   │   ├── base
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-310.pyc
│   │   │   │   ├── conv.cpython-310.pyc
│   │   │   │   ├── init_function.cpython-310.pyc
│   │   │   │   ├── module_factory.cpython-310.pyc
│   │   │   │   ├── nonlinearity_factory.cpython-310.pyc
│   │   │   │   ├── normalization.cpython-310.pyc
│   │   │   │   ├── pass_through.cpython-310.pyc
│   │   │   │   ├── resnet_block.cpython-310.pyc
│   │   │   │   ├── spectral_norm.cpython-310.pyc
│   │   │   │   └── util.cpython-310.pyc
│   │   │   ├── conv.py
│   │   │   ├── init_function.py
│   │   │   ├── module_factory.py
│   │   │   ├── nonlinearity_factory.py
│   │   │   ├── normalization.py
│   │   │   ├── pass_through.py
│   │   │   ├── resnet_block.py
│   │   │   ├── spectral_norm.py
│   │   │   ├── util.py
│   │   │   └── view_change.py
│   │   ├── batch_module
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-310.pyc
│   │   │   │   └── batch_input_module.cpython-310.pyc
│   │   │   ├── batch_input_model_factory.py
│   │   │   └── batch_input_module.py
│   │   ├── eyebrow
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-310.pyc
│   │   │   │   ├── eyebrow_decomposer_00.cpython-310.pyc
│   │   │   │   └── eyebrow_morphing_combiner_00.cpython-310.pyc
│   │   │   ├── eyebrow_decomposer_00.py
│   │   │   └── eyebrow_morphing_combiner_00.py
│   │   ├── face
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-310.pyc
│   │   │   │   └── face_morpher_08.cpython-310.pyc
│   │   │   └── face_morpher_08.py
│   │   └── util.py
│   ├── poser
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-310.pyc
│   │   │   ├── general_poser_02.cpython-310.pyc
│   │   │   └── poser.cpython-310.pyc
│   │   ├── general_poser_02.py
│   │   ├── modes
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-310.pyc
│   │   │   │   ├── mode_20.cpython-310.pyc
│   │   │   │   └── mode_20_wx.cpython-310.pyc
│   │   │   ├── mode_20.py
│   │   │   └── mode_20_wx.py
│   │   └── poser.py
│   └── util.py
├── tha3
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-310.pyc
│   │   └── util.cpython-310.pyc
│   ├── app
│   │   ├── __init__.py
│   │   ├── ifacialmocap_puppeteer.py
│   │   └── manual_poser.py
│   ├── compute
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-310.pyc
│   │   │   ├── cached_computation_func.cpython-310.pyc
│   │   │   └── cached_computation_protocol.cpython-310.pyc
│   │   ├── cached_computation_func.py
│   │   └── cached_computation_protocol.py
│   ├── mocap
│   │   ├── __init__.py
│   │   ├── ifacialmocap_constants.py
│   │   ├── ifacialmocap_pose.py
│   │   ├── ifacialmocap_pose_converter.py
│   │   ├── ifacialmocap_poser_converter_25.py
│   │   └── ifacialmocap_v2.py
│   ├── module
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-310.pyc
│   │   │   └── module_factory.cpython-310.pyc
│   │   └── module_factory.py
│   ├── nn
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-310.pyc
│   │   │   ├── conv.cpython-310.pyc
│   │   │   ├── image_processing_util.cpython-310.pyc
│   │   │   ├── init_function.cpython-310.pyc
│   │   │   ├── nonlinearity_factory.cpython-310.pyc
│   │   │   ├── normalization.cpython-310.pyc
│   │   │   ├── pass_through.cpython-310.pyc
│   │   │   ├── resnet_block.cpython-310.pyc
│   │   │   ├── resnet_block_seperable.cpython-310.pyc
│   │   │   ├── separable_conv.cpython-310.pyc
│   │   │   ├── spectral_norm.cpython-310.pyc
│   │   │   └── util.cpython-310.pyc
│   │   ├── common
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-310.pyc
│   │   │   │   ├── conv_block_factory.cpython-310.pyc
│   │   │   │   ├── poser_args.cpython-310.pyc
│   │   │   │   ├── poser_encoder_decoder_00.cpython-310.pyc
│   │   │   │   ├── poser_encoder_decoder_00_separable.cpython-310.pyc
│   │   │   │   ├── resize_conv_encoder_decoder.cpython-310.pyc
│   │   │   │   └── resize_conv_unet.cpython-310.pyc
│   │   │   ├── conv_block_factory.py
│   │   │   ├── poser_args.py
│   │   │   ├── poser_encoder_decoder_00.py
│   │   │   ├── poser_encoder_decoder_00_separable.py
│   │   │   ├── resize_conv_encoder_decoder.py
│   │   │   └── resize_conv_unet.py
│   │   ├── conv.py
│   │   ├── editor
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-310.pyc
│   │   │   │   └── editor_07.cpython-310.pyc
│   │   │   └── editor_07.py
│   │   ├── eyebrow_decomposer
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-310.pyc
│   │   │   │   ├── eyebrow_decomposer_00.cpython-310.pyc
│   │   │   │   └── eyebrow_decomposer_03.cpython-310.pyc
│   │   │   ├── eyebrow_decomposer_00.py
│   │   │   └── eyebrow_decomposer_03.py
│   │   ├── eyebrow_morphing_combiner
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-310.pyc
│   │   │   │   ├── eyebrow_morphing_combiner_00.cpython-310.pyc
│   │   │   │   └── eyebrow_morphing_combiner_03.cpython-310.pyc
│   │   │   ├── eyebrow_morphing_combiner_00.py
│   │   │   └── eyebrow_morphing_combiner_03.py
│   │   ├── face_morpher
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-310.pyc
│   │   │   │   ├── face_morpher_08.cpython-310.pyc
│   │   │   │   └── face_morpher_09.cpython-310.pyc
│   │   │   ├── face_morpher_08.py
│   │   │   └── face_morpher_09.py
│   │   ├── image_processing_util.py
│   │   ├── init_function.py
│   │   ├── nonlinearity_factory.py
│   │   ├── normalization.py
│   │   ├── pass_through.py
│   │   ├── resnet_block.py
│   │   ├── resnet_block_seperable.py
│   │   ├── separable_conv.py
│   │   ├── spectral_norm.py
│   │   ├── two_algo_body_rotator
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-310.pyc
│   │   │   │   └── two_algo_face_body_rotator_05.cpython-310.pyc
│   │   │   └── two_algo_face_body_rotator_05.py
│   │   └── util.py
│   ├── poser
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-310.pyc
│   │   │   ├── general_poser_02.cpython-310.pyc
│   │   │   └── poser.cpython-310.pyc
│   │   ├── general_poser_02.py
│   │   ├── modes
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-310.pyc
│   │   │   │   ├── pose_parameters.cpython-310.pyc
│   │   │   │   ├── separable_float.cpython-310.pyc
│   │   │   │   ├── separable_half.cpython-310.pyc
│   │   │   │   ├── standard_float.cpython-310.pyc
│   │   │   │   └── standard_half.cpython-310.pyc
│   │   │   ├── load_poser.py
│   │   │   ├── pose_parameters.py
│   │   │   ├── separable_float.py
│   │   │   ├── separable_half.py
│   │   │   ├── standard_float.py
│   │   │   └── standard_half.py
│   │   └── poser.py
│   └── util.py
├── utils.py
├── webui.bat
└── webui.py

--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
# Default ignored files
/shelf/
/workspace.xml

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# EasyAIVtuber

> Drive your 2D waifu/daughter.
> Simply animate your 2D waifu.

![Wellerman-Uri](assets/2347acc3-799f-4913-8035-ae077ba3dc22.gif)

[![](https://img.shields.io/badge/-完整效果展示Ⅰ-EEE?logo=bilibili)](https://www.bilibili.com/video/BV15H4y1o73x/?share_source=copy_web&vd_source=4641a345db4563ba087d0ed0ba8bdf85)
[![](https://img.shields.io/badge/-完整效果展示Ⅱ-EEE?logo=bilibili)](https://www.bilibili.com/video/BV1Hp4y1c7TU/?share_source=copy_web&vd_source=4641a345db4563ba087d0ed0ba8bdf85)

Forked from [`yuyuyzl/EasyVtuber`](https://github.com/yuyuyzl/EasyVtuber). Since this project targets AI VTubers, the original project's face-capture features were removed.
It works best together with text-to-image models such as Stable Diffusion. If you like it, please leave a star~

**Video tutorial: in the making... 0.0**

## Features not available in the original repo

1. Automatic idle motions (blinking, looking around)
2. Talking motion (automatic lip sync)
3. Grooving (automatic head-nodding to the beat)
4. Singing motion (automatic lip sync, swaying with the rhythm)
5. Sleeping (the `--sleep` argument controls how long until the character dozes off)
6. API interface
7. Invocation via a web UI

## Installation
### Install the dependencies
Create and activate a virtual environment
```
conda create -n eaiv python=3.10
conda activate eaiv
```
Install torch (ideally on an RTX 30-series GPU or newer)
```
# CUDA 11.8
pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118
```
Then run the following in the project directory
```
pip install -r requirements.txt
```

### Download the pretrained models

Original model files: https://www.dropbox.com/s/y7b8jl4n2euv8xe/talking-head-anime-3-models.zip?dl=0
After downloading, extract them into the `data/models` folder, at the same level as `placeholder.txt`.

If you don't want to download all the weights (four variants), you can also get them from Hugging Face: https://huggingface.co/ksuriuri/talking-head-anime-3-models

The correct directory layout is
```
+ models
  - separable_float
  - separable_half
  - standard_float
  - standard_half
  - placeholder.txt
```
### Install OBS
Search online for a tutorial and install it.

### Install UnityCapture

> Note: if VTube Studio has ever been installed on the machine, OBS's video capture devices may already include a VTubeStudioCam (untested, so not entirely sure).
> If that device is present, you can use it directly and skip the UnityCapture installation below.

To see your 2D waifu in OBS with a transparent background, UnityCapture needs to be installed; see
https://github.com/schellingb/UnityCapture#installation
It is enough to run through Install.bat normally; the corresponding device (Unity Video Capture) will then show up among OBS's video capture devices.

#### How to make the background transparent
After adding the video capture device in OBS: right-click the video capture device → Properties → Deactivate → set the resolution type to Custom → Resolution 512x512 (matching the `--output_size` argument) → Video Format ARGB → Activate.

## Usage
### Quick test
1. Open OBS, add a video capture device and configure it as required ([Install UnityCapture](#install-unitycapture))
2. Change the virtual-environment path on the first line of `main.bat` to your own
3. Run `main.bat` and wait for initialization to finish; if everything is configured correctly, the character will now be moving in OBS
4. Pick one:
   1. Simple test: run `test.py`
   2. Run the web UI: change the virtual-environment path on the first line of `webui.bat` to your own, then run `webui.bat`

See [API Details](#api-details) for concrete usage.

### Startup arguments

| Argument | Type | Description |
|:-----------------:|:-----:|:---------------------------------------------------------------------------------------------|
| `--character` | str | File name of the input image under `data/images`, without the extension |
| `--output_size` | str | Format `512x512`; must be a multiple of 4. Increasing it does not make the image sharper, but combined with `--extend_movement` it enlarges the movable range |
| `--simplify` | int | `1`, `2`, `3` or `4`; larger values cost less CPU but lower the motion accuracy |
| `--output_webcam` | str | `unitycapture`; selects the output type. If omitted, nothing is sent to a virtual camera |
| `--model` | str | `standard_float`, `standard_half`, `separable_float` or `separable_half`; they differ in VRAM usage, so pick whichever fits |
| `--port` | int | Port of the local API, default 7888; change it if 7888 is occupied |
| `--sleep` | int | Idle interval before sleeping, default 20: after 20 idle seconds the character falls asleep; set it to -1 to disable sleeping |
| `--extend_movement` | float | (Currently unused) Further moves and rotates the output image according to the head position so the upper body can move; the value is the movement multiplier (recommended: 1) |
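For reference, the launch command below is the one `main.bat` already uses, written on a single line; any of the arguments above can be swapped in (`--anime4k` is an additional flag defined in `args.py`):
```
python main.py --character lambda_00 --output_size 512x512 --simplify 1 --output_webcam unitycapture --model standard_half --anime4k --sleep 20 --port 7888
```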
## API Details

The API is built with Flask and listens on http://127.0.0.1:7888 by default (default port 7888); the port can be changed via `--port` in `main.bat`.
Send a POST request to http://127.0.0.1:7888/alive with the appropriate parameters to trigger the corresponding action; see `test.py` for concrete examples.
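A minimal sketch of such a call (it mirrors `test.py`; swap in your own audio path):
```python
import requests

data = {
    "type": "speak",
    "speech_path": "data/speech/uri_speech_0.wav"
}
res = requests.post("http://127.0.0.1:7888/alive", json=data)
print(res.json())  # {'status': 'success'} on success
```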
### Speak from audio

**`REQUEST`**
```json
{
    "type": "speak",
    "speech_path": "your speech path"
}
```

Put the path of your speech audio in `"speech_path"`; wav, mp3, flac, etc. are supported (whatever pygame supports).

**`RESPONSE`**
```json
{
    "status": "success"
}
```

### Nod along to the music's rhythm

**`REQUEST`**
```json
{
    "type": "rhythm",
    "music_path": "your music path",
    "beat": 2
}
```

Put your audio path in `"music_path"`; wav, mp3, flac, etc. are supported (whatever pygame supports).
`"beat"` (optional): `1`, `2` or `4`; controls the nodding tempo, default `2`.

**`RESPONSE`**
```json
{
    "status": "success"
}
```

### Sing along to music and vocals

**`REQUEST`**
```json
{
    "type": "sing",
    "music_path": "your music path",
    "voice_path": "your voice path",
    "mouth_offset": 0.0,
    "beat": 2
}
```

Lip sync works by driving the mouth opening from the audio volume, so the vocals have to be extracted beforehand to control the mouth more precisely.
Suppose you have a song at `path/music.wav`: separate out the vocal track `path/voice.wav` with a tool such as UVR5, then put `path/music.wav` into `"music_path"` and
`path/voice.wav` into `"voice_path"`; wav, mp3, flac, etc. are supported (whatever pygame supports).
`"mouth_offset"` (optional): a value in `[0, 1]`, default `0`; if the character's mouth doesn't open wide enough while singing, try increasing it.
`"beat"` (optional): `1`, `2` or `4`, default `2`; controls the nodding tempo.

**`RESPONSE`**
```json
{
    "status": "success"
}
```

### Stop the current action

**`REQUEST`**
```json
{
    "type": "stop"
}
```

**`RESPONSE`**
```json
{
    "status": "success"
}
```

### Change the current image

**`REQUEST`**
```json
{
    "type": "change_img",
    "img": "your image path"
}
```

Put the image path in `"img"`; ideally a 512x512 PNG.

**`RESPONSE`**
```json
{
    "status": "success"
}
```

## Star History

[![Star History Chart](https://api.star-history.com/svg?repos=Ksuriuri/EasyAIVtuber&type=Date)](https://star-history.com/#Ksuriuri/EasyAIVtuber&Date)
--------------------------------------------------------------------------------
/alive.py:
--------------------------------------------------------------------------------
import numpy as np
from multiprocessing import Value, Process, Queue
import librosa
import time
import pygame


def error_speech():
    pygame.mixer.init()
    pygame.mixer.music.load('data/speech/error-svc.wav')
    pygame.mixer.music.play()
    while pygame.mixer.music.get_busy():  # don't exit before the audio has finished playing
        time.sleep(0.1)  # lighten the load of the loop
    pygame.quit()


def generate_voice_data(speech_path, mouth_offset=0.0):
    # extract the voice-strength envelope
    time_ratio = 0.06
    y, sr = librosa.load(speech_path)
    frame_intervals = int(sr * time_ratio)
    voice_strengths = np.array([np.max(y[i:i + frame_intervals]) for i in range(0, len(y), frame_intervals)])
    voice_strengths[voice_strengths >= 0.1] += mouth_offset
    voice_strengths = np.clip(voice_strengths, 0., 1.).tolist()
    voice_strengths = [round(vst, 2) for vst in voice_strengths]
    voice_times = [0]
    last = time_ratio
    for i in range(len(voice_strengths)):
        voice_times.append(round(last, 1))
        last += time_ratio
    return voice_times, voice_strengths


def generate_beat_data(music_path, beat=2):
    # extract the audio's beats
    # beat is 1, 2 or 4 and controls the nodding tempo
    if beat not in [1, 2, 4]:
        beat = 2
    y, sr = librosa.load(music_path)
    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
    beat_times = librosa.frames_to_time(beat_frames, sr=sr)
    beat_times = np.concatenate([[0], beat_times]).tolist()
    beat_times = [round(bt, 2) for bt in beat_times[::beat]]
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    frame_intervals = int(len(y) / len(onset_env))
    beat_strengths = np.array([np.max(y[i:i + frame_intervals]) for i in range(0, len(y), frame_intervals)])
    beat_strengths = np.clip(beat_strengths[beat_frames[::beat]], 0., 1.).tolist()
    return beat_times, beat_strengths


class Alive(Process):
    def __init__(self, alive_args):
        super().__init__()
        self.is_speech = alive_args['is_speech']
        self.speech_q = alive_args['speech_q']

        self.is_singing = alive_args['is_singing']
        self.is_music_play = alive_args['is_music_play']
        self.beat_q = alive_args['beat_q']
        self.mouth_q = alive_args['mouth_q']

    def speak(self, speech_path):
        try:
            voice_times, voice_strengths = generate_voice_data(speech_path)

            self.speech_q.put_nowait({'voice_strengths': voice_strengths,
                                      'voice_times': np.array(voice_times) + time.perf_counter() - 0.15})
            self.is_speech.value = True

            # playback
            pygame.mixer.init()
            pygame.mixer.music.load(speech_path)
            pygame.mixer.music.play()
            while pygame.mixer.music.get_busy() and self.is_speech.value:  # don't exit before the audio has finished playing
                time.sleep(0.1)  # lighten the load of the loop
            pygame.quit()
            self.is_speech.value = False
        except Exception as ex:
            print(ex)
            error_speech()
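    # Note: speak() above and sing()/rhythm() below all shift their timing arrays
    # by time.perf_counter() - 0.15; the 0.15 s offset looks like a rough
    # compensation for pygame's playback start-up delay, so the mouth/nod motion
    # lines up with the audio (an interpretation, not documented in the repo).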
    def sing(self, music_path, voice_path, mouth_offset, beat):
        try:
            beat_times, beat_strengths = generate_beat_data(music_path, beat)
            voice_times, voice_strengths = generate_voice_data(voice_path, mouth_offset)

            self.beat_q.put_nowait({'beat_times': np.array(beat_times) + time.perf_counter() - 0.15,
                                    'beat_strengths': beat_strengths})
            self.mouth_q.put_nowait({'voice_times': np.array(voice_times) + time.perf_counter() - 0.15,
                                     'voice_strengths': voice_strengths})
            self.is_singing.value = True

            # playback
            pygame.mixer.init()
            pygame.mixer.music.load(music_path)
            pygame.mixer.music.play()
            while pygame.mixer.music.get_busy() and self.is_singing.value:  # don't exit before the audio has finished playing
                time.sleep(0.1)  # lighten the load of the loop
            pygame.quit()
            self.is_singing.value = False
        except Exception as ex:
            print(ex)
            error_speech()

    def rhythm(self, music_path, beat):
        try:
            # # fade in / fade out
            # sr, music_data = wavfile.read(music_path)
            # factors = np.arange(sr) / sr
            # factors = np.concatenate([factors, np.ones(len(music_data) - 2 * sr), factors[::-1]])
            # music_data = music_data * factors
            # music_data = np.clip(music_data, -32767, 32767)
            # wavfile.write(music_path, sr, music_data.astype(np.int16))

            # extract beat times and beat strengths
            beat_times, beat_strengths = generate_beat_data(music_path, beat)

            self.beat_q.put_nowait({'beat_times': np.array(beat_times) + time.perf_counter() - 0.15,
                                    'beat_strengths': beat_strengths})
            self.is_music_play.value = True
            # playback
            pygame.mixer.init()
            pygame.mixer.music.load(music_path)
            pygame.mixer.music.play()
            while pygame.mixer.music.get_busy() and self.is_music_play.value:  # don't exit before the audio has finished playing
                time.sleep(0.1)  # lighten the load of the loop
            pygame.quit()
            self.is_music_play.value = False
        except Exception as ex:
            print(ex)
            error_speech()


# if __name__ == "__main__":
#     error_speech()
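As an aside, a minimal sketch of exercising the two extraction helpers above directly (assumes it is run from the repository root with the bundled audio present; not part of the repo itself):
```python
# Hypothetical smoke test for the helpers in alive.py (not part of the repo).
from alive import generate_voice_data, generate_beat_data

times, strengths = generate_voice_data("data/speech/uri_speech_0.wav")
print(len(times), strengths[:5])            # one strength value per 60 ms window

beat_times, beat_strengths = generate_beat_data(
    "data/music/Noisy Radio - Chipi Chipi Chapa Chapa.flac", beat=2)
print(beat_times[:5], beat_strengths[:5])   # beat timestamps in seconds
```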
--------------------------------------------------------------------------------
/args.py:
--------------------------------------------------------------------------------
import argparse
import re


def convert_to_byte(size):
    result = re.search(r'(\d+\.?\d*)(b|kb|mb|gb|tb)', size.lower())
    if result and result.groups():
        unit = result.groups()[1]
        amount = float(result.groups()[0])
        index = ['b', 'kb', 'mb', 'gb', 'tb'].index(unit)
        return amount * pow(1024, index)
    raise ValueError("Invalid size provided, value is " + size)


parser = argparse.ArgumentParser()
parser.add_argument('--debug', action='store_true')
parser.add_argument('--eyebrow', action='store_true')
parser.add_argument('--extend_movement', type=float)
parser.add_argument('--input', type=str, default='cam')
parser.add_argument('--character', type=str, default='y')
parser.add_argument('--output_dir', type=str)
parser.add_argument('--output_webcam', type=str)
parser.add_argument('--output_size', type=str, default='512x512')
parser.add_argument('--model', type=str, default='standard_float')
parser.add_argument('--debug_input', action='store_true')
parser.add_argument('--perf', type=str)
parser.add_argument('--anime4k', action='store_true')
parser.add_argument('--alpha_split', action='store_true')
parser.add_argument('--bongo', action='store_true')
parser.add_argument('--cache', type=str, default='256mb')
parser.add_argument('--gpu_cache', type=str, default='256mb')
parser.add_argument('--simplify', type=int, default=1)
parser.add_argument('--port', type=int, default=7888)
parser.add_argument('--sleep', type=int, default=20)
args = parser.parse_args()
args.output_w = int(args.output_size.split('x')[0])
args.output_h = int(args.output_size.split('x')[1])
if args.cache is not None:
    args.max_cache_len = int(convert_to_byte(args.cache) / 262144 / 4)
else:
    args.max_cache_len = 0
if args.gpu_cache is not None:
    args.max_gpu_cache_len = int(convert_to_byte(args.gpu_cache) / 589824 / 4)
else:
    args.max_gpu_cache_len = 0
if args.output_webcam is None and args.output_dir is None:
    args.debug = True

--------------------------------------------------------------------------------
/assets/2347acc3-799f-4913-8035-ae077ba3dc22.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/assets/2347acc3-799f-4913-8035-ae077ba3dc22.gif

--------------------------------------------------------------------------------
/data/images/lambda_00.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/data/images/lambda_00.png

--------------------------------------------------------------------------------
/data/images/lambda_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/data/images/lambda_01.png

--------------------------------------------------------------------------------
/data/models/placeholder.txt:
--------------------------------------------------------------------------------
This is the folder to extract the models to.
--------------------------------------------------------------------------------
/data/music/Noisy Radio - Chipi Chipi Chapa Chapa.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/data/music/Noisy Radio - Chipi Chipi Chapa Chapa.flac

--------------------------------------------------------------------------------
/data/song/不分手的恋爱-汪苏泷.MP3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/data/song/不分手的恋爱-汪苏泷.MP3

--------------------------------------------------------------------------------
/data/song/不分手的恋爱-汪苏泷_voice.MP3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/data/song/不分手的恋爱-汪苏泷_voice.MP3

--------------------------------------------------------------------------------
/data/speech/error-svc.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/data/speech/error-svc.wav

--------------------------------------------------------------------------------
/data/speech/uri_speech_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/data/speech/uri_speech_0.wav

--------------------------------------------------------------------------------
/data/speech/uri_speech_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/data/speech/uri_speech_1.wav

--------------------------------------------------------------------------------
/main.bat:
--------------------------------------------------------------------------------
D:\anaconda3\envs\eaiv\python.exe main.py ^
--character lambda_00 ^
--output_size 512x512 ^
--simplify 1 ^
--output_webcam unitycapture ^
--model standard_half ^
--anime4k ^
--sleep 20 ^
--port 7888

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
numpy
scipy
matplotlib
librosa==0.10.1
pygame==2.5.2
flask_restful==0.3.10
protobuf==3.20.1
pynput==1.7.6
mediapipe==0.8.11
opencv_python==4.5.5.64
Pillow==9.1.0
pyanime4k==2.5.2
pyvirtualcam==0.9.1
gradio==4.18.0
"mouth_offset": 0.0, 25 | # "beat": 2 26 | # } 27 | 28 | # # 停止所有动作 29 | # data = { 30 | # "type": "stop", 31 | # } 32 | 33 | # # 换皮肤 34 | # data = { 35 | # "type": "change_img", 36 | # "img": r'data/images/lambda_01.png' 37 | # } 38 | 39 | res = requests.post('http://127.0.0.1:7888/alive', json=data) 40 | print(res.json()) 41 | -------------------------------------------------------------------------------- /tha2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/__init__.py -------------------------------------------------------------------------------- /tha2/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/__pycache__/util.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/__pycache__/util.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/compute/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/compute/__init__.py -------------------------------------------------------------------------------- /tha2/compute/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/compute/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/compute/__pycache__/cached_computation_func.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/compute/__pycache__/cached_computation_func.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/compute/__pycache__/cached_computation_protocol.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/compute/__pycache__/cached_computation_protocol.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/compute/cached_computation_func.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Dict, List 2 | 3 | from torch import Tensor 4 | 5 | from tha2.nn.batch_module.batch_input_module import BatchInputModule 6 | 7 | TensorCachedComputationFunc = Callable[ 8 | [Dict[str, BatchInputModule], List[Tensor], Dict[str, List[Tensor]]], Tensor] 9 | TensorListCachedComputationFunc = Callable[ 10 | [Dict[str, BatchInputModule], List[Tensor], Dict[str, List[Tensor]]], List[Tensor]] 11 | -------------------------------------------------------------------------------- /tha2/compute/cached_computation_protocol.py: 
--------------------------------------------------------------------------------
/tha2/compute/cached_computation_protocol.py:
--------------------------------------------------------------------------------
from abc import ABC, abstractmethod
from typing import Dict, List

from torch import Tensor

from tha2.nn.batch_module.batch_input_module import BatchInputModule
from tha2.compute.cached_computation_func import TensorCachedComputationFunc, TensorListCachedComputationFunc


class CachedComputationProtocol(ABC):
    def get_output(self,
                   key: str,
                   modules: Dict[str, BatchInputModule],
                   batch: List[Tensor],
                   outputs: Dict[str, List[Tensor]]):
        if key in outputs:
            return outputs[key]
        else:
            output = self.compute_output(key, modules, batch, outputs)
            outputs[key] = output
            return outputs[key]

    @abstractmethod
    def compute_output(self,
                       key: str,
                       modules: Dict[str, BatchInputModule],
                       batch: List[Tensor],
                       outputs: Dict[str, List[Tensor]]) -> List[Tensor]:
        pass

    def get_output_tensor_func(self, key: str, index: int) -> TensorCachedComputationFunc:
        def func(modules: Dict[str, BatchInputModule],
                 batch: List[Tensor],
                 outputs: Dict[str, List[Tensor]]):
            return self.get_output(key, modules, batch, outputs)[index]
        return func

    def get_output_tensor_list_func(self, key: str) -> TensorListCachedComputationFunc:
        def func(modules: Dict[str, BatchInputModule],
                 batch: List[Tensor],
                 outputs: Dict[str, List[Tensor]]):
            return self.get_output(key, modules, batch, outputs)
        return func
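For illustration, a minimal concrete protocol might look like this (the subclass and the "input" key are hypothetical, not taken from the repo):
```python
from typing import Dict, List
from torch import Tensor
from tha2.compute.cached_computation_protocol import CachedComputationProtocol
from tha2.nn.batch_module.batch_input_module import BatchInputModule


class PassThroughProtocol(CachedComputationProtocol):
    # Treats the key "input" as the raw batch; get_output() memoizes results in
    # `outputs`, so repeated requests for the same key do no recomputation.
    def compute_output(self,
                       key: str,
                       modules: Dict[str, BatchInputModule],
                       batch: List[Tensor],
                       outputs: Dict[str, List[Tensor]]) -> List[Tensor]:
        if key == "input":
            return batch
        raise KeyError(key)
```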
--------------------------------------------------------------------------------
/tha2/mocap/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/mocap/__init__.py

--------------------------------------------------------------------------------
/tha2/mocap/ifacialmocap_pose_converter.py:
--------------------------------------------------------------------------------
from abc import ABC, abstractmethod
from typing import Dict, List


class IFacialMocapPoseConverter(ABC):
    @abstractmethod
    def convert(self, ifacialmocap_pose: Dict[str, float]) -> List[float]:
        pass

--------------------------------------------------------------------------------
/tha2/nn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/__init__.py

--------------------------------------------------------------------------------
/tha2/nn/backbone/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/backbone/__init__.py

--------------------------------------------------------------------------------
/tha2/nn/backbone/poser_args.py:
--------------------------------------------------------------------------------
from typing import Optional

from torch.nn import Sigmoid, Sequential, Tanh

from tha2.nn.base.conv import create_conv3, create_conv3_from_block_args
from tha2.nn.base.nonlinearity_factory import ReLUFactory
from tha2.nn.base.normalization import InstanceNorm2dFactory
from tha2.nn.base.util import BlockArgs


class PoserArgs00:
    def __init__(self,
                 image_size: int,
                 input_image_channels: int,
                 output_image_channels: int,
                 start_channels: int,
                 num_pose_params: int,
                 block_args: Optional[BlockArgs] = None):
        self.num_pose_params = num_pose_params
        self.start_channels = start_channels
        self.output_image_channels = output_image_channels
        self.input_image_channels = input_image_channels
        self.image_size = image_size
        if block_args is None:
            self.block_args = BlockArgs(
                normalization_layer_factory=InstanceNorm2dFactory(),
                nonlinearity_factory=ReLUFactory(inplace=True))
        else:
            self.block_args = block_args

    def create_alpha_block(self):
        from torch.nn import Sequential
        return Sequential(
            create_conv3(
                in_channels=self.start_channels,
                out_channels=1,
                bias=True,
                initialization_method=self.block_args.initialization_method,
                use_spectral_norm=False),
            Sigmoid())

    def create_all_channel_alpha_block(self):
        from torch.nn import Sequential
        return Sequential(
            create_conv3(
                in_channels=self.start_channels,
                out_channels=self.output_image_channels,
                bias=True,
                initialization_method=self.block_args.initialization_method,
                use_spectral_norm=False),
            Sigmoid())

    def create_color_change_block(self):
        return Sequential(
            create_conv3_from_block_args(
                in_channels=self.start_channels,
                out_channels=self.output_image_channels,
                bias=True,
                block_args=self.block_args),
            Tanh())

    def create_grid_change_block(self):
        return create_conv3(
            in_channels=self.start_channels,
            out_channels=2,
            bias=False,
            initialization_method='zero',
            use_spectral_norm=False)
--------------------------------------------------------------------------------
/tha2/nn/backbone/poser_encoder_decoder_00.py:
--------------------------------------------------------------------------------
import math
from typing import Optional, List

import torch
from torch import Tensor
from torch.nn import ModuleList, Module

from tha2.nn.backbone.poser_args import PoserArgs00
from tha2.nn.base.conv import create_conv3_block_from_block_args, create_downsample_block_from_block_args, \
    create_upsample_block_from_block_args
from tha2.nn.base.nonlinearity_factory import ReLUFactory
from tha2.nn.base.normalization import InstanceNorm2dFactory
from tha2.nn.base.resnet_block import ResnetBlock
from tha2.nn.base.util import BlockArgs


class PoserEncoderDecoder00Args(PoserArgs00):
    def __init__(self,
                 image_size: int,
                 input_image_channels: int,
                 output_image_channels: int,
                 num_pose_params: int,
                 start_channels: int,
                 bottleneck_image_size,
                 num_bottleneck_blocks,
                 max_channels: int,
                 block_args: Optional[BlockArgs] = None):
        super().__init__(
            image_size, input_image_channels, output_image_channels, start_channels, num_pose_params, block_args)
        self.max_channels = max_channels
        self.num_bottleneck_blocks = num_bottleneck_blocks
        self.bottleneck_image_size = bottleneck_image_size
        assert bottleneck_image_size > 1

        if block_args is None:
            self.block_args = BlockArgs(
                normalization_layer_factory=InstanceNorm2dFactory(),
                nonlinearity_factory=ReLUFactory(inplace=True))
        else:
            self.block_args = block_args


class PoserEncoderDecoder00(Module):
    def __init__(self, args: PoserEncoderDecoder00Args):
        super().__init__()
        self.args = args

        self.num_levels = int(math.log2(args.image_size // args.bottleneck_image_size)) + 1

        self.downsample_blocks = ModuleList()
        self.downsample_blocks.append(
            create_conv3_block_from_block_args(
                args.input_image_channels,
                args.start_channels,
                args.block_args))

        current_image_size = args.image_size
        current_num_channels = args.start_channels
        while current_image_size > args.bottleneck_image_size:
            next_image_size = current_image_size // 2
            next_num_channels = self.get_num_output_channels_from_image_size(next_image_size)
            self.downsample_blocks.append(create_downsample_block_from_block_args(
                in_channels=current_num_channels,
                out_channels=next_num_channels,
                is_output_1x1=False,
                block_args=args.block_args))
            current_image_size = next_image_size
            current_num_channels = next_num_channels
        assert len(self.downsample_blocks) == self.num_levels

        self.bottleneck_blocks = ModuleList()
        self.bottleneck_blocks.append(create_conv3_block_from_block_args(
            in_channels=current_num_channels + args.num_pose_params,
            out_channels=current_num_channels,
            block_args=args.block_args))
        for i in range(1, args.num_bottleneck_blocks):
            self.bottleneck_blocks.append(
                ResnetBlock.create(
                    num_channels=current_num_channels,
                    is1x1=False,
                    block_args=args.block_args))

        self.upsample_blocks = ModuleList()
        while current_image_size < args.image_size:
            next_image_size = current_image_size * 2
            next_num_channels = self.get_num_output_channels_from_image_size(next_image_size)
            self.upsample_blocks.append(create_upsample_block_from_block_args(
                in_channels=current_num_channels,
                out_channels=next_num_channels,
                block_args=args.block_args))
            current_image_size = next_image_size
            current_num_channels = next_num_channels

    def get_num_output_channels_from_level(self, level: int):
        return self.get_num_output_channels_from_image_size(self.args.image_size // (2 ** level))

    def get_num_output_channels_from_image_size(self, image_size: int):
        return min(self.args.start_channels * (self.args.image_size // image_size), self.args.max_channels)

    def forward(self, image: Tensor, pose: Optional[Tensor] = None) -> List[Tensor]:
        if self.args.num_pose_params != 0:
            assert pose is not None
        else:
            assert pose is None
        outputs = []
        feature = image
        outputs.append(feature)
        for block in self.downsample_blocks:
            feature = block(feature)
            outputs.append(feature)
        if pose is not None:
            n, c = pose.shape
            pose = pose.view(n, c, 1, 1).repeat(1, 1, self.args.bottleneck_image_size, self.args.bottleneck_image_size)
            feature = torch.cat([feature, pose], dim=1)
        for block in self.bottleneck_blocks:
            feature = block(feature)
        outputs.append(feature)
        for block in self.upsample_blocks:
            feature = block(feature)
            outputs.append(feature)
        outputs.reverse()
        return outputs
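A short sketch of driving this module (all sizes below are illustrative guesses, not values prescribed by the repo; the real ones come from the checkpoints in data/models):
```python
import torch
from tha2.nn.backbone.poser_encoder_decoder_00 import (
    PoserEncoderDecoder00, PoserEncoderDecoder00Args)

# Illustrative sizes only.
args = PoserEncoderDecoder00Args(
    image_size=256, input_image_channels=4, output_image_channels=4,
    num_pose_params=3, start_channels=64,
    bottleneck_image_size=32, num_bottleneck_blocks=6, max_channels=512)
module = PoserEncoderDecoder00(args)

image = torch.zeros(1, 4, 256, 256)   # RGBA input
pose = torch.zeros(1, 3)              # one value per pose parameter
features = module(image, pose)        # multi-scale features, finest first
```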
--------------------------------------------------------------------------------
/tha2/nn/backcomp/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/backcomp/__init__.py

--------------------------------------------------------------------------------
/tha2/nn/backcomp/nn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/backcomp/nn/__init__.py
--------------------------------------------------------------------------------
/tha2/nn/backcomp/nn/conv.py:
--------------------------------------------------------------------------------
from torch.nn import Conv2d, Module, Sequential, InstanceNorm2d, ReLU, ConvTranspose2d

from tha2.nn.backcomp.nn.init_function import create_init_function


def Conv7(in_channels: int, out_channels: int, initialization_method='he') -> Module:
    init = create_init_function(initialization_method)
    return init(Conv2d(in_channels, out_channels, kernel_size=7, stride=1, padding=3, bias=False))


def Conv3(in_channels: int, out_channels: int, initialization_method='he') -> Module:
    init = create_init_function(initialization_method)
    return init(Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False))


def Conv7Block(in_channels: int, out_channels: int, initialization_method='he') -> Module:
    return Sequential(
        Conv7(in_channels, out_channels, initialization_method),
        InstanceNorm2d(out_channels, affine=True),
        ReLU(inplace=True))


def DownsampleBlock(in_channels: int, initialization_method='he') -> Module:
    init = create_init_function(initialization_method)
    return Sequential(
        init(Conv2d(in_channels, in_channels * 2, kernel_size=4, stride=2, padding=1, bias=False)),
        InstanceNorm2d(in_channels * 2, affine=True),
        ReLU(inplace=True))


def UpsampleBlock(in_channels: int, out_channels: int, initialization_method='he') -> Module:
    init = create_init_function(initialization_method)
    return Sequential(
        init(ConvTranspose2d(in_channels, out_channels, kernel_size=4, stride=2, padding=1, bias=False)),
        InstanceNorm2d(out_channels, affine=True),
        ReLU(inplace=True))
--------------------------------------------------------------------------------
/tha2/nn/backcomp/nn/encoder_decoder_module.py:
--------------------------------------------------------------------------------
from torch.nn import Module, ModuleList

from tha2.nn.backcomp.nn.conv import Conv7Block, DownsampleBlock, UpsampleBlock
from tha2.nn.backcomp.nn.resnet_block import ResNetBlock

# image_size: int = 256,
# image_channels: int = 4,
# pose_size: int = 3,
# intermediate_channels: int = 64,
# bottleneck_image_size: int = 32,
# bottleneck_block_count: int = 6,
# initialization_method: str = 'he',
# align_corners: bool = True):

class EncoderDecoderModule(Module):
    def __init__(self,
                 image_size: int,
                 image_channels: int,
                 output_channels: int,
                 bottleneck_image_size: int,
                 bottleneck_block_count: int,
                 initialization_method: str = 'he'):
        super().__init__()

        self.module_list = ModuleList()
        self.module_list.append(Conv7Block(image_channels, output_channels))
        current_size = image_size
        current_channels = output_channels
        while current_size > bottleneck_image_size:
            self.module_list.append(DownsampleBlock(current_channels, initialization_method))
            current_size //= 2
            current_channels *= 2

        for i in range(bottleneck_block_count):
            self.module_list.append(ResNetBlock(current_channels, initialization_method))

        while current_size < image_size:
            self.module_list.append(UpsampleBlock(current_channels, current_channels // 2, initialization_method))
            current_size *= 2
            current_channels //= 2

    def forward(self, x):
        for module in self.module_list:
            x = module(x)
        return x

--------------------------------------------------------------------------------
/tha2/nn/backcomp/nn/init_function.py:
--------------------------------------------------------------------------------
from torch.nn import Module
from torch.nn.init import kaiming_normal_, xavier_normal_


def create_init_function(method: str = 'none'):
    def init(module: Module):
        if method == 'none':
            return module
        elif method == 'he':
            kaiming_normal_(module.weight)
            return module
        elif method == 'xavier':
            xavier_normal_(module.weight)
            return module
        else:
            raise ValueError("Invalid initialization method %s" % method)

    return init

--------------------------------------------------------------------------------
/tha2/nn/backcomp/nn/resnet_block.py:
--------------------------------------------------------------------------------
from torch import relu
from torch.nn import Module, InstanceNorm2d

from tha2.nn.backcomp.nn.conv import Conv3


class ResNetBlock(Module):
    def __init__(self, num_channels: int, initialization_method: str = 'he'):
        super().__init__()
        self.conv1 = Conv3(num_channels, num_channels, initialization_method)
        self.norm1 = InstanceNorm2d(num_features=num_channels, affine=True)
        self.conv2 = Conv3(num_channels, num_channels, initialization_method)
        self.norm2 = InstanceNorm2d(num_features=num_channels, affine=True)

    def forward(self, x):
        return x + self.norm2(self.conv2(relu(self.norm1(self.conv1(x)))))
--------------------------------------------------------------------------------
/tha2/nn/backcomp/nn/u_net_module.py:
--------------------------------------------------------------------------------
import torch
from torch.nn import Module, ModuleList

from tha2.nn.backcomp.nn.conv import Conv7Block, DownsampleBlock, UpsampleBlock
from tha2.nn.backcomp.nn.resnet_block import ResNetBlock


class UNetModule(Module):
    def __init__(self,
                 image_size: int,
                 image_channels: int,
                 output_channels: int,
                 bottleneck_image_size: int,
                 bottleneck_block_count: int,
                 initialization_method: str = 'he'):
        super().__init__()
        self.downward_modules = ModuleList()
        self.downward_module_channel_count = {}

        self.downward_modules.append(Conv7Block(image_channels, output_channels, initialization_method))
        self.downward_module_channel_count[image_size] = output_channels

        # Downsampling
        current_channels = output_channels
        current_image_size = image_size
        while current_image_size > bottleneck_image_size:
            self.downward_modules.append(DownsampleBlock(current_channels, initialization_method))
            current_channels = current_channels * 2
            current_image_size = current_image_size // 2
            self.downward_module_channel_count[current_image_size] = current_channels

        # Bottleneck
        self.bottleneck_modules = ModuleList()
        for i in range(bottleneck_block_count):
            self.bottleneck_modules.append(ResNetBlock(current_channels, initialization_method))

        # Upsampling
        self.upsampling_modules = ModuleList()
        while current_image_size < image_size:
            if current_image_size == bottleneck_image_size:
                input_channels = current_channels
            else:
                input_channels = current_channels + self.downward_module_channel_count[current_image_size]
            self.upsampling_modules.insert(0,
                                           UpsampleBlock(input_channels, current_channels // 2, initialization_method))
            current_channels = current_channels // 2
            current_image_size = current_image_size * 2

        self.upsampling_modules.insert(
            0, Conv7Block(current_channels + output_channels, output_channels, initialization_method))

    def forward(self, x):
        downward_outputs = []
        for module in self.downward_modules:
            x = module(x)
            downward_outputs.append(x)
        for module in self.bottleneck_modules:
            x = module(x)
        x = self.upsampling_modules[-1](x)
        for i in range(len(self.upsampling_modules) - 2, -1, -1):
            y = torch.cat([x, downward_outputs[i]], dim=1)
            x = self.upsampling_modules[i](y)
        return x

--------------------------------------------------------------------------------
/tha2/nn/backcomp/tha/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/backcomp/tha/__init__.py
-------------------------------------------------------------------------------- /tha2/nn/backcomp/tha/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/backcomp/tha/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/nn/backcomp/tha/__pycache__/combiner.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/backcomp/tha/__pycache__/combiner.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/nn/backcomp/tha/__pycache__/two_algo_face_rotator.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/backcomp/tha/__pycache__/two_algo_face_rotator.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/nn/backcomp/tha/combiner.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.nn import Sequential, Sigmoid, Tanh 4 | 5 | from tha2.nn.backcomp.nn.conv import Conv7 6 | from tha2.nn.backcomp.nn.u_net_module import UNetModule 7 | from tha2.nn.batch_module.batch_input_module import BatchInputModule, BatchInputModuleFactory 8 | 9 | 10 | class Combiner(BatchInputModule): 11 | def __init__(self, 12 | image_size: int = 256, 13 | image_channels: int = 4, 14 | pose_size: int = 3, 15 | intermediate_channels: int = 64, 16 | bottleneck_image_size: int = 32, 17 | bottleneck_block_count: int = 6, 18 | initialization_method: str = 'he'): 19 | super().__init__() 20 | self.main_body = UNetModule( 21 | image_size=image_size, 22 | image_channels=2 * image_channels + pose_size, 23 | output_channels=intermediate_channels, 24 | bottleneck_image_size=bottleneck_image_size, 25 | bottleneck_block_count=bottleneck_block_count, 26 | initialization_method=initialization_method) 27 | self.combine_alpha_mask = Sequential( 28 | Conv7(intermediate_channels, image_channels, initialization_method), 29 | Sigmoid()) 30 | self.retouch_alpha_mask = Sequential( 31 | Conv7(intermediate_channels, image_channels, initialization_method), 32 | Sigmoid()) 33 | self.retouch_color_change = Sequential( 34 | Conv7(intermediate_channels, image_channels, initialization_method), 35 | Tanh()) 36 | 37 | def forward(self, first_image: Tensor, second_image: Tensor, pose: Tensor): 38 | pose = pose.unsqueeze(2).unsqueeze(3) 39 | pose = pose.expand(pose.size(0), pose.size(1), first_image.size(2), first_image.size(3)) 40 | 41 | x = torch.cat([first_image, second_image, pose], dim=1) 42 | y = self.main_body(x) 43 | combine_alpha_mask = self.combine_alpha_mask(y) 44 | combined_image = combine_alpha_mask * first_image + (1 - combine_alpha_mask) * second_image 45 | retouch_alpha_mask = self.retouch_alpha_mask(y) 46 | retouch_color_change = self.retouch_color_change(y) 47 | final_image = retouch_alpha_mask * combined_image + (1 - retouch_alpha_mask) * retouch_color_change 48 | # return [final_image, combined_image, combine_alpha_mask, retouch_alpha_mask, retouch_color_change] 49 | return final_image 50 | 51 | def forward_from_batch(self, 
batch): 52 | return self.forward(batch[0], batch[1], batch[2]) 53 | 54 | 55 | class CombinerFactory(BatchInputModuleFactory): 56 | def __init__(self, 57 | image_size: int = 256, 58 | image_channels: int = 4, 59 | pose_size: int = 3, 60 | intermediate_channels: int = 64, 61 | bottleneck_image_size: int = 32, 62 | bottleneck_block_count: int = 6, 63 | initialization_method: str = 'he'): 64 | super().__init__() 65 | self.image_size = image_size 66 | self.image_channels = image_channels 67 | self.pose_size = pose_size 68 | self.intermediate_channels = intermediate_channels 69 | self.bottleneck_image_size = bottleneck_image_size 70 | self.bottleneck_block_count = bottleneck_block_count 71 | self.initialization_method = initialization_method 72 | 73 | def create(self) -> BatchInputModule: 74 | return Combiner( 75 | self.image_size, 76 | self.image_channels, 77 | self.pose_size, 78 | self.intermediate_channels, 79 | self.bottleneck_image_size, 80 | self.bottleneck_block_count, 81 | self.initialization_method) -------------------------------------------------------------------------------- /tha2/nn/backcomp/tha/face_morpher.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.nn import Sequential, Tanh, Sigmoid 4 | 5 | from tha2.nn.backcomp.nn.conv import Conv7 6 | from tha2.nn.backcomp.nn.encoder_decoder_module import EncoderDecoderModule 7 | from tha2.nn.batch_module.batch_input_module import BatchInputModuleFactory, BatchInputModule 8 | 9 | 10 | class FaceMorpher(BatchInputModule): 11 | def __init__(self, 12 | image_size: int = 256, 13 | image_channels: int = 4, 14 | pose_size: int = 3, 15 | intermediate_channels: int = 64, 16 | bottleneck_image_size: int = 32, 17 | bottleneck_block_count: int = 6, 18 | initialization_method: str = 'he'): 19 | super().__init__() 20 | self.main_body = EncoderDecoderModule( 21 | image_size=image_size, 22 | image_channels=image_channels + pose_size, 23 | output_channels=intermediate_channels, 24 | bottleneck_image_size=bottleneck_image_size, 25 | bottleneck_block_count=bottleneck_block_count, 26 | initialization_method=initialization_method) 27 | self.color_change = Sequential( 28 | Conv7(intermediate_channels, image_channels, initialization_method), 29 | Tanh()) 30 | self.alpha_mask = Sequential( 31 | Conv7(intermediate_channels, image_channels, initialization_method), 32 | Sigmoid()) 33 | 34 | def forward(self, image: Tensor, pose: Tensor): 35 | pose = pose.unsqueeze(2).unsqueeze(3) 36 | pose = pose.expand(pose.size(0), pose.size(1), image.size(2), image.size(3)) 37 | x = torch.cat([image, pose], dim=1) 38 | y = self.main_body(x) 39 | color = self.color_change(y) 40 | alpha = self.alpha_mask(y) 41 | output_image = alpha * image + (1 - alpha) * color 42 | return [output_image, alpha, color] 43 | 44 | def forward_from_batch(self, batch): 45 | return self.forward(batch[0], batch[1]) 46 | 47 | 48 | class FaceMorpherSpec(BatchInputModuleFactory): 49 | def __init__(self, 50 | image_size: int = 256, 51 | image_channels: int = 4, 52 | pose_size: int = 3, 53 | intermediate_channels: int = 64, 54 | bottleneck_image_size: int = 32, 55 | bottleneck_block_count: int = 6, 56 | initialization_method: str = 'he'): 57 | super().__init__() 58 | self.image_size = image_size 59 | self.image_channels = image_channels 60 | self.pose_size = pose_size 61 | self.intermediate_channels = intermediate_channels 62 | self.bottleneck_image_size = bottleneck_image_size 63 | self.bottleneck_block_count 
= bottleneck_block_count 64 | self.initialization_method = initialization_method 65 | 66 | def create(self) -> BatchInputModule: 67 | return FaceMorpher( 68 | self.image_size, 69 | self.image_channels, 70 | self.pose_size, 71 | self.intermediate_channels, 72 | self.bottleneck_image_size, 73 | self.bottleneck_block_count, 74 | self.initialization_method) 75 | -------------------------------------------------------------------------------- /tha2/nn/backcomp/tha/two_algo_face_rotator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.nn import Sequential, Tanh, Sigmoid 4 | from torch.nn.functional import affine_grid, grid_sample 5 | 6 | from tha2.nn.backcomp.nn.conv import Conv7 7 | from tha2.nn.backcomp.nn.encoder_decoder_module import EncoderDecoderModule 8 | from tha2.nn.batch_module.batch_input_module import BatchInputModule, BatchInputModuleFactory 9 | 10 | 11 | class TwoAlgoFaceRotator(BatchInputModule): 12 | COLOR_CHANGED_IMAGE_INDEX = 0 13 | RESAMPLED_IMAGE_INDEX = 1 14 | COLOR_CHANGE_INDEX = 2 15 | ALPHA_INDEX = 3 16 | GRID_CHANGE_INDEX = 4 17 | OUTPUT_LENGTH = 5 18 | 19 | def __init__(self, 20 | image_size: int = 256, 21 | image_channels: int = 4, 22 | pose_size: int = 3, 23 | intermediate_channels: int = 64, 24 | bottleneck_image_size: int = 32, 25 | bottleneck_block_count: int = 6, 26 | initialization_method: str = 'he', 27 | align_corners: bool = True): 28 | super().__init__() 29 | self.align_corners = align_corners 30 | self.main_body = EncoderDecoderModule( 31 | image_size=image_size, 32 | image_channels=image_channels + pose_size, 33 | output_channels=intermediate_channels, 34 | bottleneck_image_size=bottleneck_image_size, 35 | bottleneck_block_count=bottleneck_block_count, 36 | initialization_method=initialization_method) 37 | self.pumarola_color_change = Sequential( 38 | Conv7(intermediate_channels, image_channels, initialization_method), 39 | Tanh()) 40 | self.pumarola_alpha_mask = Sequential( 41 | Conv7(intermediate_channels, image_channels, initialization_method), 42 | Sigmoid()) 43 | self.zhou_grid_change = Conv7(intermediate_channels, 2, initialization_method) 44 | 45 | def forward(self, image: Tensor, pose: Tensor): 46 | n, c, h, w = image.shape 47 | 48 | pose = pose.unsqueeze(2).unsqueeze(3) 49 | pose = pose.expand(pose.shape[0], pose.shape[1], image.shape[2], image.shape[3]) 50 | x = torch.cat([image, pose], dim=1) 51 | y = self.main_body(x) 52 | 53 | color_change = self.pumarola_color_change(y) 54 | alpha_mask = self.pumarola_alpha_mask(y) 55 | color_changed = alpha_mask * image + (1 - alpha_mask) * color_change 56 | 57 | original_grid_change = self.zhou_grid_change(y) 58 | grid_change = torch.transpose(original_grid_change.view(n, 2, h * w), 1, 2).view(n, h, w, 2) 59 | device = self.zhou_grid_change.weight.device 60 | identity = torch.Tensor([[1, 0, 0], [0, 1, 0]]).to(device).unsqueeze(0).repeat(n, 1, 1) 61 | base_grid = affine_grid(identity, [n, c, h, w], align_corners=self.align_corners) 62 | grid = base_grid + grid_change 63 | resampled = grid_sample(image, grid, mode='bilinear', padding_mode='border', align_corners=self.align_corners) 64 | 65 | # return [color_changed, resampled, color_change, alpha_mask, original_grid_change] 66 | return [color_changed, resampled] 67 | 68 | def forward_from_batch(self, batch): 69 | return self.forward(batch[0], batch[1]) 70 | 71 | 72 | class TwoAlgoFaceRotatorFactory(BatchInputModuleFactory): 73 | def __init__(self, 74 | image_size: 
int = 256, 75 | image_channels: int = 4, 76 | pose_size: int = 3, 77 | intermediate_channels: int = 64, 78 | bottleneck_image_size: int = 32, 79 | bottleneck_block_count: int = 6, 80 | initialization_method: str = 'he', 81 | align_corners: bool = True): 82 | super().__init__() 83 | self.image_size = image_size 84 | self.image_channels = image_channels 85 | self.pose_size = pose_size 86 | self.intermediate_channels = intermediate_channels 87 | self.bottleneck_image_size = bottleneck_image_size 88 | self.bottleneck_block_count = bottleneck_block_count 89 | self.initialization_method = initialization_method 90 | self.align_corners = align_corners 91 | 92 | def create(self) -> BatchInputModule: 93 | return TwoAlgoFaceRotator( 94 | self.image_size, 95 | self.image_channels, 96 | self.pose_size, 97 | self.intermediate_channels, 98 | self.bottleneck_image_size, 99 | self.bottleneck_block_count, 100 | self.initialization_method, 101 | self.align_corners) -------------------------------------------------------------------------------- /tha2/nn/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/base/__init__.py -------------------------------------------------------------------------------- /tha2/nn/base/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/base/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/nn/base/__pycache__/conv.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/base/__pycache__/conv.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/nn/base/__pycache__/init_function.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/base/__pycache__/init_function.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/nn/base/__pycache__/module_factory.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/base/__pycache__/module_factory.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/nn/base/__pycache__/nonlinearity_factory.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/base/__pycache__/nonlinearity_factory.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/nn/base/__pycache__/normalization.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/base/__pycache__/normalization.cpython-310.pyc 
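
A minimal sketch of how the two modules above compose, inferred from their signatures (weights here are random, so the output is meaningless; real use loads trained checkpoints): TwoAlgoFaceRotator proposes two candidate rotated images, a Pumarola-style color change and a Zhou-style resampling, and Combiner alpha-blends them under the same pose.

import torch
from tha2.nn.backcomp.tha.two_algo_face_rotator import TwoAlgoFaceRotator
from tha2.nn.backcomp.tha.combiner import Combiner

rotator = TwoAlgoFaceRotator()   # defaults: 256x256, 4 channels, pose_size=3
combiner = Combiner()

image = torch.randn(1, 4, 256, 256)  # stand-in for a 4-channel character image
pose = torch.randn(1, 3)             # 3 face-rotation parameters

color_changed, resampled = rotator(image, pose)         # two candidate rotations
final_image = combiner(color_changed, resampled, pose)  # blended result
print(final_image.shape)  # torch.Size([1, 4, 256, 256])
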
-------------------------------------------------------------------------------- /tha2/nn/base/__pycache__/pass_through.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/base/__pycache__/pass_through.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/nn/base/__pycache__/resnet_block.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/base/__pycache__/resnet_block.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/nn/base/__pycache__/spectral_norm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/base/__pycache__/spectral_norm.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/nn/base/__pycache__/util.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/base/__pycache__/util.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/nn/base/init_function.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import zero_ 3 | from torch.nn import Module 4 | from torch.nn.init import kaiming_normal_, xavier_normal_, normal_ 5 | 6 | 7 | def create_init_function(method: str = 'none'): 8 | def init(module: Module): 9 | if method == 'none': 10 | return module 11 | elif method == 'he': 12 | kaiming_normal_(module.weight) 13 | return module 14 | elif method == 'xavier': 15 | xavier_normal_(module.weight) 16 | return module 17 | elif method == 'dcgan': 18 | normal_(module.weight, 0.0, 0.02) 19 | return module 20 | elif method == 'dcgan_001': 21 | normal_(module.weight, 0.0, 0.01) 22 | return module 23 | elif method == "zero": 24 | with torch.no_grad(): 25 | zero_(module.weight) 26 | return module 27 | else: 28 | raise ValueError("Invalid initialization method %s" % method) 29 | 30 | return init 31 | -------------------------------------------------------------------------------- /tha2/nn/base/module_factory.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | from torch.nn import Module 4 | 5 | 6 | class ModuleFactory(ABC): 7 | @abstractmethod 8 | def create(self) -> Module: 9 | pass -------------------------------------------------------------------------------- /tha2/nn/base/nonlinearity_factory.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from torch.nn import Module, ReLU, LeakyReLU, ELU 4 | 5 | from tha2.nn.base.module_factory import ModuleFactory 6 | 7 | 8 | class ReLUFactory(ModuleFactory): 9 | def __init__(self, inplace: bool = False): 10 | self.inplace = inplace 11 | 12 | def create(self) -> Module: 13 | return ReLU(self.inplace) 14 | 15 | 16 | class LeakyReLUFactory(ModuleFactory): 17 | def __init__(self, inplace: bool = False, negative_slope: float = 1e-2): 18 | self.negative_slope =
negative_slope 19 | self.inplace = inplace 20 | 21 | def create(self) -> Module: 22 | return LeakyReLU(inplace=self.inplace, negative_slope=self.negative_slope) 23 | 24 | 25 | class ELUFactory(ModuleFactory): 26 | def __init__(self, inplace: bool = False, alpha: float = 1.0): 27 | self.alpha = alpha 28 | self.inplace = inplace 29 | 30 | def create(self) -> Module: 31 | return ELU(inplace=self.inplace, alpha=self.alpha) 32 | 33 | 34 | def resolve_nonlinearity_factory(nonlinearity_factory: Optional[ModuleFactory]) -> ModuleFactory: 35 | if nonlinearity_factory is None: 36 | return ReLUFactory(inplace=True) 37 | else: 38 | return nonlinearity_factory 39 | -------------------------------------------------------------------------------- /tha2/nn/base/normalization.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Optional 3 | 4 | import torch 5 | from torch.nn import Module, BatchNorm2d, InstanceNorm2d, Parameter 6 | from torch.nn.init import normal_, constant_ 7 | 8 | from tha2.nn.base.pass_through import PassThrough 9 | 10 | 11 | class PixelNormalization(Module): 12 | def __init__(self, epsilon=1e-8): 13 | super().__init__() 14 | self.epsilon = epsilon 15 | 16 | def forward(self, x): 17 | return x / torch.sqrt((x ** 2).mean(dim=1, keepdim=True) + self.epsilon) 18 | 19 | 20 | class NormalizationLayerFactory(ABC): 21 | def __init__(self): 22 | super().__init__() 23 | 24 | @abstractmethod 25 | def create(self, num_features: int, affine: bool = True) -> Module: 26 | pass 27 | 28 | @staticmethod 29 | def resolve_2d(factory: Optional['NormalizationLayerFactory']) -> 'NormalizationLayerFactory': 30 | if factory is None: 31 | return InstanceNorm2dFactory() 32 | else: 33 | return factory 34 | 35 | 36 | class Bias2d(Module): 37 | def __init__(self, num_features: int): 38 | super().__init__() 39 | self.num_features = num_features 40 | self.bias = Parameter(torch.zeros(1, num_features, 1, 1)) 41 | 42 | def forward(self, x): 43 | return x + self.bias 44 | 45 | 46 | class NoNorm2dFactory(NormalizationLayerFactory): 47 | def __init__(self): 48 | super().__init__() 49 | 50 | def create(self, num_features: int, affine: bool = True) -> Module: 51 | if affine: 52 | return Bias2d(num_features) 53 | else: 54 | return PassThrough() 55 | 56 | 57 | class BatchNorm2dFactory(NormalizationLayerFactory): 58 | def __init__(self, 59 | weight_mean: Optional[float] = None, 60 | weight_std: Optional[float] = None, 61 | bias: Optional[float] = None): 62 | super().__init__() 63 | self.bias = bias 64 | self.weight_std = weight_std 65 | self.weight_mean = weight_mean 66 | 67 | def get_weight_mean(self): 68 | if self.weight_mean is None: 69 | return 1.0 70 | else: 71 | return self.weight_mean 72 | 73 | def get_weight_std(self): 74 | if self.weight_std is None: 75 | return 0.02 76 | else: 77 | return self.weight_std 78 | 79 | def create(self, num_features: int, affine: bool = True) -> Module: 80 | module = BatchNorm2d(num_features=num_features, affine=affine) 81 | if affine: 82 | if self.weight_mean is not None or self.weight_std is not None: 83 | normal_(module.weight, self.get_weight_mean(), self.get_weight_std()) 84 | if self.bias is not None: 85 | constant_(module.bias, self.bias) 86 | return module 87 | 88 | 89 | class InstanceNorm2dFactory(NormalizationLayerFactory): 90 | def __init__(self): 91 | super().__init__() 92 | 93 | def create(self, num_features: int, affine: bool = True) -> Module: 94 | return
InstanceNorm2d(num_features=num_features, affine=affine) 95 | 96 | 97 | class PixelNormFactory(NormalizationLayerFactory): 98 | def __init__(self): 99 | super().__init__() 100 | 101 | def create(self, num_features: int, affine: bool = True) -> Module: 102 | return PixelNormalization() -------------------------------------------------------------------------------- /tha2/nn/base/pass_through.py: -------------------------------------------------------------------------------- 1 | from torch.nn import Module 2 | 3 | 4 | class PassThrough(Module): 5 | def __init__(self): 6 | super().__init__() 7 | 8 | def forward(self, x): 9 | return x -------------------------------------------------------------------------------- /tha2/nn/base/resnet_block.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import torch 4 | from torch.nn import Module, Sequential, Parameter 5 | 6 | from tha2.nn.base.conv import create_conv1, create_conv3 7 | from tha2.nn.base.module_factory import ModuleFactory 8 | from tha2.nn.base.nonlinearity_factory import resolve_nonlinearity_factory 9 | from tha2.nn.base.normalization import NormalizationLayerFactory 10 | from tha2.nn.base.util import BlockArgs 11 | 12 | 13 | class ResnetBlock(Module): 14 | @staticmethod 15 | def create(num_channels: int, 16 | is1x1: bool = False, 17 | use_scale_parameters: bool = False, 18 | block_args: Optional[BlockArgs] = None): 19 | if block_args is None: 20 | block_args = BlockArgs() 21 | return ResnetBlock(num_channels, 22 | is1x1, 23 | block_args.initialization_method, 24 | block_args.nonlinearity_factory, 25 | block_args.normalization_layer_factory, 26 | block_args.use_spectral_norm, 27 | use_scale_parameters) 28 | 29 | def __init__(self, 30 | num_channels: int, 31 | is1x1: bool = False, 32 | initialization_method: str = 'he', 33 | nonlinearity_factory: ModuleFactory = None, 34 | normalization_layer_factory: Optional[NormalizationLayerFactory] = None, 35 | use_spectral_norm: bool = False, 36 | use_scale_parameter: bool = False): 37 | super().__init__() 38 | self.use_scale_parameter = use_scale_parameter 39 | if self.use_scale_parameter: 40 | self.scale = Parameter(torch.zeros(1)) 41 | nonlinearity_factory = resolve_nonlinearity_factory(nonlinearity_factory) 42 | if is1x1: 43 | self.resnet_path = Sequential( 44 | create_conv1(num_channels, num_channels, initialization_method, 45 | bias=True, 46 | use_spectral_norm=use_spectral_norm), 47 | nonlinearity_factory.create(), 48 | create_conv1(num_channels, num_channels, initialization_method, 49 | bias=True, 50 | use_spectral_norm=use_spectral_norm)) 51 | else: 52 | self.resnet_path = Sequential( 53 | create_conv3(num_channels, num_channels, 54 | bias=False, initialization_method=initialization_method, 55 | use_spectral_norm=use_spectral_norm), 56 | NormalizationLayerFactory.resolve_2d(normalization_layer_factory).create(num_channels, affine=True), 57 | nonlinearity_factory.create(), 58 | create_conv3(num_channels, num_channels, 59 | bias=False, initialization_method=initialization_method, 60 | use_spectral_norm=use_spectral_norm), 61 | NormalizationLayerFactory.resolve_2d(normalization_layer_factory).create(num_channels, affine=True)) 62 | 63 | def forward(self, x): 64 | if self.use_scale_parameter: 65 | return x + self.scale * self.resnet_path(x) 66 | else: 67 | return x + self.resnet_path(x) 68 | -------------------------------------------------------------------------------- /tha2/nn/base/spectral_norm.py: 
-------------------------------------------------------------------------------- 1 | from torch.nn import Module 2 | from torch.nn.utils import spectral_norm 3 | 4 | 5 | def apply_spectral_norm(module: Module, use_spectral_norm: bool = False) -> Module: 6 | if use_spectral_norm: 7 | return spectral_norm(module) 8 | else: 9 | return module 10 | -------------------------------------------------------------------------------- /tha2/nn/base/util.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from torch.nn import Module 4 | 5 | from tha2.nn.base.init_function import create_init_function 6 | from tha2.nn.base.module_factory import ModuleFactory 7 | from tha2.nn.base.nonlinearity_factory import resolve_nonlinearity_factory 8 | from tha2.nn.base.normalization import NormalizationLayerFactory 9 | from tha2.nn.base.spectral_norm import apply_spectral_norm 10 | 11 | 12 | def wrap_conv_or_linear_module(module: Module, initialization_method: str, use_spectral_norm: bool): 13 | init = create_init_function(initialization_method) 14 | return apply_spectral_norm(init(module), use_spectral_norm) 15 | 16 | 17 | class ImageArgs: 18 | def __init__(self, size: int = 64, num_channels: int = 3): 19 | self.num_channels = num_channels 20 | self.size = size 21 | 22 | 23 | class BlockArgs: 24 | def __init__(self, 25 | initialization_method: str = 'he', 26 | use_spectral_norm: bool = False, 27 | normalization_layer_factory: Optional[NormalizationLayerFactory] = None, 28 | nonlinearity_factory: Optional[ModuleFactory] = None): 29 | self.nonlinearity_factory = resolve_nonlinearity_factory(nonlinearity_factory) 30 | self.normalization_layer_factory = normalization_layer_factory 31 | self.use_spectral_norm = use_spectral_norm 32 | self.initialization_method = initialization_method 33 | 34 | def wrap_module(self, module: Module) -> Module: 35 | return wrap_conv_or_linear_module(module, self.initialization_method, self.use_spectral_norm) 36 | -------------------------------------------------------------------------------- /tha2/nn/base/view_change.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from torch import Tensor 4 | from torch.nn import Module 5 | 6 | 7 | class ViewChange(Module): 8 | def __init__(self, new_size: List[int]): 9 | super().__init__() 10 | self.new_size = new_size 11 | 12 | def forward(self, x: Tensor): 13 | n = x.shape[0] 14 | return x.view([n] + self.new_size) 15 | 16 | 17 | class ViewImageAsVector(Module): 18 | def __init__(self): 19 | super().__init__() 20 | 21 | def forward(self, x: Tensor): 22 | assert x.dim() == 4 23 | n, c, w, h = x.shape 24 | return x.view(n, c * w * h) 25 | 26 | 27 | class ViewVectorAsMultiChannelImage(Module): 28 | def __init__(self): 29 | super().__init__() 30 | 31 | def forward(self, x: Tensor): 32 | assert x.dim() == 2 33 | n, c = x.shape 34 | return x.view(n, c, 1, 1) 35 | 36 | 37 | class ViewVectorAsOneChannelImage(Module): 38 | def __init__(self): 39 | super().__init__() 40 | 41 | def forward(self, x: Tensor): 42 | assert x.dim() == 2 43 | n, c = x.shape 44 | return x.view(n, 1, c, 1) 45 | -------------------------------------------------------------------------------- /tha2/nn/batch_module/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/batch_module/__init__.py
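
BlockArgs (in tha2/nn/base/util.py above) bundles the knobs the base blocks share. wrap_module applies only the weight initialization and the optional spectral norm; the normalization and nonlinearity factories are consumed by block builders such as ResnetBlock.create. A small illustrative sketch, not from the repository:

import torch
from torch.nn import Conv2d
from tha2.nn.base.util import BlockArgs
from tha2.nn.base.nonlinearity_factory import LeakyReLUFactory
from tha2.nn.base.normalization import InstanceNorm2dFactory

args = BlockArgs(
    initialization_method='he',
    use_spectral_norm=True,
    normalization_layer_factory=InstanceNorm2dFactory(),
    nonlinearity_factory=LeakyReLUFactory(inplace=True, negative_slope=0.2))

# He-initialize the conv weights, then wrap the module in spectral norm.
conv = args.wrap_module(Conv2d(3, 64, kernel_size=3, padding=1))
print(conv(torch.randn(1, 3, 32, 32)).shape)  # torch.Size([1, 64, 32, 32])
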
-------------------------------------------------------------------------------- /tha2/nn/batch_module/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/batch_module/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/nn/batch_module/__pycache__/batch_input_module.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/batch_module/__pycache__/batch_input_module.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/nn/batch_module/batch_input_model_factory.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Set 2 | 3 | from tha2.nn.batch_module.batch_input_module import BatchInputModule, BatchInputModuleFactory 4 | 5 | 6 | class BatchInputModelFactory: 7 | def __init__(self, module_factories: Dict[str, BatchInputModuleFactory]): 8 | self.module_factories = module_factories 9 | 10 | def get_module_names(self) -> Set[str]: 11 | return set(self.module_factories.keys()) 12 | 13 | def create(self) -> Dict[str, BatchInputModule]: 14 | output = {} 15 | for name in self.module_factories: 16 | output[name] = self.module_factories[name].create() 17 | return output 18 | 19 | def get_module_factory(self, module_name) -> BatchInputModuleFactory: 20 | return self.module_factories[module_name] -------------------------------------------------------------------------------- /tha2/nn/batch_module/batch_input_module.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import List 3 | 4 | from torch import Tensor 5 | from torch.nn import Module 6 | 7 | from tha2.nn.base.module_factory import ModuleFactory 8 | 9 | 10 | class BatchInputModule(Module, ABC): 11 | def __init__(self): 12 | super().__init__() 13 | 14 | @abstractmethod 15 | def forward_from_batch(self, batch: List[Tensor]): 16 | pass 17 | 18 | 19 | class BatchInputModuleFactory(ModuleFactory): 20 | def __init__(self): 21 | super().__init__() 22 | 23 | @abstractmethod 24 | def create(self) -> BatchInputModule: 25 | pass 26 | -------------------------------------------------------------------------------- /tha2/nn/eyebrow/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/eyebrow/__init__.py -------------------------------------------------------------------------------- /tha2/nn/eyebrow/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/eyebrow/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/nn/eyebrow/__pycache__/eyebrow_decomposer_00.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/eyebrow/__pycache__/eyebrow_decomposer_00.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/nn/eyebrow/__pycache__/eyebrow_morphing_combiner_00.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/eyebrow/__pycache__/eyebrow_morphing_combiner_00.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/nn/eyebrow/eyebrow_decomposer_00.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import torch 4 | from torch import Tensor 5 | 6 | from tha2.nn.backbone.poser_encoder_decoder_00 import PoserEncoderDecoder00Args, PoserEncoderDecoder00 7 | from tha2.nn.util import apply_color_change 8 | from tha2.nn.batch_module.batch_input_module import BatchInputModule, BatchInputModuleFactory 9 | from tha2.nn.base.nonlinearity_factory import ReLUFactory 10 | from tha2.nn.base.normalization import InstanceNorm2dFactory 11 | from tha2.nn.base.util import BlockArgs 12 | 13 | 14 | class EyebrowDecomposer00Args(PoserEncoderDecoder00Args): 15 | def __init__(self, 16 | image_size: int = 128, 17 | image_channels: int = 4, 18 | start_channels: int = 64, 19 | bottleneck_image_size=16, 20 | num_bottleneck_blocks=6, 21 | max_channels: int = 512, 22 | block_args: Optional[BlockArgs] = None): 23 | super().__init__( 24 | image_size, 25 | image_channels, 26 | image_channels, 27 | 0, 28 | start_channels, 29 | bottleneck_image_size, 30 | num_bottleneck_blocks, 31 | max_channels, 32 | block_args) 33 | 34 | 35 | class EyebrowDecomposer00(BatchInputModule): 36 | def __init__(self, args: EyebrowDecomposer00Args): 37 | super().__init__() 38 | self.args = args 39 | self.body = PoserEncoderDecoder00(args) 40 | self.background_layer_alpha = self.args.create_alpha_block() 41 | self.background_layer_color_change = self.args.create_color_change_block() 42 | self.eyebrow_layer_alpha = self.args.create_alpha_block() 43 | self.eyebrow_layer_color_change = self.args.create_color_change_block() 44 | 45 | def forward(self, image: Tensor) -> List[Tensor]: 46 | feature = self.body(image)[0] 47 | 48 | background_layer_alpha = self.background_layer_alpha(feature) 49 | background_layer_color_change = self.background_layer_color_change(feature) 50 | background_layer_1 = apply_color_change(background_layer_alpha, background_layer_color_change, image) 51 | 52 | eyebrow_layer_alpha = self.eyebrow_layer_alpha(feature) 53 | eyebrow_layer_color_change = self.eyebrow_layer_color_change(feature) 54 | eyebrow_layer = apply_color_change(eyebrow_layer_alpha, image, eyebrow_layer_color_change) 55 | 56 | return [ 57 | eyebrow_layer, # 0 58 | eyebrow_layer_alpha, # 1 59 | eyebrow_layer_color_change, # 2 60 | background_layer_1, # 3 61 | background_layer_alpha, # 4 62 | background_layer_color_change, # 5 63 | ] 64 | 65 | EYEBROW_LAYER_INDEX = 0 66 | EYEBROW_LAYER_ALPHA_INDEX = 1 67 | EYEBROW_LAYER_COLOR_CHANGE_INDEX = 2 68 | BACKGROUND_LAYER_INDEX = 3 69 | BACKGROUND_LAYER_ALPHA_INDEX = 4 70 | BACKGROUND_LAYER_COLOR_CHANGE_INDEX = 5 71 | OUTPUT_LENGTH = 6 72 | 73 | def forward_from_batch(self, batch: List[Tensor]): 74 | return self.forward(batch[0]) 75 | 76 | 77 | class EyebrowDecomposer00Factory(BatchInputModuleFactory): 78 | def 
__init__(self, args: EyebrowDecomposer00Args): 79 | super().__init__() 80 | self.args = args 81 | 82 | def create(self) -> BatchInputModule: 83 | return EyebrowDecomposer00(self.args) 84 | 85 | 86 | if __name__ == "__main__": 87 | cuda = torch.device('cuda') 88 | args = EyebrowDecomposer00Args( 89 | image_size=128, 90 | image_channels=4, 91 | start_channels=64, 92 | bottleneck_image_size=16, 93 | num_bottleneck_blocks=3, 94 | block_args=BlockArgs( 95 | initialization_method='xavier', 96 | use_spectral_norm=False, 97 | normalization_layer_factory=InstanceNorm2dFactory(), 98 | nonlinearity_factory=ReLUFactory(inplace=True))) 99 | face_morpher = EyebrowDecomposer00(args).to(cuda) 100 | 101 | image = torch.randn(8, 4, 128, 128, device=cuda) 102 | outputs = face_morpher.forward(image) 103 | for i in range(len(outputs)): 104 | print(i, outputs[i].shape) 105 | -------------------------------------------------------------------------------- /tha2/nn/eyebrow/eyebrow_morphing_combiner_00.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import torch 4 | from torch import Tensor 5 | 6 | from tha2.nn.backbone.poser_encoder_decoder_00 import PoserEncoderDecoder00Args, PoserEncoderDecoder00 7 | from tha2.nn.util import apply_color_change, apply_grid_change, apply_rgb_change 8 | from tha2.nn.batch_module.batch_input_module import BatchInputModule, BatchInputModuleFactory 9 | from tha2.nn.base.nonlinearity_factory import ReLUFactory 10 | from tha2.nn.base.normalization import InstanceNorm2dFactory 11 | from tha2.nn.base.util import BlockArgs 12 | 13 | 14 | class EyebrowMorphingCombiner00Args(PoserEncoderDecoder00Args): 15 | def __init__(self, 16 | image_size: int = 128, 17 | image_channels: int = 4, 18 | num_pose_params: int = 12, 19 | start_channels: int = 64, 20 | bottleneck_image_size=16, 21 | num_bottleneck_blocks=6, 22 | max_channels: int = 512, 23 | block_args: Optional[BlockArgs] = None): 24 | super().__init__( 25 | image_size, 26 | 2 * image_channels, 27 | image_channels, 28 | num_pose_params, 29 | start_channels, 30 | bottleneck_image_size, 31 | num_bottleneck_blocks, 32 | max_channels, 33 | block_args) 34 | 35 | 36 | class EyebrowMorphingCombiner00(BatchInputModule): 37 | def __init__(self, args: EyebrowMorphingCombiner00Args): 38 | super().__init__() 39 | self.args = args 40 | self.body = PoserEncoderDecoder00(args) 41 | self.morphed_eyebrow_layer_grid_change = self.args.create_grid_change_block() 42 | self.morphed_eyebrow_layer_alpha = self.args.create_alpha_block() 43 | self.morphed_eyebrow_layer_color_change = self.args.create_color_change_block() 44 | self.combine_alpha = self.args.create_alpha_block() 45 | 46 | def forward(self, background_layer: Tensor, eyebrow_layer: Tensor, pose: Tensor) -> List[Tensor]: 47 | combined_image = torch.cat([background_layer, eyebrow_layer], dim=1) 48 | feature = self.body(combined_image, pose)[0] 49 | 50 | morphed_eyebrow_layer_grid_change = self.morphed_eyebrow_layer_grid_change(feature) 51 | morphed_eyebrow_layer_alpha = self.morphed_eyebrow_layer_alpha(feature) 52 | morphed_eyebrow_layer_color_change = self.morphed_eyebrow_layer_color_change(feature) 53 | warped_eyebrow_layer = apply_grid_change(morphed_eyebrow_layer_grid_change, eyebrow_layer) 54 | morphed_eyebrow_layer = apply_color_change( 55 | morphed_eyebrow_layer_alpha, morphed_eyebrow_layer_color_change, warped_eyebrow_layer) 56 | 57 | combine_alpha = self.combine_alpha(feature) 58 | eyebrow_image = 
apply_rgb_change(combine_alpha, morphed_eyebrow_layer, background_layer) 59 | eyebrow_image_no_combine_alpha = apply_rgb_change( 60 | (morphed_eyebrow_layer[:, 3:4, :, :] + 1.0) / 2.0, morphed_eyebrow_layer, background_layer) 61 | 62 | return [ 63 | eyebrow_image, # 0 64 | combine_alpha, # 1 65 | eyebrow_image_no_combine_alpha, # 2 66 | morphed_eyebrow_layer, # 3 67 | morphed_eyebrow_layer_alpha, # 4 68 | morphed_eyebrow_layer_color_change, # 5 69 | warped_eyebrow_layer, # 6 70 | morphed_eyebrow_layer_grid_change, # 7 71 | ] 72 | 73 | EYEBROW_IMAGE_INDEX = 0 74 | COMBINE_ALPHA_INDEX = 1 75 | EYEBROW_IMAGE_NO_COMBINE_ALPHA_INDEX = 2 76 | MORPHED_EYEBROW_LAYER_INDEX = 3 77 | MORPHED_EYEBROW_LAYER_ALPHA_INDEX = 4 78 | MORPHED_EYEBROW_LAYER_COLOR_CHANGE_INDEX = 5 79 | WARPED_EYEBROW_LAYER_INDEX = 6 80 | MORPHED_EYEBROW_LAYER_GRID_CHANGE_INDEX = 7 81 | OUTPUT_LENGTH = 8 82 | 83 | def forward_from_batch(self, batch: List[Tensor]): 84 | return self.forward(batch[0], batch[1], batch[2]) 85 | 86 | 87 | class EyebrowMorphingCombiner00Factory(BatchInputModuleFactory): 88 | def __init__(self, args: EyebrowMorphingCombiner00Args): 89 | super().__init__() 90 | self.args = args 91 | 92 | def create(self) -> BatchInputModule: 93 | return EyebrowMorphingCombiner00(self.args) 94 | 95 | 96 | if __name__ == "__main__": 97 | cuda = torch.device('cuda') 98 | args = EyebrowMorphingCombiner00Args( 99 | image_size=128, 100 | image_channels=4, 101 | num_pose_params=12, 102 | start_channels=64, 103 | bottleneck_image_size=16, 104 | num_bottleneck_blocks=3, 105 | block_args=BlockArgs( 106 | initialization_method='xavier', 107 | use_spectral_norm=False, 108 | normalization_layer_factory=InstanceNorm2dFactory(), 109 | nonlinearity_factory=ReLUFactory(inplace=True))) 110 | face_morpher = EyebrowMorphingCombiner00(args).to(cuda) 111 | 112 | background_layer = torch.randn(8, 4, 128, 128, device=cuda) 113 | eyebrow_layer = torch.randn(8, 4, 128, 128, device=cuda) 114 | pose = torch.randn(8, 12, device=cuda) 115 | outputs = face_morpher.forward(background_layer, eyebrow_layer, pose) 116 | for i in range(len(outputs)): 117 | print(i, outputs[i].shape) 118 | -------------------------------------------------------------------------------- /tha2/nn/face/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/face/__init__.py -------------------------------------------------------------------------------- /tha2/nn/face/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/face/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/nn/face/__pycache__/face_morpher_08.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/nn/face/__pycache__/face_morpher_08.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/nn/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.nn.functional import affine_grid, grid_sample 4 | 5 | 6 | def apply_rgb_change(alpha: Tensor, 
color_change: Tensor, image: Tensor): 7 | image_rgb = image[:, 0:3, :, :] 8 | color_change_rgb = color_change[:, 0:3, :, :] 9 | output_rgb = color_change_rgb * alpha + image_rgb * (1 - alpha) 10 | return torch.cat([output_rgb, image[:, 3:4, :, :]], dim=1) 11 | 12 | 13 | def apply_grid_change(grid_change, image: Tensor) -> Tensor: 14 | n, c, h, w = image.shape 15 | device = grid_change.device 16 | grid_change = torch.transpose(grid_change.view(n, 2, h * w), 1, 2).view(n, h, w, 2) 17 | identity = torch.tensor([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]], device=device).unsqueeze(0).repeat(n, 1, 1) 18 | base_grid = affine_grid(identity, [n, c, h, w], align_corners=False) 19 | grid = base_grid + grid_change 20 | resampled_image = grid_sample(image, grid, mode='bilinear', padding_mode='border', align_corners=False) 21 | return resampled_image 22 | 23 | 24 | def apply_color_change(alpha, color_change, image: Tensor) -> Tensor: 25 | return color_change * alpha + image * (1 - alpha) 26 | -------------------------------------------------------------------------------- /tha2/poser/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/poser/__init__.py -------------------------------------------------------------------------------- /tha2/poser/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/poser/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/poser/__pycache__/general_poser_02.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/poser/__pycache__/general_poser_02.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/poser/__pycache__/poser.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/poser/__pycache__/poser.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/poser/general_poser_02.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional, Tuple, Dict, Callable 2 | 3 | import torch 4 | from torch import Tensor 5 | 6 | from tha2.poser.poser import PoseParameterGroup, Poser 7 | from tha2.nn.batch_module.batch_input_module import BatchInputModule 8 | from tha2.compute.cached_computation_func import TensorListCachedComputationFunc 9 | 10 | 11 | class GeneralPoser02(Poser): 12 | def __init__(self, 13 | module_loaders: Dict[str, Callable[[], BatchInputModule]], 14 | device: torch.device, 15 | output_length: int, 16 | pose_parameters: List[PoseParameterGroup], 17 | output_list_func: TensorListCachedComputationFunc, 18 | subrect: Optional[Tuple[Tuple[int, int], Tuple[int, int]]] = None, 19 | default_output_index: int = 0): 20 | self.default_output_index = default_output_index 21 | self.output_list_func = output_list_func 22 | self.subrect = subrect 23 | self.pose_parameters = pose_parameters 24 | self.device = device 25 | self.module_loaders = module_loaders 26 | 27 | 
self.modules = None 28 | 29 | self.num_parameters = 0 30 | for pose_parameter in self.pose_parameters: 31 | self.num_parameters += pose_parameter.get_arity() 32 | 33 | self.output_length = output_length 34 | 35 | def get_modules(self): 36 | if self.modules is None: 37 | self.modules = {} 38 | for key in self.module_loaders: 39 | module = self.module_loaders[key]() 40 | self.modules[key] = module 41 | module.to(self.device) 42 | module.train(False) 43 | return self.modules 44 | 45 | def get_pose_parameter_groups(self) -> List[PoseParameterGroup]: 46 | return self.pose_parameters 47 | 48 | def get_num_parameters(self) -> int: 49 | return self.num_parameters 50 | 51 | def pose(self, image: Tensor, pose: Tensor, output_index: Optional[int] = None) -> Tensor: 52 | if output_index is None: 53 | output_index = self.default_output_index 54 | output_list = self.get_posing_outputs(image, pose) 55 | return output_list[output_index] 56 | 57 | def get_posing_outputs(self, image: Tensor, pose: Tensor) -> List[Tensor]: 58 | modules = self.get_modules() 59 | 60 | if len(image.shape) == 3: 61 | image = image.unsqueeze(0) 62 | if len(pose.shape) == 1: 63 | pose = pose.unsqueeze(0) 64 | if self.subrect is not None: 65 | image = image[:, :, self.subrect[0][0]:self.subrect[0][1], self.subrect[1][0]:self.subrect[1][1]] 66 | batch = [image, pose] 67 | 68 | outputs = {} 69 | return self.output_list_func(modules, batch, outputs) 70 | 71 | def get_output_length(self) -> int: 72 | return self.output_length 73 | -------------------------------------------------------------------------------- /tha2/poser/modes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/poser/modes/__init__.py -------------------------------------------------------------------------------- /tha2/poser/modes/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/poser/modes/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/poser/modes/__pycache__/mode_20.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/poser/modes/__pycache__/mode_20.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/poser/modes/__pycache__/mode_20_wx.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha2/poser/modes/__pycache__/mode_20_wx.cpython-310.pyc -------------------------------------------------------------------------------- /tha2/poser/poser.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from enum import Enum 3 | from typing import Tuple, List, Optional 4 | 5 | from torch import Tensor 6 | 7 | 8 | class PoseParameterCategory(Enum): 9 | EYEBROW = 1 10 | EYE = 2 11 | IRIS_MORPH = 3 12 | IRIS_ROTATION = 4 13 | MOUTH = 5 14 | FACE_ROTATION = 6 15 | 16 | 17 | class PoseParameterGroup: 18 | def __init__(self, 19 | group_name: str, 20 | parameter_index: int, 21 | category: 
PoseParameterCategory, 22 | arity: int = 1, 23 | discrete: bool = False, 24 | default_value: float = 0.0, 25 | range: Optional[Tuple[float, float]] = None): 26 | assert arity == 1 or arity == 2 27 | if range is None: 28 | range = (0.0, 1.0) 29 | if arity == 1: 30 | parameter_names = [group_name] 31 | else: 32 | parameter_names = [group_name + "_left", group_name + "_right"] 33 | assert len(parameter_names) == arity 34 | 35 | self.parameter_names = parameter_names 36 | self.range = range 37 | self.default_value = default_value 38 | self.discrete = discrete 39 | self.arity = arity 40 | self.category = category 41 | self.parameter_index = parameter_index 42 | self.group_name = group_name 43 | 44 | def get_arity(self) -> int: 45 | return self.arity 46 | 47 | def get_group_name(self) -> str: 48 | return self.group_name 49 | 50 | def get_parameter_names(self) -> List[str]: 51 | return self.parameter_names 52 | 53 | def is_discrete(self) -> bool: 54 | return self.discrete 55 | 56 | def get_range(self) -> Tuple[float, float]: 57 | return self.range 58 | 59 | def get_default_value(self): 60 | return self.default_value 61 | 62 | def get_parameter_index(self): 63 | return self.parameter_index 64 | 65 | def get_category(self) -> PoseParameterCategory: 66 | return self.category 67 | 68 | 69 | class PoseParameters: 70 | def __init__(self, pose_parameter_groups: List[PoseParameterGroup]): 71 | self.pose_parameter_groups = pose_parameter_groups 72 | 73 | def get_parameter_index(self, name: str) -> int: 74 | index = 0 75 | for parameter_group in self.pose_parameter_groups: 76 | for param_name in parameter_group.parameter_names: 77 | if name == param_name: 78 | return index 79 | index += 1 80 | raise RuntimeError("Cannot find parameter with name %s" % name) 81 | 82 | def get_parameter_name(self, index: int) -> str: 83 | assert index >= 0 and index < self.get_parameter_count() 84 | 85 | for group in self.pose_parameter_groups: 86 | if index < group.get_arity(): 87 | return group.get_parameter_names()[index] 88 | index -= group.arity 89 | 90 | raise RuntimeError("Something is wrong here!!!") 91 | 92 | def get_pose_parameter_groups(self): 93 | return self.pose_parameter_groups 94 | 95 | def get_parameter_count(self): 96 | count = 0 97 | for group in self.pose_parameter_groups: 98 | count += group.arity 99 | return count 100 | 101 | class Builder: 102 | def __init__(self): 103 | self.index = 0 104 | self.pose_parameter_groups = [] 105 | 106 | def add_parameter_group(self, 107 | group_name: str, 108 | category: PoseParameterCategory, 109 | arity: int = 1, 110 | discrete: bool = False, 111 | default_value: float = 0.0, 112 | range: Optional[Tuple[float, float]] = None): 113 | self.pose_parameter_groups.append( 114 | PoseParameterGroup( 115 | group_name, 116 | self.index, 117 | category, 118 | arity, 119 | discrete, 120 | default_value, 121 | range)) 122 | self.index += arity 123 | return self 124 | 125 | def build(self) -> 'PoseParameters': 126 | return PoseParameters(self.pose_parameter_groups) 127 | 128 | 129 | class Poser(ABC): 130 | @abstractmethod 131 | def get_output_length(self) -> int: 132 | pass 133 | 134 | @abstractmethod 135 | def get_pose_parameter_groups(self) -> List[PoseParameterGroup]: 136 | pass 137 | 138 | @abstractmethod 139 | def get_num_parameters(self) -> int: 140 | pass 141 | 142 | @abstractmethod 143 | def pose(self, image: Tensor, pose: Tensor, output_index: int = 0) -> Tensor: 144 | pass 145 | 146 | @abstractmethod 147 | def get_posing_outputs(self, image: Tensor, pose: Tensor) -> 
List[Tensor]: 148 | pass 149 | -------------------------------------------------------------------------------- /tha3/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/__init__.py -------------------------------------------------------------------------------- /tha3/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/__pycache__/util.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/__pycache__/util.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/app/__init__.py -------------------------------------------------------------------------------- /tha3/compute/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/compute/__init__.py -------------------------------------------------------------------------------- /tha3/compute/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/compute/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/compute/__pycache__/cached_computation_func.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/compute/__pycache__/cached_computation_func.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/compute/__pycache__/cached_computation_protocol.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/compute/__pycache__/cached_computation_protocol.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/compute/cached_computation_func.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Dict, List 2 | 3 | from torch import Tensor 4 | from torch.nn import Module 5 | 6 | TensorCachedComputationFunc = Callable[ 7 | [Dict[str, Module], List[Tensor], Dict[str, List[Tensor]]], Tensor] 8 | TensorListCachedComputationFunc = Callable[ 9 | [Dict[str, Module], List[Tensor], Dict[str, List[Tensor]]], List[Tensor]] 10 | -------------------------------------------------------------------------------- /tha3/compute/cached_computation_protocol.py: -------------------------------------------------------------------------------- 1 | from 
abc import ABC, abstractmethod 2 | from typing import Dict, List 3 | 4 | from torch import Tensor 5 | from torch.nn import Module 6 | 7 | from tha3.compute.cached_computation_func import TensorCachedComputationFunc, TensorListCachedComputationFunc 8 | 9 | 10 | class CachedComputationProtocol(ABC): 11 | def get_output(self, 12 | key: str, 13 | modules: Dict[str, Module], 14 | batch: List[Tensor], 15 | outputs: Dict[str, List[Tensor]]): 16 | if key in outputs: 17 | return outputs[key] 18 | else: 19 | output = self.compute_output(key, modules, batch, outputs) 20 | outputs[key] = output 21 | return outputs[key] 22 | 23 | @abstractmethod 24 | def compute_output(self, 25 | key: str, 26 | modules: Dict[str, Module], 27 | batch: List[Tensor], 28 | outputs: Dict[str, List[Tensor]]) -> List[Tensor]: 29 | pass 30 | 31 | def get_output_tensor_func(self, key: str, index: int) -> TensorCachedComputationFunc: 32 | def func(modules: Dict[str, Module], 33 | batch: List[Tensor], 34 | outputs: Dict[str, List[Tensor]]): 35 | return self.get_output(key, modules, batch, outputs)[index] 36 | return func 37 | 38 | def get_output_tensor_list_func(self, key: str) -> TensorListCachedComputationFunc: 39 | def func(modules: Dict[str, Module], 40 | batch: List[Tensor], 41 | outputs: Dict[str, List[Tensor]]): 42 | return self.get_output(key, modules, batch, outputs) 43 | return func -------------------------------------------------------------------------------- /tha3/mocap/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/mocap/__init__.py -------------------------------------------------------------------------------- /tha3/mocap/ifacialmocap_pose.py: -------------------------------------------------------------------------------- 1 | from tha3.mocap.ifacialmocap_constants import BLENDSHAPE_NAMES, HEAD_BONE_X, HEAD_BONE_Y, HEAD_BONE_Z, \ 2 | HEAD_BONE_QUAT, LEFT_EYE_BONE_X, LEFT_EYE_BONE_Y, LEFT_EYE_BONE_Z, LEFT_EYE_BONE_QUAT, RIGHT_EYE_BONE_X, \ 3 | RIGHT_EYE_BONE_Y, RIGHT_EYE_BONE_Z, RIGHT_EYE_BONE_QUAT 4 | 5 | 6 | def create_default_ifacialmocap_pose(): 7 | data = {} 8 | 9 | for blendshape_name in BLENDSHAPE_NAMES: 10 | data[blendshape_name] = 0.0 11 | 12 | data[HEAD_BONE_X] = 0.0 13 | data[HEAD_BONE_Y] = 0.0 14 | data[HEAD_BONE_Z] = 0.0 15 | data[HEAD_BONE_QUAT] = [0.0, 0.0, 0.0, 1.0] 16 | 17 | data[LEFT_EYE_BONE_X] = 0.0 18 | data[LEFT_EYE_BONE_Y] = 0.0 19 | data[LEFT_EYE_BONE_Z] = 0.0 20 | data[LEFT_EYE_BONE_QUAT] = [0.0, 0.0, 0.0, 1.0] 21 | 22 | data[RIGHT_EYE_BONE_X] = 0.0 23 | data[RIGHT_EYE_BONE_Y] = 0.0 24 | data[RIGHT_EYE_BONE_Z] = 0.0 25 | data[RIGHT_EYE_BONE_QUAT] = [0.0, 0.0, 0.0, 1.0] 26 | 27 | return data -------------------------------------------------------------------------------- /tha3/mocap/ifacialmocap_pose_converter.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Dict, List 3 | 4 | 5 | class IFacialMocapPoseConverter(ABC): 6 | @abstractmethod 7 | def convert(self, ifacialmocap_pose: Dict[str, float]) -> List[float]: 8 | pass 9 | 10 | @abstractmethod 11 | def init_pose_converter_panel(self, parent): 12 | pass -------------------------------------------------------------------------------- /tha3/mocap/ifacialmocap_v2.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from 
tha3.mocap.ifacialmocap_constants import BLENDSHAPE_NAMES, HEAD_BONE_X, HEAD_BONE_Y, HEAD_BONE_Z, \ 4 | RIGHT_EYE_BONE_X, RIGHT_EYE_BONE_Y, RIGHT_EYE_BONE_Z, LEFT_EYE_BONE_X, LEFT_EYE_BONE_Y, LEFT_EYE_BONE_Z, \ 5 | HEAD_BONE_QUAT, LEFT_EYE_BONE_QUAT, RIGHT_EYE_BONE_QUAT 6 | 7 | IFACIALMOCAP_PORT = 49983 8 | IFACIALMOCAP_START_STRING = "iFacialMocap_sahuasouryya9218sauhuiayeta91555dy3719|sendDataVersion=v2".encode('utf-8') 9 | 10 | 11 | def parse_ifacialmocap_v2_pose(ifacialmocap_output): 12 | output = {} 13 | parts = ifacialmocap_output.split("|") 14 | for part in parts: 15 | part = part.strip() 16 | if len(part) == 0: 17 | continue 18 | if "&" in part: 19 | components = part.split("&") 20 | assert len(components) == 2 21 | key = components[0] 22 | value = float(components[1]) / 100.0 23 | if key.endswith("_L"): 24 | key = key[:-2] + "Left" 25 | elif key.endswith("_R"): 26 | key = key[:-2] + "Right" 27 | if key in BLENDSHAPE_NAMES: 28 | output[key] = value 29 | elif part.startswith("=head#"): 30 | components = part[len("=head#"):].split(",") 31 | assert len(components) == 6 32 | output[HEAD_BONE_X] = float(components[0]) * math.pi / 180 33 | output[HEAD_BONE_Y] = float(components[1]) * math.pi / 180 34 | output[HEAD_BONE_Z] = float(components[2]) * math.pi / 180 35 | elif part.startswith("rightEye#"): 36 | components = part[len("rightEye#"):].split(",") 37 | output[RIGHT_EYE_BONE_X] = float(components[0]) * math.pi / 180 38 | output[RIGHT_EYE_BONE_Y] = float(components[1]) * math.pi / 180 39 | output[RIGHT_EYE_BONE_Z] = float(components[2]) * math.pi / 180 40 | elif part.startswith("leftEye#"): 41 | components = part[len("leftEye#"):].split(",") 42 | output[LEFT_EYE_BONE_X] = float(components[0]) * math.pi / 180 43 | output[LEFT_EYE_BONE_Y] = float(components[1]) * math.pi / 180 44 | output[LEFT_EYE_BONE_Z] = float(components[2]) * math.pi / 180 45 | output[HEAD_BONE_QUAT] = [0.0, 0.0, 0.0, 1.0] 46 | output[LEFT_EYE_BONE_QUAT] = [0.0, 0.0, 0.0, 1.0] 47 | output[RIGHT_EYE_BONE_QUAT] = [0.0, 0.0, 0.0, 1.0] 48 | return output 49 | 50 | 51 | -------------------------------------------------------------------------------- /tha3/module/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/module/__init__.py -------------------------------------------------------------------------------- /tha3/module/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/module/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/module/__pycache__/module_factory.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/module/__pycache__/module_factory.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/module/module_factory.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | from torch.nn import Module 4 | 5 | 6 | class ModuleFactory(ABC): 7 | @abstractmethod 8 | def create(self) -> Module: 9 | pass 
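A quick sanity check of the iFacialMocap v2 parser shown a few entries above (a minimal sketch: the payload string is invented for illustration, and it assumes "mouthSmileLeft" appears in BLENDSHAPE_NAMES, as the "_L"/"_R" suffix rewriting suggests):

import math
from tha3.mocap.ifacialmocap_constants import HEAD_BONE_X
from tha3.mocap.ifacialmocap_v2 import parse_ifacialmocap_v2_pose

# One blendshape at 45%, head pitched 10 degrees, both eyes centered.
packet = "mouthSmile_L&45|=head#10.0,0.0,0.0,0.0,0.0,0.0|rightEye#0.0,0.0,0.0|leftEye#0.0,0.0,0.0"
pose = parse_ifacialmocap_v2_pose(packet)
assert abs(pose["mouthSmileLeft"] - 0.45) < 1e-6             # "_L" suffix becomes "Left"; values arrive as percentages
assert abs(pose[HEAD_BONE_X] - 10.0 * math.pi / 180) < 1e-6  # head angles arrive in degrees, stored in radians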
-------------------------------------------------------------------------------- /tha3/nn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/__init__.py -------------------------------------------------------------------------------- /tha3/nn/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/__pycache__/conv.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/__pycache__/conv.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/__pycache__/image_processing_util.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/__pycache__/image_processing_util.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/__pycache__/init_function.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/__pycache__/init_function.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/__pycache__/nonlinearity_factory.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/__pycache__/nonlinearity_factory.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/__pycache__/normalization.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/__pycache__/normalization.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/__pycache__/pass_through.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/__pycache__/pass_through.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/__pycache__/resnet_block.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/__pycache__/resnet_block.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/__pycache__/resnet_block_seperable.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/__pycache__/resnet_block_seperable.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/__pycache__/separable_conv.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/__pycache__/separable_conv.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/__pycache__/spectral_norm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/__pycache__/spectral_norm.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/__pycache__/util.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/__pycache__/util.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/common/__init__.py -------------------------------------------------------------------------------- /tha3/nn/common/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/common/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/common/__pycache__/conv_block_factory.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/common/__pycache__/conv_block_factory.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/common/__pycache__/poser_args.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/common/__pycache__/poser_args.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/common/__pycache__/poser_encoder_decoder_00.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/common/__pycache__/poser_encoder_decoder_00.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/common/__pycache__/poser_encoder_decoder_00_separable.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/common/__pycache__/poser_encoder_decoder_00_separable.cpython-310.pyc -------------------------------------------------------------------------------- 
/tha3/nn/common/__pycache__/resize_conv_encoder_decoder.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/common/__pycache__/resize_conv_encoder_decoder.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/common/__pycache__/resize_conv_unet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/common/__pycache__/resize_conv_unet.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/common/conv_block_factory.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from tha3.nn.conv import create_conv7_block_from_block_args, create_conv3_block_from_block_args, \ 4 | create_downsample_block_from_block_args, create_conv3 5 | from tha3.nn.resnet_block import ResnetBlock 6 | from tha3.nn.resnet_block_seperable import ResnetBlockSeparable 7 | from tha3.nn.separable_conv import create_separable_conv7_block, create_separable_conv3_block, \ 8 | create_separable_downsample_block, create_separable_conv3 9 | from tha3.nn.util import BlockArgs 10 | 11 | 12 | class ConvBlockFactory: 13 | def __init__(self, 14 | block_args: BlockArgs, 15 | use_separable_convolution: bool = False): 16 | self.use_separable_convolution = use_separable_convolution 17 | self.block_args = block_args 18 | 19 | def create_conv3(self, 20 | in_channels: int, 21 | out_channels: int, 22 | bias: bool, 23 | initialization_method: Optional[str] = None): 24 | if initialization_method is None: 25 | initialization_method = self.block_args.initialization_method 26 | if self.use_separable_convolution: 27 | return create_separable_conv3( 28 | in_channels, out_channels, bias, initialization_method, self.block_args.use_spectral_norm) 29 | else: 30 | return create_conv3( 31 | in_channels, out_channels, bias, initialization_method, self.block_args.use_spectral_norm) 32 | 33 | def create_conv7_block(self, in_channels: int, out_channels: int): 34 | if self.use_separable_convolution: 35 | return create_separable_conv7_block(in_channels, out_channels, self.block_args) 36 | else: 37 | return create_conv7_block_from_block_args(in_channels, out_channels, self.block_args) 38 | 39 | def create_conv3_block(self, in_channels: int, out_channels: int): 40 | if self.use_separable_convolution: 41 | return create_separable_conv3_block(in_channels, out_channels, self.block_args) 42 | else: 43 | return create_conv3_block_from_block_args(in_channels, out_channels, self.block_args) 44 | 45 | def create_downsample_block(self, in_channels: int, out_channels: int, is_output_1x1: bool): 46 | if self.use_separable_convolution: 47 | return create_separable_downsample_block(in_channels, out_channels, is_output_1x1, self.block_args) 48 | else: 49 | return create_downsample_block_from_block_args(in_channels, out_channels, is_output_1x1, self.block_args)  # pass block_args here too, matching the separable branch 50 | 51 | def create_resnet_block(self, num_channels: int, is_1x1: bool): 52 | if self.use_separable_convolution: 53 | return ResnetBlockSeparable.create(num_channels, is_1x1, block_args=self.block_args) 54 | else: 55 | return ResnetBlock.create(num_channels, is_1x1, block_args=self.block_args) --------------------------------------------------------------------------------
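ConvBlockFactory lets every caller switch between regular and depthwise-separable convolutions with a single flag. A minimal sketch of what it encapsulates (the BlockArgs settings mirror the __main__ demos elsewhere in this repo; the channel counts are illustrative):

from tha3.nn.common.conv_block_factory import ConvBlockFactory
from tha3.nn.nonlinearity_factory import ReLUFactory
from tha3.nn.normalization import InstanceNorm2dFactory
from tha3.nn.util import BlockArgs

block_args = BlockArgs(
    initialization_method='he',
    use_spectral_norm=False,
    normalization_layer_factory=InstanceNorm2dFactory(),
    nonlinearity_factory=ReLUFactory(inplace=True))

# The same call site yields either flavor; the separable variant needs far fewer parameters.
for separable in (False, True):
    factory = ConvBlockFactory(block_args, use_separable_convolution=separable)
    block = factory.create_conv3_block(in_channels=64, out_channels=64)
    print(f"separable={separable}: {sum(p.numel() for p in block.parameters())} parameters")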
/tha3/nn/common/poser_args.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from torch.nn import Sigmoid, Sequential, Tanh 4 | 5 | from tha3.nn.conv import create_conv3, create_conv3_from_block_args 6 | from tha3.nn.nonlinearity_factory import ReLUFactory 7 | from tha3.nn.normalization import InstanceNorm2dFactory 8 | from tha3.nn.util import BlockArgs 9 | 10 | 11 | class PoserArgs00: 12 | def __init__(self, 13 | image_size: int, 14 | input_image_channels: int, 15 | output_image_channels: int, 16 | start_channels: int, 17 | num_pose_params: int, 18 | block_args: Optional[BlockArgs] = None): 19 | self.num_pose_params = num_pose_params 20 | self.start_channels = start_channels 21 | self.output_image_channels = output_image_channels 22 | self.input_image_channels = input_image_channels 23 | self.image_size = image_size 24 | if block_args is None: 25 | self.block_args = BlockArgs( 26 | normalization_layer_factory=InstanceNorm2dFactory(), 27 | nonlinearity_factory=ReLUFactory(inplace=True)) 28 | else: 29 | self.block_args = block_args 30 | 31 | def create_alpha_block(self): 32 | from torch.nn import Sequential 33 | return Sequential( 34 | create_conv3( 35 | in_channels=self.start_channels, 36 | out_channels=1, 37 | bias=True, 38 | initialization_method=self.block_args.initialization_method, 39 | use_spectral_norm=False), 40 | Sigmoid()) 41 | 42 | def create_all_channel_alpha_block(self): 43 | from torch.nn import Sequential 44 | return Sequential( 45 | create_conv3( 46 | in_channels=self.start_channels, 47 | out_channels=self.output_image_channels, 48 | bias=True, 49 | initialization_method=self.block_args.initialization_method, 50 | use_spectral_norm=False), 51 | Sigmoid()) 52 | 53 | def create_color_change_block(self): 54 | return Sequential( 55 | create_conv3_from_block_args( 56 | in_channels=self.start_channels, 57 | out_channels=self.output_image_channels, 58 | bias=True, 59 | block_args=self.block_args), 60 | Tanh()) 61 | 62 | def create_grid_change_block(self): 63 | return create_conv3( 64 | in_channels=self.start_channels, 65 | out_channels=2, 66 | bias=False, 67 | initialization_method='zero', 68 | use_spectral_norm=False) -------------------------------------------------------------------------------- /tha3/nn/common/poser_encoder_decoder_00.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import Optional, List 3 | 4 | import torch 5 | from torch import Tensor 6 | from torch.nn import ModuleList, Module 7 | 8 | from tha3.nn.common.poser_args import PoserArgs00 9 | from tha3.nn.conv import create_conv3_block_from_block_args, create_downsample_block_from_block_args, \ 10 | create_upsample_block_from_block_args 11 | from tha3.nn.nonlinearity_factory import ReLUFactory 12 | from tha3.nn.normalization import InstanceNorm2dFactory 13 | from tha3.nn.resnet_block import ResnetBlock 14 | from tha3.nn.util import BlockArgs 15 | 16 | 17 | class PoserEncoderDecoder00Args(PoserArgs00): 18 | def __init__(self, 19 | image_size: int, 20 | input_image_channels: int, 21 | output_image_channels: int, 22 | num_pose_params: int , 23 | start_channels: int, 24 | bottleneck_image_size, 25 | num_bottleneck_blocks, 26 | max_channels: int, 27 | block_args: Optional[BlockArgs] = None): 28 | super().__init__( 29 | image_size, input_image_channels, output_image_channels, start_channels, num_pose_params, block_args) 30 | self.max_channels = max_channels 31 | 
self.num_bottleneck_blocks = num_bottleneck_blocks 32 | self.bottleneck_image_size = bottleneck_image_size 33 | assert bottleneck_image_size > 1 34 | 35 | if block_args is None: 36 | self.block_args = BlockArgs( 37 | normalization_layer_factory=InstanceNorm2dFactory(), 38 | nonlinearity_factory=ReLUFactory(inplace=True)) 39 | else: 40 | self.block_args = block_args 41 | 42 | 43 | class PoserEncoderDecoder00(Module): 44 | def __init__(self, args: PoserEncoderDecoder00Args): 45 | super().__init__() 46 | self.args = args 47 | 48 | self.num_levels = int(math.log2(args.image_size // args.bottleneck_image_size)) + 1 49 | 50 | self.downsample_blocks = ModuleList() 51 | self.downsample_blocks.append( 52 | create_conv3_block_from_block_args( 53 | args.input_image_channels, 54 | args.start_channels, 55 | args.block_args)) 56 | current_image_size = args.image_size 57 | current_num_channels = args.start_channels 58 | while current_image_size > args.bottleneck_image_size: 59 | next_image_size = current_image_size // 2 60 | next_num_channels = self.get_num_output_channels_from_image_size(next_image_size) 61 | self.downsample_blocks.append(create_downsample_block_from_block_args( 62 | in_channels=current_num_channels, 63 | out_channels=next_num_channels, 64 | is_output_1x1=False, 65 | block_args=args.block_args)) 66 | current_image_size = next_image_size 67 | current_num_channels = next_num_channels 68 | assert len(self.downsample_blocks) == self.num_levels 69 | 70 | self.bottleneck_blocks = ModuleList() 71 | self.bottleneck_blocks.append(create_conv3_block_from_block_args( 72 | in_channels=current_num_channels + args.num_pose_params, 73 | out_channels=current_num_channels, 74 | block_args=args.block_args)) 75 | for i in range(1, args.num_bottleneck_blocks): 76 | self.bottleneck_blocks.append( 77 | ResnetBlock.create( 78 | num_channels=current_num_channels, 79 | is1x1=False, 80 | block_args=args.block_args)) 81 | 82 | self.upsample_blocks = ModuleList() 83 | while current_image_size < args.image_size: 84 | next_image_size = current_image_size * 2 85 | next_num_channels = self.get_num_output_channels_from_image_size(next_image_size) 86 | self.upsample_blocks.append(create_upsample_block_from_block_args( 87 | in_channels=current_num_channels, 88 | out_channels=next_num_channels, 89 | block_args=args.block_args)) 90 | current_image_size = next_image_size 91 | current_num_channels = next_num_channels 92 | 93 | def get_num_output_channels_from_level(self, level: int): 94 | return self.get_num_output_channels_from_image_size(self.args.image_size // (2 ** level)) 95 | 96 | def get_num_output_channels_from_image_size(self, image_size: int): 97 | return min(self.args.start_channels * (self.args.image_size // image_size), self.args.max_channels) 98 | 99 | def forward(self, image: Tensor, pose: Optional[Tensor] = None) -> List[Tensor]: 100 | if self.args.num_pose_params != 0: 101 | assert pose is not None 102 | else: 103 | assert pose is None 104 | outputs = [] 105 | feature = image 106 | outputs.append(feature) 107 | for block in self.downsample_blocks: 108 | feature = block(feature) 109 | outputs.append(feature) 110 | if pose is not None: 111 | n, c = pose.shape 112 | pose = pose.view(n, c, 1, 1).repeat(1, 1, self.args.bottleneck_image_size, self.args.bottleneck_image_size) 113 | feature = torch.cat([feature, pose], dim=1) 114 | for block in self.bottleneck_blocks: 115 | feature = block(feature) 116 | outputs.append(feature) 117 | for block in self.upsample_blocks: 118 | feature = block(feature) 119 | 
outputs.append(feature) 120 | outputs.reverse() 121 | return outputs 122 | -------------------------------------------------------------------------------- /tha3/nn/common/poser_encoder_decoder_00_separable.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import Optional, List 3 | 4 | import torch 5 | from torch import Tensor 6 | from torch.nn import ModuleList, Module 7 | 8 | from tha3.nn.common.poser_encoder_decoder_00 import PoserEncoderDecoder00Args 9 | from tha3.nn.resnet_block_seperable import ResnetBlockSeparable 10 | from tha3.nn.separable_conv import create_separable_conv3_block, create_separable_downsample_block, \ 11 | create_separable_upsample_block 12 | 13 | 14 | class PoserEncoderDecoder00Separable(Module): 15 | def __init__(self, args: PoserEncoderDecoder00Args): 16 | super().__init__() 17 | self.args = args 18 | 19 | self.num_levels = int(math.log2(args.image_size // args.bottleneck_image_size)) + 1 20 | 21 | self.downsample_blocks = ModuleList() 22 | self.downsample_blocks.append( 23 | create_separable_conv3_block( 24 | args.input_image_channels, 25 | args.start_channels, 26 | args.block_args)) 27 | current_image_size = args.image_size 28 | current_num_channels = args.start_channels 29 | while current_image_size > args.bottleneck_image_size: 30 | next_image_size = current_image_size // 2 31 | next_num_channels = self.get_num_output_channels_from_image_size(next_image_size) 32 | self.downsample_blocks.append(create_separable_downsample_block( 33 | in_channels=current_num_channels, 34 | out_channels=next_num_channels, 35 | is_output_1x1=False, 36 | block_args=args.block_args)) 37 | current_image_size = next_image_size 38 | current_num_channels = next_num_channels 39 | assert len(self.downsample_blocks) == self.num_levels 40 | 41 | self.bottleneck_blocks = ModuleList() 42 | self.bottleneck_blocks.append(create_separable_conv3_block( 43 | in_channels=current_num_channels + args.num_pose_params, 44 | out_channels=current_num_channels, 45 | block_args=args.block_args)) 46 | for i in range(1, args.num_bottleneck_blocks): 47 | self.bottleneck_blocks.append( 48 | ResnetBlockSeparable.create( 49 | num_channels=current_num_channels, 50 | is1x1=False, 51 | block_args=args.block_args)) 52 | 53 | self.upsample_blocks = ModuleList() 54 | while current_image_size < args.image_size: 55 | next_image_size = current_image_size * 2 56 | next_num_channels = self.get_num_output_channels_from_image_size(next_image_size) 57 | self.upsample_blocks.append(create_separable_upsample_block( 58 | in_channels=current_num_channels, 59 | out_channels=next_num_channels, 60 | block_args=args.block_args)) 61 | current_image_size = next_image_size 62 | current_num_channels = next_num_channels 63 | 64 | def get_num_output_channels_from_level(self, level: int): 65 | return self.get_num_output_channels_from_image_size(self.args.image_size // (2 ** level)) 66 | 67 | def get_num_output_channels_from_image_size(self, image_size: int): 68 | return min(self.args.start_channels * (self.args.image_size // image_size), self.args.max_channels) 69 | 70 | def forward(self, image: Tensor, pose: Optional[Tensor] = None) -> List[Tensor]: 71 | if self.args.num_pose_params != 0: 72 | assert pose is not None 73 | else: 74 | assert pose is None 75 | outputs = [] 76 | feature = image 77 | outputs.append(feature) 78 | for block in self.downsample_blocks: 79 | feature = block(feature) 80 | outputs.append(feature) 81 | if pose is not None: 82 | n, c = pose.shape 83 | 
pose = pose.view(n, c, 1, 1).repeat(1, 1, self.args.bottleneck_image_size, self.args.bottleneck_image_size) 84 | feature = torch.cat([feature, pose], dim=1) 85 | for block in self.bottleneck_blocks: 86 | feature = block(feature) 87 | outputs.append(feature) 88 | for block in self.upsample_blocks: 89 | feature = block(feature) 90 | outputs.append(feature) 91 | outputs.reverse() 92 | return outputs 93 | -------------------------------------------------------------------------------- /tha3/nn/common/resize_conv_encoder_decoder.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import Optional, List 3 | 4 | import torch 5 | from torch import Tensor 6 | from torch.nn import Module, ModuleList, Sequential, Upsample 7 | 8 | from tha3.nn.common.conv_block_factory import ConvBlockFactory 9 | from tha3.nn.nonlinearity_factory import LeakyReLUFactory 10 | from tha3.nn.normalization import InstanceNorm2dFactory 11 | from tha3.nn.util import BlockArgs 12 | 13 | 14 | class ResizeConvEncoderDecoderArgs: 15 | def __init__(self, 16 | image_size: int, 17 | input_channels: int, 18 | start_channels: int, 19 | bottleneck_image_size, 20 | num_bottleneck_blocks, 21 | max_channels: int, 22 | block_args: Optional[BlockArgs] = None, 23 | upsample_mode: str = 'bilinear', 24 | use_separable_convolution=False): 25 | self.use_separable_convolution = use_separable_convolution 26 | self.upsample_mode = upsample_mode 27 | self.block_args = block_args 28 | self.max_channels = max_channels 29 | self.num_bottleneck_blocks = num_bottleneck_blocks 30 | self.bottleneck_image_size = bottleneck_image_size 31 | self.start_channels = start_channels 32 | self.image_size = image_size 33 | self.input_channels = input_channels 34 | 35 | 36 | class ResizeConvEncoderDecoder(Module): 37 | def __init__(self, args: ResizeConvEncoderDecoderArgs): 38 | super().__init__() 39 | self.args = args 40 | 41 | self.num_levels = int(math.log2(args.image_size // args.bottleneck_image_size)) + 1 42 | 43 | conv_block_factory = ConvBlockFactory(args.block_args, args.use_separable_convolution) 44 | 45 | self.downsample_blocks = ModuleList() 46 | self.downsample_blocks.append(conv_block_factory.create_conv7_block(args.input_channels, args.start_channels)) 47 | current_image_size = args.image_size 48 | current_num_channels = args.start_channels 49 | while current_image_size > args.bottleneck_image_size: 50 | next_image_size = current_image_size // 2 51 | next_num_channels = self.get_num_output_channels_from_image_size(next_image_size) 52 | self.downsample_blocks.append(conv_block_factory.create_downsample_block( 53 | in_channels=current_num_channels, 54 | out_channels=next_num_channels, 55 | is_output_1x1=False)) 56 | current_image_size = next_image_size 57 | current_num_channels = next_num_channels 58 | assert len(self.downsample_blocks) == self.num_levels 59 | 60 | self.bottleneck_blocks = ModuleList() 61 | for i in range(args.num_bottleneck_blocks): 62 | self.bottleneck_blocks.append(conv_block_factory.create_resnet_block(current_num_channels, is_1x1=False)) 63 | 64 | self.output_image_sizes = [current_image_size] 65 | self.output_num_channels = [current_num_channels] 66 | self.upsample_blocks = ModuleList() 67 | if args.upsample_mode == 'nearest': 68 | align_corners = None 69 | else: 70 | align_corners = False 71 | while current_image_size < args.image_size: 72 | next_image_size = current_image_size * 2 73 | next_num_channels = self.get_num_output_channels_from_image_size(next_image_size) 
74 | self.upsample_blocks.append( 75 | Sequential( 76 | Upsample(scale_factor=2, mode=args.upsample_mode, align_corners=align_corners), 77 | conv_block_factory.create_conv3_block( 78 | in_channels=current_num_channels, out_channels=next_num_channels))) 79 | current_image_size = next_image_size 80 | current_num_channels = next_num_channels 81 | self.output_image_sizes.append(current_image_size) 82 | self.output_num_channels.append(current_num_channels) 83 | 84 | def get_num_output_channels_from_level(self, level: int): 85 | return self.get_num_output_channels_from_image_size(self.args.image_size // (2 ** level)) 86 | 87 | def get_num_output_channels_from_image_size(self, image_size: int): 88 | return min(self.args.start_channels * (self.args.image_size // image_size), self.args.max_channels) 89 | 90 | def forward(self, feature: Tensor) -> List[Tensor]: 91 | outputs = [] 92 | for block in self.downsample_blocks: 93 | feature = block(feature) 94 | for block in self.bottleneck_blocks: 95 | feature = block(feature) 96 | outputs.append(feature) 97 | for block in self.upsample_blocks: 98 | feature = block(feature) 99 | outputs.append(feature) 100 | return outputs 101 | 102 | 103 | if __name__ == "__main__": 104 | device = torch.device('cuda') 105 | args = ResizeConvEncoderDecoderArgs( 106 | image_size=512, 107 | input_channels=4 + 6, 108 | start_channels=32, 109 | bottleneck_image_size=32, 110 | num_bottleneck_blocks=6, 111 | max_channels=512, 112 | use_separable_convolution=True, 113 | block_args=BlockArgs( 114 | initialization_method='he', 115 | use_spectral_norm=False, 116 | normalization_layer_factory=InstanceNorm2dFactory(), 117 | nonlinearity_factory=LeakyReLUFactory(inplace=False, negative_slope=0.1))) 118 | module = ResizeConvEncoderDecoder(args).to(device) 119 | print(module.output_image_sizes) 120 | print(module.output_num_channels) 121 | 122 | input = torch.zeros(8, 4 + 6, 512, 512, device=device) 123 | outputs = module(input) 124 | for output in outputs: 125 | print(output.shape) 126 | -------------------------------------------------------------------------------- /tha3/nn/editor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/editor/__init__.py -------------------------------------------------------------------------------- /tha3/nn/editor/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/editor/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/editor/__pycache__/editor_07.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/editor/__pycache__/editor_07.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/eyebrow_decomposer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/eyebrow_decomposer/__init__.py -------------------------------------------------------------------------------- 
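The encoder-decoders earlier in this dump (PoserEncoderDecoder00, its separable variant, and ResizeConvEncoderDecoder) all share the same channel schedule, min(start_channels * (image_size // size), max_channels): channels double as resolution halves until max_channels caps the growth. A worked illustration using the hyperparameters from the ResizeConvEncoderDecoder __main__ demo above:

image_size, start_channels, max_channels = 512, 32, 512
bottleneck_image_size = 32

size = image_size
while size >= bottleneck_image_size:
    channels = min(start_channels * (image_size // size), max_channels)
    print(f"{size:3d} px -> {channels} channels")
    size //= 2
# Prints: 512 px -> 32, 256 px -> 64, 128 px -> 128, 64 px -> 256, 32 px -> 512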
/tha3/nn/eyebrow_decomposer/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/eyebrow_decomposer/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/eyebrow_decomposer/__pycache__/eyebrow_decomposer_00.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/eyebrow_decomposer/__pycache__/eyebrow_decomposer_00.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/eyebrow_decomposer/__pycache__/eyebrow_decomposer_03.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/eyebrow_decomposer/__pycache__/eyebrow_decomposer_03.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/eyebrow_decomposer/eyebrow_decomposer_00.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import torch 4 | from torch import Tensor 5 | from torch.nn import Module 6 | 7 | from tha3.nn.common.poser_encoder_decoder_00 import PoserEncoderDecoder00Args, PoserEncoderDecoder00 8 | from tha3.nn.image_processing_util import apply_color_change 9 | from tha3.module.module_factory import ModuleFactory 10 | from tha3.nn.nonlinearity_factory import ReLUFactory 11 | from tha3.nn.normalization import InstanceNorm2dFactory 12 | from tha3.nn.util import BlockArgs 13 | 14 | 15 | class EyebrowDecomposer00Args(PoserEncoderDecoder00Args): 16 | def __init__(self, 17 | image_size: int = 128, 18 | image_channels: int = 4, 19 | start_channels: int = 64, 20 | bottleneck_image_size=16, 21 | num_bottleneck_blocks=6, 22 | max_channels: int = 512, 23 | block_args: Optional[BlockArgs] = None): 24 | super().__init__( 25 | image_size, 26 | image_channels, 27 | image_channels, 28 | 0, 29 | start_channels, 30 | bottleneck_image_size, 31 | num_bottleneck_blocks, 32 | max_channels, 33 | block_args) 34 | 35 | 36 | class EyebrowDecomposer00(Module): 37 | def __init__(self, args: EyebrowDecomposer00Args): 38 | super().__init__() 39 | self.args = args 40 | self.body = PoserEncoderDecoder00(args) 41 | self.background_layer_alpha = self.args.create_alpha_block() 42 | self.background_layer_color_change = self.args.create_color_change_block() 43 | self.eyebrow_layer_alpha = self.args.create_alpha_block() 44 | self.eyebrow_layer_color_change = self.args.create_color_change_block() 45 | 46 | def forward(self, image: Tensor, *args) -> List[Tensor]: 47 | feature = self.body(image)[0] 48 | 49 | background_layer_alpha = self.background_layer_alpha(feature) 50 | background_layer_color_change = self.background_layer_color_change(feature) 51 | background_layer_1 = apply_color_change(background_layer_alpha, background_layer_color_change, image) 52 | 53 | eyebrow_layer_alpha = self.eyebrow_layer_alpha(feature) 54 | eyebrow_layer_color_change = self.eyebrow_layer_color_change(feature) 55 | eyebrow_layer = apply_color_change(eyebrow_layer_alpha, image, eyebrow_layer_color_change) 56 | 57 | return [ 58 | eyebrow_layer, # 0 59 | eyebrow_layer_alpha, # 1 60 | 
eyebrow_layer_color_change, # 2 61 | background_layer_1, # 3 62 | background_layer_alpha, # 4 63 | background_layer_color_change, # 5 64 | ] 65 | 66 | EYEBROW_LAYER_INDEX = 0 67 | EYEBROW_LAYER_ALPHA_INDEX = 1 68 | EYEBROW_LAYER_COLOR_CHANGE_INDEX = 2 69 | BACKGROUND_LAYER_INDEX = 3 70 | BACKGROUND_LAYER_ALPHA_INDEX = 4 71 | BACKGROUND_LAYER_COLOR_CHANGE_INDEX = 5 72 | OUTPUT_LENGTH = 6 73 | 74 | 75 | class EyebrowDecomposer00Factory(ModuleFactory): 76 | def __init__(self, args: EyebrowDecomposer00Args): 77 | super().__init__() 78 | self.args = args 79 | 80 | def create(self) -> Module: 81 | return EyebrowDecomposer00(self.args) 82 | 83 | 84 | if __name__ == "__main__": 85 | cuda = torch.device('cuda') 86 | args = EyebrowDecomposer00Args( 87 | image_size=128, 88 | image_channels=4, 89 | start_channels=64, 90 | bottleneck_image_size=16, 91 | num_bottleneck_blocks=3, 92 | block_args=BlockArgs( 93 | initialization_method='xavier', 94 | use_spectral_norm=False, 95 | normalization_layer_factory=InstanceNorm2dFactory(), 96 | nonlinearity_factory=ReLUFactory(inplace=True))) 97 | face_morpher = EyebrowDecomposer00(args).to(cuda) 98 | 99 | image = torch.randn(8, 4, 128, 128, device=cuda) 100 | outputs = face_morpher.forward(image) 101 | for i in range(len(outputs)): 102 | print(i, outputs[i].shape) 103 | -------------------------------------------------------------------------------- /tha3/nn/eyebrow_decomposer/eyebrow_decomposer_03.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import torch 4 | from torch import Tensor 5 | from torch.nn import Module 6 | 7 | from tha3.nn.common.poser_encoder_decoder_00 import PoserEncoderDecoder00Args 8 | from tha3.nn.common.poser_encoder_decoder_00_separable import PoserEncoderDecoder00Separable 9 | from tha3.nn.image_processing_util import apply_color_change 10 | from tha3.module.module_factory import ModuleFactory 11 | from tha3.nn.nonlinearity_factory import ReLUFactory 12 | from tha3.nn.normalization import InstanceNorm2dFactory 13 | from tha3.nn.util import BlockArgs 14 | 15 | 16 | class EyebrowDecomposer03Args(PoserEncoderDecoder00Args): 17 | def __init__(self, 18 | image_size: int = 128, 19 | image_channels: int = 4, 20 | start_channels: int = 64, 21 | bottleneck_image_size=16, 22 | num_bottleneck_blocks=6, 23 | max_channels: int = 512, 24 | block_args: Optional[BlockArgs] = None): 25 | super().__init__( 26 | image_size, 27 | image_channels, 28 | image_channels, 29 | 0, 30 | start_channels, 31 | bottleneck_image_size, 32 | num_bottleneck_blocks, 33 | max_channels, 34 | block_args) 35 | 36 | 37 | class EyebrowDecomposer03(Module): 38 | def __init__(self, args: EyebrowDecomposer03Args): 39 | super().__init__() 40 | self.args = args 41 | self.body = PoserEncoderDecoder00Separable(args) 42 | self.background_layer_alpha = self.args.create_alpha_block() 43 | self.background_layer_color_change = self.args.create_color_change_block() 44 | self.eyebrow_layer_alpha = self.args.create_alpha_block() 45 | self.eyebrow_layer_color_change = self.args.create_color_change_block() 46 | 47 | def forward(self, image: Tensor, *args) -> List[Tensor]: 48 | feature = self.body(image)[0] 49 | 50 | background_layer_alpha = self.background_layer_alpha(feature) 51 | background_layer_color_change = self.background_layer_color_change(feature) 52 | background_layer_1 = apply_color_change(background_layer_alpha, background_layer_color_change, image) 53 | 54 | eyebrow_layer_alpha = 
self.eyebrow_layer_alpha(feature) 55 | eyebrow_layer_color_change = self.eyebrow_layer_color_change(feature) 56 | eyebrow_layer = apply_color_change(eyebrow_layer_alpha, image, eyebrow_layer_color_change) 57 | 58 | return [ 59 | eyebrow_layer, # 0 60 | eyebrow_layer_alpha, # 1 61 | eyebrow_layer_color_change, # 2 62 | background_layer_1, # 3 63 | background_layer_alpha, # 4 64 | background_layer_color_change, # 5 65 | ] 66 | 67 | EYEBROW_LAYER_INDEX = 0 68 | EYEBROW_LAYER_ALPHA_INDEX = 1 69 | EYEBROW_LAYER_COLOR_CHANGE_INDEX = 2 70 | BACKGROUND_LAYER_INDEX = 3 71 | BACKGROUND_LAYER_ALPHA_INDEX = 4 72 | BACKGROUND_LAYER_COLOR_CHANGE_INDEX = 5 73 | OUTPUT_LENGTH = 6 74 | 75 | 76 | class EyebrowDecomposer03Factory(ModuleFactory): 77 | def __init__(self, args: EyebrowDecomposer03Args): 78 | super().__init__() 79 | self.args = args 80 | 81 | def create(self) -> Module: 82 | return EyebrowDecomposer03(self.args) 83 | 84 | 85 | if __name__ == "__main__": 86 | cuda = torch.device('cuda') 87 | args = EyebrowDecomposer03Args( 88 | image_size=128, 89 | image_channels=4, 90 | start_channels=64, 91 | bottleneck_image_size=16, 92 | num_bottleneck_blocks=6, 93 | block_args=BlockArgs( 94 | initialization_method='xavier', 95 | use_spectral_norm=False, 96 | normalization_layer_factory=InstanceNorm2dFactory(), 97 | nonlinearity_factory=ReLUFactory(inplace=True))) 98 | face_morpher = EyebrowDecomposer03(args).to(cuda) 99 | 100 | #image = torch.randn(8, 4, 128, 128, device=cuda) 101 | #outputs = face_morpher.forward(image) 102 | #for i in range(len(outputs)): 103 | # print(i, outputs[i].shape) 104 | 105 | state_dict = face_morpher.state_dict() 106 | index = 0 107 | for key in state_dict: 108 | print(f"[{index}]", key, state_dict[key].shape) 109 | index += 1 110 | -------------------------------------------------------------------------------- /tha3/nn/eyebrow_morphing_combiner/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/eyebrow_morphing_combiner/__init__.py -------------------------------------------------------------------------------- /tha3/nn/eyebrow_morphing_combiner/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/eyebrow_morphing_combiner/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/eyebrow_morphing_combiner/__pycache__/eyebrow_morphing_combiner_00.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/eyebrow_morphing_combiner/__pycache__/eyebrow_morphing_combiner_00.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/eyebrow_morphing_combiner/__pycache__/eyebrow_morphing_combiner_03.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/eyebrow_morphing_combiner/__pycache__/eyebrow_morphing_combiner_03.cpython-310.pyc -------------------------------------------------------------------------------- 
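Before the morphing combiner below, a minimal usage sketch of the decomposer stage (the random tensor stands in for a 128x128 RGBA eyebrow crop, as in the file's own __main__ block; the literal indices follow the # 0 ... # 5 comments in forward()):

import torch
from tha3.nn.eyebrow_decomposer.eyebrow_decomposer_00 import EyebrowDecomposer00, EyebrowDecomposer00Args

decomposer = EyebrowDecomposer00(EyebrowDecomposer00Args())  # 128x128, 4 channels by default
image = torch.randn(1, 4, 128, 128)                          # stand-in for an RGBA eyebrow crop
outputs = decomposer(image)
eyebrow_layer = outputs[0]       # EYEBROW_LAYER_INDEX
background_layer = outputs[3]    # BACKGROUND_LAYER_INDEX
print(eyebrow_layer.shape, background_layer.shape)           # both torch.Size([1, 4, 128, 128])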
/tha3/nn/eyebrow_morphing_combiner/eyebrow_morphing_combiner_00.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import torch 4 | from torch import Tensor 5 | from torch.nn import Module 6 | 7 | from tha3.nn.common.poser_encoder_decoder_00 import PoserEncoderDecoder00Args, PoserEncoderDecoder00 8 | from tha3.nn.image_processing_util import apply_color_change, apply_grid_change, apply_rgb_change 9 | from tha3.module.module_factory import ModuleFactory 10 | from tha3.nn.nonlinearity_factory import ReLUFactory 11 | from tha3.nn.normalization import InstanceNorm2dFactory 12 | from tha3.nn.util import BlockArgs 13 | 14 | 15 | class EyebrowMorphingCombiner00Args(PoserEncoderDecoder00Args): 16 | def __init__(self, 17 | image_size: int = 128, 18 | image_channels: int = 4, 19 | num_pose_params: int = 12, 20 | start_channels: int = 64, 21 | bottleneck_image_size=16, 22 | num_bottleneck_blocks=6, 23 | max_channels: int = 512, 24 | block_args: Optional[BlockArgs] = None): 25 | super().__init__( 26 | image_size, 27 | 2 * image_channels, 28 | image_channels, 29 | num_pose_params, 30 | start_channels, 31 | bottleneck_image_size, 32 | num_bottleneck_blocks, 33 | max_channels, 34 | block_args) 35 | 36 | 37 | class EyebrowMorphingCombiner00(Module): 38 | def __init__(self, args: EyebrowMorphingCombiner00Args): 39 | super().__init__() 40 | self.args = args 41 | self.body = PoserEncoderDecoder00(args) 42 | self.morphed_eyebrow_layer_grid_change = self.args.create_grid_change_block() 43 | self.morphed_eyebrow_layer_alpha = self.args.create_alpha_block() 44 | self.morphed_eyebrow_layer_color_change = self.args.create_color_change_block() 45 | self.combine_alpha = self.args.create_alpha_block() 46 | 47 | def forward(self, background_layer: Tensor, eyebrow_layer: Tensor, pose: Tensor, *args) -> List[Tensor]: 48 | combined_image = torch.cat([background_layer, eyebrow_layer], dim=1) 49 | feature = self.body(combined_image, pose)[0] 50 | 51 | morphed_eyebrow_layer_grid_change = self.morphed_eyebrow_layer_grid_change(feature) 52 | morphed_eyebrow_layer_alpha = self.morphed_eyebrow_layer_alpha(feature) 53 | morphed_eyebrow_layer_color_change = self.morphed_eyebrow_layer_color_change(feature) 54 | warped_eyebrow_layer = apply_grid_change(morphed_eyebrow_layer_grid_change, eyebrow_layer) 55 | morphed_eyebrow_layer = apply_color_change( 56 | morphed_eyebrow_layer_alpha, morphed_eyebrow_layer_color_change, warped_eyebrow_layer) 57 | 58 | combine_alpha = self.combine_alpha(feature) 59 | eyebrow_image = apply_rgb_change(combine_alpha, morphed_eyebrow_layer, background_layer) 60 | eyebrow_image_no_combine_alpha = apply_rgb_change( 61 | (morphed_eyebrow_layer[:, 3:4, :, :] + 1.0) / 2.0, morphed_eyebrow_layer, background_layer) 62 | 63 | return [ 64 | eyebrow_image, # 0 65 | combine_alpha, # 1 66 | eyebrow_image_no_combine_alpha, # 2 67 | morphed_eyebrow_layer, # 3 68 | morphed_eyebrow_layer_alpha, # 4 69 | morphed_eyebrow_layer_color_change, # 5 70 | warped_eyebrow_layer, # 6 71 | morphed_eyebrow_layer_grid_change, # 7 72 | ] 73 | 74 | EYEBROW_IMAGE_INDEX = 0 75 | COMBINE_ALPHA_INDEX = 1 76 | EYEBROW_IMAGE_NO_COMBINE_ALPHA_INDEX = 2 77 | MORPHED_EYEBROW_LAYER_INDEX = 3 78 | MORPHED_EYEBROW_LAYER_ALPHA_INDEX = 4 79 | MORPHED_EYEBROW_LAYER_COLOR_CHANGE_INDEX = 5 80 | WARPED_EYEBROW_LAYER_INDEX = 6 81 | MORPHED_EYEBROW_LAYER_GRID_CHANGE_INDEX = 7 82 | OUTPUT_LENGTH = 8 83 | 84 | 85 | class EyebrowMorphingCombiner00Factory(ModuleFactory): 86 | def 
__init__(self, args: EyebrowMorphingCombiner00Args): 87 | super().__init__() 88 | self.args = args 89 | 90 | def create(self) -> Module: 91 | return EyebrowMorphingCombiner00(self.args) 92 | 93 | 94 | if __name__ == "__main__": 95 | cuda = torch.device('cuda') 96 | args = EyebrowMorphingCombiner00Args( 97 | image_size=128, 98 | image_channels=4, 99 | num_pose_params=12, 100 | start_channels=64, 101 | bottleneck_image_size=16, 102 | num_bottleneck_blocks=3, 103 | block_args=BlockArgs( 104 | initialization_method='xavier', 105 | use_spectral_norm=False, 106 | normalization_layer_factory=InstanceNorm2dFactory(), 107 | nonlinearity_factory=ReLUFactory(inplace=True))) 108 | face_morpher = EyebrowMorphingCombiner00(args).to(cuda) 109 | 110 | background_layer = torch.randn(8, 4, 128, 128, device=cuda) 111 | eyebrow_layer = torch.randn(8, 4, 128, 128, device=cuda) 112 | pose = torch.randn(8, 12, device=cuda) 113 | outputs = face_morpher.forward(background_layer, eyebrow_layer, pose) 114 | for i in range(len(outputs)): 115 | print(i, outputs[i].shape) 116 | -------------------------------------------------------------------------------- /tha3/nn/eyebrow_morphing_combiner/eyebrow_morphing_combiner_03.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import torch 4 | from torch import Tensor 5 | from torch.nn import Module 6 | 7 | from tha3.nn.common.poser_encoder_decoder_00 import PoserEncoderDecoder00Args 8 | from tha3.nn.common.poser_encoder_decoder_00_separable import PoserEncoderDecoder00Separable 9 | from tha3.nn.image_processing_util import apply_color_change, apply_rgb_change, GridChangeApplier 10 | from tha3.module.module_factory import ModuleFactory 11 | from tha3.nn.nonlinearity_factory import ReLUFactory 12 | from tha3.nn.normalization import InstanceNorm2dFactory 13 | from tha3.nn.util import BlockArgs 14 | 15 | 16 | class EyebrowMorphingCombiner03Args(PoserEncoderDecoder00Args): 17 | def __init__(self, 18 | image_size: int = 128, 19 | image_channels: int = 4, 20 | num_pose_params: int = 12, 21 | start_channels: int = 64, 22 | bottleneck_image_size=16, 23 | num_bottleneck_blocks=6, 24 | max_channels: int = 512, 25 | block_args: Optional[BlockArgs] = None): 26 | super().__init__( 27 | image_size, 28 | 2 * image_channels, 29 | image_channels, 30 | num_pose_params, 31 | start_channels, 32 | bottleneck_image_size, 33 | num_bottleneck_blocks, 34 | max_channels, 35 | block_args) 36 | 37 | 38 | class EyebrowMorphingCombiner03(Module): 39 | def __init__(self, args: EyebrowMorphingCombiner03Args): 40 | super().__init__() 41 | self.args = args 42 | self.body = PoserEncoderDecoder00Separable(args) 43 | self.morphed_eyebrow_layer_grid_change = self.args.create_grid_change_block() 44 | self.morphed_eyebrow_layer_alpha = self.args.create_alpha_block() 45 | self.morphed_eyebrow_layer_color_change = self.args.create_color_change_block() 46 | self.combine_alpha = self.args.create_alpha_block() 47 | self.grid_change_applier = GridChangeApplier() 48 | 49 | def forward(self, background_layer: Tensor, eyebrow_layer: Tensor, pose: Tensor, *args) -> List[Tensor]: 50 | combined_image = torch.cat([background_layer, eyebrow_layer], dim=1) 51 | feature = self.body(combined_image, pose)[0] 52 | 53 | morphed_eyebrow_layer_grid_change = self.morphed_eyebrow_layer_grid_change(feature) 54 | morphed_eyebrow_layer_alpha = self.morphed_eyebrow_layer_alpha(feature) 55 | morphed_eyebrow_layer_color_change = 
self.morphed_eyebrow_layer_color_change(feature) 56 | warped_eyebrow_layer = self.grid_change_applier.apply(morphed_eyebrow_layer_grid_change, eyebrow_layer) 57 | morphed_eyebrow_layer = apply_color_change( 58 | morphed_eyebrow_layer_alpha, morphed_eyebrow_layer_color_change, warped_eyebrow_layer) 59 | 60 | combine_alpha = self.combine_alpha(feature) 61 | eyebrow_image = apply_rgb_change(combine_alpha, morphed_eyebrow_layer, background_layer) 62 | eyebrow_image_no_combine_alpha = apply_rgb_change( 63 | (morphed_eyebrow_layer[:, 3:4, :, :] + 1.0) / 2.0, morphed_eyebrow_layer, background_layer) 64 | 65 | return [ 66 | eyebrow_image, # 0 67 | combine_alpha, # 1 68 | eyebrow_image_no_combine_alpha, # 2 69 | morphed_eyebrow_layer, # 3 70 | morphed_eyebrow_layer_alpha, # 4 71 | morphed_eyebrow_layer_color_change, # 5 72 | warped_eyebrow_layer, # 6 73 | morphed_eyebrow_layer_grid_change, # 7 74 | ] 75 | 76 | EYEBROW_IMAGE_INDEX = 0 77 | COMBINE_ALPHA_INDEX = 1 78 | EYEBROW_IMAGE_NO_COMBINE_ALPHA_INDEX = 2 79 | MORPHED_EYEBROW_LAYER_INDEX = 3 80 | MORPHED_EYEBROW_LAYER_ALPHA_INDEX = 4 81 | MORPHED_EYEBROW_LAYER_COLOR_CHANGE_INDEX = 5 82 | WARPED_EYEBROW_LAYER_INDEX = 6 83 | MORPHED_EYEBROW_LAYER_GRID_CHANGE_INDEX = 7 84 | OUTPUT_LENGTH = 8 85 | 86 | 87 | class EyebrowMorphingCombiner03Factory(ModuleFactory): 88 | def __init__(self, args: EyebrowMorphingCombiner03Args): 89 | super().__init__() 90 | self.args = args 91 | 92 | def create(self) -> Module: 93 | return EyebrowMorphingCombiner03(self.args) 94 | 95 | 96 | if __name__ == "__main__": 97 | cuda = torch.device('cuda') 98 | args = EyebrowMorphingCombiner03Args( 99 | image_size=128, 100 | image_channels=4, 101 | num_pose_params=12, 102 | start_channels=64, 103 | bottleneck_image_size=16, 104 | num_bottleneck_blocks=3, 105 | block_args=BlockArgs( 106 | initialization_method='xavier', 107 | use_spectral_norm=False, 108 | normalization_layer_factory=InstanceNorm2dFactory(), 109 | nonlinearity_factory=ReLUFactory(inplace=True))) 110 | face_morpher = EyebrowMorphingCombiner03(args).to(cuda) 111 | 112 | background_layer = torch.randn(8, 4, 128, 128, device=cuda) 113 | eyebrow_layer = torch.randn(8, 4, 128, 128, device=cuda) 114 | pose = torch.randn(8, 12, device=cuda) 115 | outputs = face_morpher.forward(background_layer, eyebrow_layer, pose) 116 | for i in range(len(outputs)): 117 | print(i, outputs[i].shape) 118 | -------------------------------------------------------------------------------- /tha3/nn/face_morpher/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/face_morpher/__init__.py -------------------------------------------------------------------------------- /tha3/nn/face_morpher/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/face_morpher/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/face_morpher/__pycache__/face_morpher_08.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/face_morpher/__pycache__/face_morpher_08.cpython-310.pyc 
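How the decomposer and morphing combiner stages fit together (a minimal sketch: the random crop and the all-zero pose vector are illustrative, and the literal indices follow the # comments in each forward()):

import torch
from tha3.nn.eyebrow_decomposer.eyebrow_decomposer_03 import EyebrowDecomposer03, EyebrowDecomposer03Args
from tha3.nn.eyebrow_morphing_combiner.eyebrow_morphing_combiner_03 import (
    EyebrowMorphingCombiner03, EyebrowMorphingCombiner03Args)

decomposer = EyebrowDecomposer03(EyebrowDecomposer03Args())
combiner = EyebrowMorphingCombiner03(EyebrowMorphingCombiner03Args())  # 12 pose params by default

image = torch.randn(1, 4, 128, 128)                 # stand-in for an RGBA eyebrow crop
pose = torch.zeros(1, 12)                           # neutral eyebrow pose
decomposed = decomposer(image)
eyebrow_layer, background_layer = decomposed[0], decomposed[3]
combined = combiner(background_layer, eyebrow_layer, pose)
eyebrow_image = combined[0]                         # EYEBROW_IMAGE_INDEX
print(eyebrow_image.shape)                          # torch.Size([1, 4, 128, 128])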
-------------------------------------------------------------------------------- /tha3/nn/face_morpher/__pycache__/face_morpher_09.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/face_morpher/__pycache__/face_morpher_09.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/nn/image_processing_util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.nn.functional import affine_grid, grid_sample 4 | 5 | 6 | def apply_rgb_change(alpha: Tensor, color_change: Tensor, image: Tensor): 7 | image_rgb = image[:, 0:3, :, :] 8 | color_change_rgb = color_change[:, 0:3, :, :] 9 | output_rgb = color_change_rgb * alpha + image_rgb * (1 - alpha) 10 | return torch.cat([output_rgb, image[:, 3:4, :, :]], dim=1) 11 | 12 | 13 | def apply_grid_change(grid_change, image: Tensor) -> Tensor: 14 | n, c, h, w = image.shape 15 | device = grid_change.device 16 | grid_change = torch.transpose(grid_change.view(n, 2, h * w), 1, 2).view(n, h, w, 2) 17 | identity = torch.tensor( 18 | [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]], 19 | dtype=grid_change.dtype, 20 | device=device).unsqueeze(0).repeat(n, 1, 1) 21 | base_grid = affine_grid(identity, [n, c, h, w], align_corners=False) 22 | grid = base_grid + grid_change 23 | resampled_image = grid_sample(image, grid, mode='bilinear', padding_mode='border', align_corners=False) 24 | return resampled_image 25 | 26 | 27 | class GridChangeApplier: 28 | def __init__(self): 29 | self.last_n = None 30 | self.last_device = None 31 | self.last_identity = None 32 | 33 | def apply(self, grid_change: Tensor, image: Tensor, align_corners: bool = False) -> Tensor: 34 | n, c, h, w = image.shape 35 | device = grid_change.device 36 | grid_change = torch.transpose(grid_change.view(n, 2, h * w), 1, 2).view(n, h, w, 2) 37 | 38 | if n == self.last_n and device == self.last_device: 39 | identity = self.last_identity 40 | else: 41 | identity = torch.tensor( 42 | [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]], 43 | dtype=grid_change.dtype, 44 | device=device, 45 | requires_grad=False) \ 46 | .unsqueeze(0).repeat(n, 1, 1) 47 | self.last_identity = identity 48 | self.last_n = n 49 | self.last_device = device 50 | base_grid = affine_grid(identity, [n, c, h, w], align_corners=align_corners) 51 | 52 | grid = base_grid + grid_change 53 | resampled_image = grid_sample(image, grid, mode='bilinear', padding_mode='border', align_corners=align_corners) 54 | return resampled_image 55 | 56 | 57 | def apply_color_change(alpha, color_change, image: Tensor) -> Tensor: 58 | return color_change * alpha + image * (1 - alpha) 59 | -------------------------------------------------------------------------------- /tha3/nn/init_function.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | 3 | import torch 4 | from torch import zero_ 5 | from torch.nn import Module 6 | from torch.nn.init import kaiming_normal_, xavier_normal_, normal_ 7 | 8 | 9 | def create_init_function(method: str = 'none') -> Callable[[Module], Module]: 10 | def init(module: Module): 11 | if method == 'none': 12 | return module 13 | elif method == 'he': 14 | kaiming_normal_(module.weight) 15 | return module 16 | elif method == 'xavier': 17 | xavier_normal_(module.weight) 18 | return module 19 | elif method == 'dcgan': 20 
| normal_(module.weight, 0.0, 0.02) 21 | return module 22 | elif method == 'dcgan_001': 23 | normal_(module.weight, 0.0, 0.01) 24 | return module 25 | elif method == "zero": 26 | with torch.no_grad(): 27 | zero_(module.weight) 28 | return module 29 | else: 30 | raise ValueError("Invalid initialization method %s" % method) 31 | 32 | return init 33 | 34 | 35 | class HeInitialization: 36 | def __init__(self, a: int = 0, mode: str = 'fan_in', nonlinearity: str = 'leaky_relu'): 37 | self.nonlinearity = nonlinearity 38 | self.mode = mode 39 | self.a = a 40 | 41 | def __call__(self, module: Module) -> Module: 42 | with torch.no_grad(): 43 | kaiming_normal_(module.weight, a=self.a, mode=self.mode, nonlinearity=self.nonlinearity) 44 | return module 45 | 46 | 47 | class NormalInitialization: 48 | def __init__(self, mean: float = 0.0, std: float = 1.0): 49 | self.std = std 50 | self.mean = mean 51 | 52 | def __call__(self, module: Module) -> Module: 53 | with torch.no_grad(): 54 | normal_(module.weight, self.mean, self.std) 55 | return module 56 | 57 | 58 | class XavierInitialization: 59 | def __init__(self, gain: float = 1.0): 60 | self.gain = gain 61 | 62 | def __call__(self, module: Module) -> Module: 63 | with torch.no_grad(): 64 | xavier_normal_(module.weight, self.gain) 65 | return module 66 | 67 | 68 | class ZeroInitialization: 69 | def __call__(self, module: Module) -> Module: 70 | with torch.no_grad(): 71 | zero_(module.weight) 72 | return module 73 | 74 | class NoInitialization: 75 | def __call__(self, module: Module) -> Module: 76 | return module -------------------------------------------------------------------------------- /tha3/nn/nonlinearity_factory.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from torch.nn import Module, ReLU, LeakyReLU, ELU, ReLU6, Hardswish, SiLU, Tanh, Sigmoid 4 | 5 | from tha3.module.module_factory import ModuleFactory 6 | 7 | 8 | class ReLUFactory(ModuleFactory): 9 | def __init__(self, inplace: bool = False): 10 | self.inplace = inplace 11 | 12 | def create(self) -> Module: 13 | return ReLU(self.inplace) 14 | 15 | 16 | class LeakyReLUFactory(ModuleFactory): 17 | def __init__(self, inplace: bool = False, negative_slope: float = 1e-2): 18 | self.negative_slope = negative_slope 19 | self.inplace = inplace 20 | 21 | def create(self) -> Module: 22 | return LeakyReLU(inplace=self.inplace, negative_slope=self.negative_slope) 23 | 24 | 25 | class ELUFactory(ModuleFactory): 26 | def __init__(self, inplace: bool = False, alpha: float = 1.0): 27 | self.alpha = alpha 28 | self.inplace = inplace 29 | 30 | def create(self) -> Module: 31 | return ELU(inplace=self.inplace, alpha=self.alpha) 32 | 33 | 34 | class ReLU6Factory(ModuleFactory): 35 | def __init__(self, inplace: bool = False): 36 | self.inplace = inplace 37 | 38 | def create(self) -> Module: 39 | return ReLU6(inplace=self.inplace) 40 | 41 | 42 | class SiLUFactory(ModuleFactory): 43 | def __init__(self, inplace: bool = False): 44 | self.inplace = inplace 45 | 46 | def create(self) -> Module: 47 | return SiLU(inplace=self.inplace) 48 | 49 | 50 | class HardswishFactory(ModuleFactory): 51 | def __init__(self, inplace: bool = False): 52 | self.inplace = inplace 53 | 54 | def create(self) -> Module: 55 | return Hardswish(inplace=self.inplace) 56 | 57 | 58 | class TanhFactory(ModuleFactory): 59 | def create(self) -> Module: 60 | return Tanh() 61 | 62 | 63 | class SigmoidFactory(ModuleFactory): 64 | def create(self) -> Module: 65 | return Sigmoid() 66 | 67 | 
74 | def resolve_nonlinearity_factory(nonlinearity_factory: Optional[ModuleFactory]) -> ModuleFactory:
75 |     if nonlinearity_factory is None:
76 |         return ReLUFactory(inplace=False)
77 |     else:
78 |         return nonlinearity_factory
79 | 
-------------------------------------------------------------------------------- /tha3/nn/normalization.py: --------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from typing import Optional
3 | 
4 | import torch
5 | from torch import layer_norm
6 | from torch.nn import Module, BatchNorm2d, InstanceNorm2d, Parameter
7 | from torch.nn.init import normal_, constant_
8 | 
9 | from tha3.nn.pass_through import PassThrough
10 | 
11 | 
12 | class PixelNormalization(Module):
13 |     def __init__(self, epsilon=1e-8):
14 |         super().__init__()
15 |         self.epsilon = epsilon
16 | 
17 |     def forward(self, x):
18 |         return x / torch.sqrt((x ** 2).mean(dim=1, keepdim=True) + self.epsilon)
19 | 
20 | 
21 | class NormalizationLayerFactory(ABC):
22 |     def __init__(self):
23 |         super().__init__()
24 | 
25 |     @abstractmethod
26 |     def create(self, num_features: int, affine: bool = True) -> Module:
27 |         pass
28 | 
29 |     @staticmethod
30 |     def resolve_2d(factory: Optional['NormalizationLayerFactory']) -> 'NormalizationLayerFactory':
31 |         if factory is None:
32 |             return InstanceNorm2dFactory()
33 |         else:
34 |             return factory
35 | 
36 | 
37 | class Bias2d(Module):
38 |     def __init__(self, num_features: int):
39 |         super().__init__()
40 |         self.num_features = num_features
41 |         self.bias = Parameter(torch.zeros(1, num_features, 1, 1))
42 | 
43 |     def forward(self, x):
44 |         return x + self.bias
45 | 
46 | 
47 | class NoNorm2dFactory(NormalizationLayerFactory):
48 |     def __init__(self):
49 |         super().__init__()
50 | 
51 |     def create(self, num_features: int, affine: bool = True) -> Module:
52 |         if affine:
53 |             return Bias2d(num_features)
54 |         else:
55 |             return PassThrough()
56 | 
57 | 
58 | class BatchNorm2dFactory(NormalizationLayerFactory):
59 |     def __init__(self,
60 |                  weight_mean: Optional[float] = None,
61 |                  weight_std: Optional[float] = None,
62 |                  bias: Optional[float] = None):
63 |         super().__init__()
64 |         self.bias = bias
65 |         self.weight_std = weight_std
66 |         self.weight_mean = weight_mean
67 | 
68 |     def get_weight_mean(self):
69 |         if self.weight_mean is None:
70 |             return 1.0
71 |         else:
72 |             return self.weight_mean
73 | 
74 |     def get_weight_std(self):
75 |         if self.weight_std is None:
76 |             return 0.02
77 |         else:
78 |             return self.weight_std
79 | 
80 |     def create(self, num_features: int, affine: bool = True) -> Module:
81 |         module = BatchNorm2d(num_features=num_features, affine=affine)
82 |         if affine:
83 |             if self.weight_mean is not None or self.weight_std is not None:
84 |                 normal_(module.weight, self.get_weight_mean(), self.get_weight_std())
85 |             if self.bias is not None:
86 |                 constant_(module.bias, self.bias)
87 |         return module
88 | 
89 | 
90 | class InstanceNorm2dFactory(NormalizationLayerFactory):
91 |     def __init__(self):
92 |         super().__init__()
93 | 
94 |     def create(self, num_features: int, affine: bool = True) -> Module:
95 |         return InstanceNorm2d(num_features=num_features, affine=affine)
96 | 
97 | 
98 | class PixelNormFactory(NormalizationLayerFactory):
99 |     def __init__(self):
100 |         super().__init__()
101 | 
102 |     def create(self, num_features: int, affine: bool = True) -> Module:
103 |         return PixelNormalization()
104 | 
105 | 
106 | class LayerNorm2d(Module):
107 |     def __init__(self, channels: int, affine: bool = True):
108 |         super().__init__()
109 |         self.channels = channels
110 |         self.affine = affine
111 | 
112 |         if self.affine:
113 |             self.weight = Parameter(torch.ones(1, channels, 1, 1))
114 |             self.bias = Parameter(torch.zeros(1, channels, 1, 1))
115 | 
116 |     def forward(self, x):
117 |         shape = x.size()[1:]
118 |         # Guard on affine: self.weight and self.bias only exist when affine is True.
119 |         if self.affine:
120 |             return layer_norm(x, shape) * self.weight + self.bias
121 |         return layer_norm(x, shape)
122 | 
123 | 
124 | class LayerNorm2dFactory(NormalizationLayerFactory):
125 |     def __init__(self):
126 |         super().__init__()
127 | 
128 |     def create(self, num_features: int, affine: bool = True) -> Module:
129 |         return LayerNorm2d(channels=num_features, affine=affine)
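130 | 
131 | # Example (sketch, illustrative rather than from the upstream source): callers
132 | # resolve an optional factory to the default, then build the concrete layer:
133 | #
134 | #     factory = NormalizationLayerFactory.resolve_2d(None)  # -> InstanceNorm2dFactory
135 | #     norm = factory.create(num_features=64, affine=True)   # -> InstanceNorm2d(64, affine=True)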
-------------------------------------------------------------------------------- /tha3/nn/pass_through.py: --------------------------------------------------------------------------------
1 | from torch.nn import Module
2 | 
3 | 
4 | class PassThrough(Module):
5 |     def __init__(self):
6 |         super().__init__()
7 | 
8 |     def forward(self, x):
9 |         return x
-------------------------------------------------------------------------------- /tha3/nn/resnet_block.py: --------------------------------------------------------------------------------
1 | from typing import Optional
2 | 
3 | import torch
4 | from torch.nn import Module, Sequential, Parameter
5 | 
6 | from tha3.module.module_factory import ModuleFactory
7 | from tha3.nn.conv import create_conv1, create_conv3
8 | from tha3.nn.nonlinearity_factory import resolve_nonlinearity_factory
9 | from tha3.nn.normalization import NormalizationLayerFactory
10 | from tha3.nn.util import BlockArgs
11 | 
12 | 
13 | class ResnetBlock(Module):
14 |     @staticmethod
15 |     def create(num_channels: int,
16 |                is1x1: bool = False,
17 |                use_scale_parameters: bool = False,
18 |                block_args: Optional[BlockArgs] = None):
19 |         if block_args is None:
20 |             block_args = BlockArgs()
21 |         return ResnetBlock(num_channels,
22 |                            is1x1,
23 |                            block_args.initialization_method,
24 |                            block_args.nonlinearity_factory,
25 |                            block_args.normalization_layer_factory,
26 |                            block_args.use_spectral_norm,
27 |                            use_scale_parameters)
28 | 
29 |     def __init__(self,
30 |                  num_channels: int,
31 |                  is1x1: bool = False,
32 |                  initialization_method: str = 'he',
33 |                  nonlinearity_factory: ModuleFactory = None,
34 |                  normalization_layer_factory: Optional[NormalizationLayerFactory] = None,
35 |                  use_spectral_norm: bool = False,
36 |                  use_scale_parameter: bool = False):
37 |         super().__init__()
38 |         self.use_scale_parameter = use_scale_parameter
39 |         if self.use_scale_parameter:
40 |             self.scale = Parameter(torch.zeros(1))
41 |         nonlinearity_factory = resolve_nonlinearity_factory(nonlinearity_factory)
42 |         if is1x1:
43 |             self.resnet_path = Sequential(
44 |                 create_conv1(num_channels, num_channels, initialization_method,
45 |                              bias=True,
46 |                              use_spectral_norm=use_spectral_norm),
47 |                 nonlinearity_factory.create(),
48 |                 create_conv1(num_channels, num_channels, initialization_method,
49 |                              bias=True,
50 |                              use_spectral_norm=use_spectral_norm))
51 |         else:
52 |             self.resnet_path = Sequential(
53 |                 create_conv3(num_channels, num_channels,
54 |                              bias=False, initialization_method=initialization_method,
55 |                              use_spectral_norm=use_spectral_norm),
56 |                 NormalizationLayerFactory.resolve_2d(normalization_layer_factory).create(num_channels, affine=True),
57 |                 nonlinearity_factory.create(),
58 |                 create_conv3(num_channels, num_channels,
59 |                              bias=False, initialization_method=initialization_method,
60 |                              use_spectral_norm=use_spectral_norm),
61 |                 NormalizationLayerFactory.resolve_2d(normalization_layer_factory).create(num_channels, affine=True))
62 | 
63 | 
def forward(self, x): 64 | if self.use_scale_parameter: 65 | return x + self.scale * self.resnet_path(x) 66 | else: 67 | return x + self.resnet_path(x) 68 | -------------------------------------------------------------------------------- /tha3/nn/resnet_block_seperable.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import torch 4 | from torch.nn import Module, Sequential, Parameter 5 | 6 | from tha3.module.module_factory import ModuleFactory 7 | from tha3.nn.conv import create_conv1 8 | from tha3.nn.nonlinearity_factory import resolve_nonlinearity_factory 9 | from tha3.nn.normalization import NormalizationLayerFactory 10 | from tha3.nn.separable_conv import create_separable_conv3 11 | from tha3.nn.util import BlockArgs 12 | 13 | 14 | class ResnetBlockSeparable(Module): 15 | @staticmethod 16 | def create(num_channels: int, 17 | is1x1: bool = False, 18 | use_scale_parameters: bool = False, 19 | block_args: Optional[BlockArgs] = None): 20 | if block_args is None: 21 | block_args = BlockArgs() 22 | return ResnetBlockSeparable( 23 | num_channels, 24 | is1x1, 25 | block_args.initialization_method, 26 | block_args.nonlinearity_factory, 27 | block_args.normalization_layer_factory, 28 | block_args.use_spectral_norm, 29 | use_scale_parameters) 30 | 31 | def __init__(self, 32 | num_channels: int, 33 | is1x1: bool = False, 34 | initialization_method: str = 'he', 35 | nonlinearity_factory: ModuleFactory = None, 36 | normalization_layer_factory: Optional[NormalizationLayerFactory] = None, 37 | use_spectral_norm: bool = False, 38 | use_scale_parameter: bool = False): 39 | super().__init__() 40 | self.use_scale_parameter = use_scale_parameter 41 | if self.use_scale_parameter: 42 | self.scale = Parameter(torch.zeros(1)) 43 | nonlinearity_factory = resolve_nonlinearity_factory(nonlinearity_factory) 44 | if is1x1: 45 | self.resnet_path = Sequential( 46 | create_conv1(num_channels, num_channels, initialization_method, 47 | bias=True, 48 | use_spectral_norm=use_spectral_norm), 49 | nonlinearity_factory.create(), 50 | create_conv1(num_channels, num_channels, initialization_method, 51 | bias=True, 52 | use_spectral_norm=use_spectral_norm)) 53 | else: 54 | self.resnet_path = Sequential( 55 | create_separable_conv3( 56 | num_channels, num_channels, 57 | bias=False, initialization_method=initialization_method, 58 | use_spectral_norm=use_spectral_norm), 59 | NormalizationLayerFactory.resolve_2d(normalization_layer_factory).create(num_channels, affine=True), 60 | nonlinearity_factory.create(), 61 | create_separable_conv3( 62 | num_channels, num_channels, 63 | bias=False, initialization_method=initialization_method, 64 | use_spectral_norm=use_spectral_norm), 65 | NormalizationLayerFactory.resolve_2d(normalization_layer_factory).create(num_channels, affine=True)) 66 | 67 | def forward(self, x): 68 | if self.use_scale_parameter: 69 | return x + self.scale * self.resnet_path(x) 70 | else: 71 | return x + self.resnet_path(x) 72 | -------------------------------------------------------------------------------- /tha3/nn/separable_conv.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from torch.nn import Sequential, Conv2d, ConvTranspose2d, Module 4 | 5 | from tha3.nn.normalization import NormalizationLayerFactory 6 | from tha3.nn.util import BlockArgs, wrap_conv_or_linear_module 7 | 8 | 9 | def create_separable_conv3(in_channels: int, out_channels: int, 10 | bias: bool = 
False, 11 | initialization_method='he', 12 | use_spectral_norm: bool = False) -> Module: 13 | return Sequential( 14 | wrap_conv_or_linear_module( 15 | Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1, bias=False, groups=in_channels), 16 | initialization_method, 17 | use_spectral_norm), 18 | wrap_conv_or_linear_module( 19 | Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=bias), 20 | initialization_method, 21 | use_spectral_norm)) 22 | 23 | 24 | def create_separable_conv7(in_channels: int, out_channels: int, 25 | bias: bool = False, 26 | initialization_method='he', 27 | use_spectral_norm: bool = False) -> Module: 28 | return Sequential( 29 | wrap_conv_or_linear_module( 30 | Conv2d(in_channels, in_channels, kernel_size=7, stride=1, padding=3, bias=False, groups=in_channels), 31 | initialization_method, 32 | use_spectral_norm), 33 | wrap_conv_or_linear_module( 34 | Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=bias), 35 | initialization_method, 36 | use_spectral_norm)) 37 | 38 | 39 | def create_separable_conv3_block( 40 | in_channels: int, out_channels: int, block_args: Optional[BlockArgs] = None): 41 | if block_args is None: 42 | block_args = BlockArgs() 43 | return Sequential( 44 | wrap_conv_or_linear_module( 45 | Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1, bias=False, groups=in_channels), 46 | block_args.initialization_method, 47 | block_args.use_spectral_norm), 48 | wrap_conv_or_linear_module( 49 | Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False), 50 | block_args.initialization_method, 51 | block_args.use_spectral_norm), 52 | NormalizationLayerFactory.resolve_2d(block_args.normalization_layer_factory).create(out_channels, affine=True), 53 | block_args.nonlinearity_factory.create()) 54 | 55 | 56 | def create_separable_conv7_block( 57 | in_channels: int, out_channels: int, block_args: Optional[BlockArgs] = None): 58 | if block_args is None: 59 | block_args = BlockArgs() 60 | return Sequential( 61 | wrap_conv_or_linear_module( 62 | Conv2d(in_channels, in_channels, kernel_size=7, stride=1, padding=3, bias=False, groups=in_channels), 63 | block_args.initialization_method, 64 | block_args.use_spectral_norm), 65 | wrap_conv_or_linear_module( 66 | Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False), 67 | block_args.initialization_method, 68 | block_args.use_spectral_norm), 69 | NormalizationLayerFactory.resolve_2d(block_args.normalization_layer_factory).create(out_channels, affine=True), 70 | block_args.nonlinearity_factory.create()) 71 | 72 | 73 | def create_separable_downsample_block( 74 | in_channels: int, out_channels: int, is_output_1x1: bool, block_args: Optional[BlockArgs] = None): 75 | if block_args is None: 76 | block_args = BlockArgs() 77 | if is_output_1x1: 78 | return Sequential( 79 | wrap_conv_or_linear_module( 80 | Conv2d(in_channels, in_channels, kernel_size=4, stride=2, padding=1, bias=False, groups=in_channels), 81 | block_args.initialization_method, 82 | block_args.use_spectral_norm), 83 | wrap_conv_or_linear_module( 84 | Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False), 85 | block_args.initialization_method, 86 | block_args.use_spectral_norm), 87 | block_args.nonlinearity_factory.create()) 88 | else: 89 | return Sequential( 90 | wrap_conv_or_linear_module( 91 | Conv2d(in_channels, in_channels, kernel_size=4, stride=2, padding=1, bias=False, groups=in_channels), 92 | 
            block_args.initialization_method,
93 |             block_args.use_spectral_norm),
94 |         wrap_conv_or_linear_module(
95 |             Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False),
96 |             block_args.initialization_method,
97 |             block_args.use_spectral_norm),
98 |         NormalizationLayerFactory.resolve_2d(block_args.normalization_layer_factory)
99 |             .create(out_channels, affine=True),
100 |         block_args.nonlinearity_factory.create())
101 | 
102 | 
103 | def create_separable_upsample_block(
104 |         in_channels: int, out_channels: int, block_args: Optional[BlockArgs] = None):
105 |     if block_args is None:
106 |         block_args = BlockArgs()
107 |     return Sequential(
108 |         wrap_conv_or_linear_module(
109 |             ConvTranspose2d(
110 |                 in_channels, in_channels, kernel_size=4, stride=2, padding=1, bias=False, groups=in_channels),
111 |             block_args.initialization_method,
112 |             block_args.use_spectral_norm),
113 |         wrap_conv_or_linear_module(
114 |             Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False),
115 |             block_args.initialization_method,
116 |             block_args.use_spectral_norm),
117 |         NormalizationLayerFactory.resolve_2d(block_args.normalization_layer_factory)
118 |             .create(out_channels, affine=True),
119 |         block_args.nonlinearity_factory.create())
120 | 
-------------------------------------------------------------------------------- /tha3/nn/spectral_norm.py: --------------------------------------------------------------------------------
1 | from torch.nn import Module
2 | from torch.nn.utils import spectral_norm
3 | 
4 | 
5 | def apply_spectral_norm(module: Module, use_spectral_norm: bool = False) -> Module:
6 |     if use_spectral_norm:
7 |         return spectral_norm(module)
8 |     else:
9 |         return module
10 | 
-------------------------------------------------------------------------------- /tha3/nn/two_algo_body_rotator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/two_algo_body_rotator/__init__.py
-------------------------------------------------------------------------------- /tha3/nn/two_algo_body_rotator/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/two_algo_body_rotator/__pycache__/__init__.cpython-310.pyc
-------------------------------------------------------------------------------- /tha3/nn/two_algo_body_rotator/__pycache__/two_algo_face_body_rotator_05.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/nn/two_algo_body_rotator/__pycache__/two_algo_face_body_rotator_05.cpython-310.pyc
-------------------------------------------------------------------------------- /tha3/nn/two_algo_body_rotator/two_algo_face_body_rotator_05.py: --------------------------------------------------------------------------------
1 | from typing import Optional, List
2 | 
3 | import torch
4 | from torch import Tensor
5 | from torch.nn import Module, Sequential, Tanh
6 | 
7 | from tha3.nn.image_processing_util import GridChangeApplier
8 | from tha3.nn.common.resize_conv_encoder_decoder import ResizeConvEncoderDecoder, ResizeConvEncoderDecoderArgs
9 | from tha3.module.module_factory import ModuleFactory
10 | from tha3.nn.conv import
create_conv3_from_block_args, create_conv3 11 | from tha3.nn.nonlinearity_factory import ReLUFactory, LeakyReLUFactory 12 | from tha3.nn.normalization import InstanceNorm2dFactory 13 | from tha3.nn.util import BlockArgs 14 | 15 | 16 | class TwoAlgoFaceBodyRotator05Args: 17 | def __init__(self, 18 | image_size: int = 512, 19 | image_channels: int = 4, 20 | num_pose_params: int = 6, 21 | start_channels: int = 32, 22 | bottleneck_image_size=32, 23 | num_bottleneck_blocks=6, 24 | max_channels: int = 512, 25 | upsample_mode: str = 'bilinear', 26 | block_args: Optional[BlockArgs] = None, 27 | use_separable_convolution=False): 28 | if block_args is None: 29 | block_args = BlockArgs( 30 | normalization_layer_factory=InstanceNorm2dFactory(), 31 | nonlinearity_factory=ReLUFactory(inplace=False)) 32 | 33 | self.use_separable_convolution = use_separable_convolution 34 | self.upsample_mode = upsample_mode 35 | self.max_channels = max_channels 36 | self.num_bottleneck_blocks = num_bottleneck_blocks 37 | self.bottleneck_image_size = bottleneck_image_size 38 | self.start_channels = start_channels 39 | self.num_pose_params = num_pose_params 40 | self.image_channels = image_channels 41 | self.image_size = image_size 42 | self.block_args = block_args 43 | 44 | 45 | class TwoAlgoFaceBodyRotator05(Module): 46 | def __init__(self, args: TwoAlgoFaceBodyRotator05Args): 47 | super().__init__() 48 | self.args = args 49 | 50 | self.encoder_decoder = ResizeConvEncoderDecoder( 51 | ResizeConvEncoderDecoderArgs( 52 | image_size=args.image_size, 53 | input_channels=args.image_channels + args.num_pose_params, 54 | start_channels=args.start_channels, 55 | bottleneck_image_size=args.bottleneck_image_size, 56 | num_bottleneck_blocks=args.num_bottleneck_blocks, 57 | max_channels=args.max_channels, 58 | block_args=args.block_args, 59 | upsample_mode=args.upsample_mode, 60 | use_separable_convolution=args.use_separable_convolution)) 61 | 62 | self.direct_creator = Sequential( 63 | create_conv3_from_block_args( 64 | in_channels=self.args.start_channels, 65 | out_channels=self.args.image_channels, 66 | bias=True, 67 | block_args=self.args.block_args), 68 | Tanh()) 69 | self.grid_change_creator = create_conv3( 70 | in_channels=self.args.start_channels, 71 | out_channels=2, 72 | bias=False, 73 | initialization_method='zero', 74 | use_spectral_norm=False) 75 | self.grid_change_applier = GridChangeApplier() 76 | 77 | def forward(self, image: Tensor, pose: Tensor, *args) -> List[Tensor]: 78 | n, c = pose.shape 79 | pose = pose.view(n, c, 1, 1).repeat(1, 1, self.args.image_size, self.args.image_size) 80 | feature = torch.cat([image, pose], dim=1) 81 | 82 | feature = self.encoder_decoder.forward(feature)[-1] 83 | grid_change = self.grid_change_creator(feature) 84 | direct_image = self.direct_creator(feature) 85 | warped_image = self.grid_change_applier.apply(grid_change, image) 86 | 87 | return [ 88 | direct_image, 89 | warped_image, 90 | grid_change] 91 | 92 | DIRECT_IMAGE_INDEX = 0 93 | WARPED_IMAGE_INDEX = 1 94 | GRID_CHANGE_INDEX = 2 95 | OUTPUT_LENGTH = 3 96 | 97 | 98 | class TwoAlgoFaceBodyRotator05Factory(ModuleFactory): 99 | def __init__(self, args: TwoAlgoFaceBodyRotator05Args): 100 | super().__init__() 101 | self.args = args 102 | 103 | def create(self) -> Module: 104 | return TwoAlgoFaceBodyRotator05(self.args) 105 | 106 | 107 | if __name__ == "__main__": 108 | cuda = torch.device('cuda') 109 | 110 | image_size = 256 111 | image_channels = 4 112 | num_pose_params = 6 113 | args = TwoAlgoFaceBodyRotator05Args( 114 | 
image_size=256, 115 | image_channels=4, 116 | start_channels=64, 117 | num_pose_params=6, 118 | bottleneck_image_size=32, 119 | num_bottleneck_blocks=6, 120 | max_channels=512, 121 | upsample_mode='nearest', 122 | use_separable_convolution=True, 123 | block_args=BlockArgs( 124 | initialization_method='he', 125 | use_spectral_norm=False, 126 | normalization_layer_factory=InstanceNorm2dFactory(), 127 | nonlinearity_factory=LeakyReLUFactory(inplace=False, negative_slope=0.1))) 128 | module = TwoAlgoFaceBodyRotator05(args).to(cuda) 129 | 130 | image_count = 1 131 | image = torch.zeros(image_count, 4, image_size, image_size, device=cuda) 132 | pose = torch.zeros(image_count, num_pose_params, device=cuda) 133 | 134 | repeat = 100 135 | acc = 0.0 136 | for i in range(repeat + 2): 137 | start = torch.cuda.Event(enable_timing=True) 138 | end = torch.cuda.Event(enable_timing=True) 139 | 140 | start.record() 141 | module.forward(image, pose) 142 | end.record() 143 | torch.cuda.synchronize() 144 | if i >= 2: 145 | elapsed_time = start.elapsed_time(end) 146 | print("%d:" % i, elapsed_time) 147 | acc = acc + elapsed_time 148 | 149 | print("average:", acc / repeat) 150 | -------------------------------------------------------------------------------- /tha3/nn/util.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Callable, Union 2 | 3 | from torch.nn import Module 4 | 5 | from tha3.module.module_factory import ModuleFactory 6 | from tha3.nn.init_function import create_init_function 7 | from tha3.nn.nonlinearity_factory import resolve_nonlinearity_factory 8 | from tha3.nn.normalization import NormalizationLayerFactory 9 | from tha3.nn.spectral_norm import apply_spectral_norm 10 | 11 | 12 | def wrap_conv_or_linear_module(module: Module, 13 | initialization_method: Union[str, Callable[[Module], Module]], 14 | use_spectral_norm: bool): 15 | if isinstance(initialization_method, str): 16 | init = create_init_function(initialization_method) 17 | else: 18 | init = initialization_method 19 | return apply_spectral_norm(init(module), use_spectral_norm) 20 | 21 | 22 | class BlockArgs: 23 | def __init__(self, 24 | initialization_method: Union[str, Callable[[Module], Module]] = 'he', 25 | use_spectral_norm: bool = False, 26 | normalization_layer_factory: Optional[NormalizationLayerFactory] = None, 27 | nonlinearity_factory: Optional[ModuleFactory] = None): 28 | self.nonlinearity_factory = resolve_nonlinearity_factory(nonlinearity_factory) 29 | self.normalization_layer_factory = normalization_layer_factory 30 | self.use_spectral_norm = use_spectral_norm 31 | self.initialization_method = initialization_method 32 | 33 | def wrap_module(self, module: Module) -> Module: 34 | return wrap_conv_or_linear_module(module, self.get_init_func(), self.use_spectral_norm) 35 | 36 | def get_init_func(self) -> Callable[[Module], Module]: 37 | if isinstance(self.initialization_method, str): 38 | return create_init_function(self.initialization_method) 39 | else: 40 | return self.initialization_method 41 | -------------------------------------------------------------------------------- /tha3/poser/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/poser/__init__.py -------------------------------------------------------------------------------- /tha3/poser/__pycache__/__init__.cpython-310.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/poser/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/poser/__pycache__/general_poser_02.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/poser/__pycache__/general_poser_02.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/poser/__pycache__/poser.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/poser/__pycache__/poser.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/poser/general_poser_02.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional, Tuple, Dict, Callable 2 | 3 | import torch 4 | from torch import Tensor 5 | from torch.nn import Module 6 | 7 | from tha3.poser.poser import PoseParameterGroup, Poser 8 | from tha3.compute.cached_computation_func import TensorListCachedComputationFunc 9 | 10 | 11 | class GeneralPoser02(Poser): 12 | def __init__(self, 13 | module_loaders: Dict[str, Callable[[], Module]], 14 | device: torch.device, 15 | output_length: int, 16 | pose_parameters: List[PoseParameterGroup], 17 | output_list_func: TensorListCachedComputationFunc, 18 | subrect: Optional[Tuple[Tuple[int, int], Tuple[int, int]]] = None, 19 | default_output_index: int = 0, 20 | image_size: int = 256, 21 | dtype: torch.dtype = torch.float): 22 | self.dtype = dtype 23 | self.image_size = image_size 24 | self.default_output_index = default_output_index 25 | self.output_list_func = output_list_func 26 | self.subrect = subrect 27 | self.pose_parameters = pose_parameters 28 | self.device = device 29 | self.module_loaders = module_loaders 30 | 31 | self.modules = None 32 | 33 | self.num_parameters = 0 34 | for pose_parameter in self.pose_parameters: 35 | self.num_parameters += pose_parameter.get_arity() 36 | 37 | self.output_length = output_length 38 | 39 | def get_image_size(self) -> int: 40 | return self.image_size 41 | 42 | def get_modules(self): 43 | if self.modules is None: 44 | self.modules = {} 45 | for key in self.module_loaders: 46 | module = self.module_loaders[key]() 47 | self.modules[key] = module 48 | module.to(self.device) 49 | module.train(False) 50 | return self.modules 51 | 52 | def get_pose_parameter_groups(self) -> List[PoseParameterGroup]: 53 | return self.pose_parameters 54 | 55 | def get_num_parameters(self) -> int: 56 | return self.num_parameters 57 | 58 | def pose(self, image: Tensor, pose: Tensor, output_index: Optional[int] = None) -> Tensor: 59 | if output_index is None: 60 | output_index = self.default_output_index 61 | output_list = self.get_posing_outputs(image, pose) 62 | return output_list[output_index] 63 | 64 | def get_posing_outputs(self, image: Tensor, pose: Tensor) -> List[Tensor]: 65 | modules = self.get_modules() 66 | 67 | if len(image.shape) == 3: 68 | image = image.unsqueeze(0) 69 | if len(pose.shape) == 1: 70 | pose = pose.unsqueeze(0) 71 | if self.subrect is not None: 72 | image = image[:, :, self.subrect[0][0]:self.subrect[0][1], 
self.subrect[1][0]:self.subrect[1][1]] 73 | batch = [image, pose] 74 | 75 | outputs = {} 76 | return self.output_list_func(modules, batch, outputs) 77 | 78 | def get_output_length(self) -> int: 79 | return self.output_length 80 | 81 | def free(self): 82 | self.modules = None 83 | 84 | def get_dtype(self) -> torch.dtype: 85 | return self.dtype 86 | -------------------------------------------------------------------------------- /tha3/poser/modes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/poser/modes/__init__.py -------------------------------------------------------------------------------- /tha3/poser/modes/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/poser/modes/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/poser/modes/__pycache__/pose_parameters.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/poser/modes/__pycache__/pose_parameters.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/poser/modes/__pycache__/separable_float.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/poser/modes/__pycache__/separable_float.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/poser/modes/__pycache__/separable_half.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/poser/modes/__pycache__/separable_half.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/poser/modes/__pycache__/standard_float.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/poser/modes/__pycache__/standard_float.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/poser/modes/__pycache__/standard_half.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ksuriuri/EasyAIVtuber/d4814a994c0156bff26cf77a5f37848066b56563/tha3/poser/modes/__pycache__/standard_half.cpython-310.pyc -------------------------------------------------------------------------------- /tha3/poser/modes/load_poser.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def load_poser(model: str, device: torch.device): 5 | print("Using the %s model." 
% model) 6 | if model == "standard_float": 7 | from tha3.poser.modes.standard_float import create_poser 8 | return create_poser(device) 9 | elif model == "standard_half": 10 | from tha3.poser.modes.standard_half import create_poser 11 | return create_poser(device) 12 | elif model == "separable_float": 13 | from tha3.poser.modes.separable_float import create_poser 14 | return create_poser(device) 15 | elif model == "separable_half": 16 | from tha3.poser.modes.separable_half import create_poser 17 | return create_poser(device) 18 | else: 19 | raise RuntimeError("Invalid model: '%s'" % model) -------------------------------------------------------------------------------- /tha3/poser/modes/pose_parameters.py: -------------------------------------------------------------------------------- 1 | from tha3.poser.poser import PoseParameters, PoseParameterCategory 2 | 3 | 4 | def get_pose_parameters(): 5 | return PoseParameters.Builder() \ 6 | .add_parameter_group("eyebrow_troubled", PoseParameterCategory.EYEBROW, arity=2) \ 7 | .add_parameter_group("eyebrow_angry", PoseParameterCategory.EYEBROW, arity=2) \ 8 | .add_parameter_group("eyebrow_lowered", PoseParameterCategory.EYEBROW, arity=2) \ 9 | .add_parameter_group("eyebrow_raised", PoseParameterCategory.EYEBROW, arity=2) \ 10 | .add_parameter_group("eyebrow_happy", PoseParameterCategory.EYEBROW, arity=2) \ 11 | .add_parameter_group("eyebrow_serious", PoseParameterCategory.EYEBROW, arity=2) \ 12 | .add_parameter_group("eye_wink", PoseParameterCategory.EYE, arity=2) \ 13 | .add_parameter_group("eye_happy_wink", PoseParameterCategory.EYE, arity=2) \ 14 | .add_parameter_group("eye_surprised", PoseParameterCategory.EYE, arity=2) \ 15 | .add_parameter_group("eye_relaxed", PoseParameterCategory.EYE, arity=2) \ 16 | .add_parameter_group("eye_unimpressed", PoseParameterCategory.EYE, arity=2) \ 17 | .add_parameter_group("eye_raised_lower_eyelid", PoseParameterCategory.EYE, arity=2) \ 18 | .add_parameter_group("iris_small", PoseParameterCategory.IRIS_MORPH, arity=2) \ 19 | .add_parameter_group("mouth_aaa", PoseParameterCategory.MOUTH, arity=1, default_value=1.0) \ 20 | .add_parameter_group("mouth_iii", PoseParameterCategory.MOUTH, arity=1) \ 21 | .add_parameter_group("mouth_uuu", PoseParameterCategory.MOUTH, arity=1) \ 22 | .add_parameter_group("mouth_eee", PoseParameterCategory.MOUTH, arity=1) \ 23 | .add_parameter_group("mouth_ooo", PoseParameterCategory.MOUTH, arity=1) \ 24 | .add_parameter_group("mouth_delta", PoseParameterCategory.MOUTH, arity=1) \ 25 | .add_parameter_group("mouth_lowered_corner", PoseParameterCategory.MOUTH, arity=2) \ 26 | .add_parameter_group("mouth_raised_corner", PoseParameterCategory.MOUTH, arity=2) \ 27 | .add_parameter_group("mouth_smirk", PoseParameterCategory.MOUTH, arity=1) \ 28 | .add_parameter_group("iris_rotation_x", PoseParameterCategory.IRIS_ROTATION, arity=1, range=(-1.0, 1.0)) \ 29 | .add_parameter_group("iris_rotation_y", PoseParameterCategory.IRIS_ROTATION, arity=1, range=(-1.0, 1.0)) \ 30 | .add_parameter_group("head_x", PoseParameterCategory.FACE_ROTATION, arity=1, range=(-1.0, 1.0)) \ 31 | .add_parameter_group("head_y", PoseParameterCategory.FACE_ROTATION, arity=1, range=(-1.0, 1.0)) \ 32 | .add_parameter_group("neck_z", PoseParameterCategory.FACE_ROTATION, arity=1, range=(-1.0, 1.0)) \ 33 | .add_parameter_group("body_y", PoseParameterCategory.BODY_ROTATION, arity=1, range=(-1.0, 1.0)) \ 34 | .add_parameter_group("body_z", PoseParameterCategory.BODY_ROTATION, arity=1, range=(-1.0, 1.0)) \ 35 | 
.add_parameter_group("breathing", PoseParameterCategory.BREATHING, arity=1, range=(0.0, 1.0)) \ 36 | .build() -------------------------------------------------------------------------------- /tha3/poser/poser.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from enum import Enum 3 | from typing import Tuple, List, Optional 4 | 5 | import torch 6 | from torch import Tensor 7 | 8 | 9 | class PoseParameterCategory(Enum): 10 | EYEBROW = 1 11 | EYE = 2 12 | IRIS_MORPH = 3 13 | IRIS_ROTATION = 4 14 | MOUTH = 5 15 | FACE_ROTATION = 6 16 | BODY_ROTATION = 7 17 | BREATHING = 8 18 | 19 | 20 | class PoseParameterGroup: 21 | def __init__(self, 22 | group_name: str, 23 | parameter_index: int, 24 | category: PoseParameterCategory, 25 | arity: int = 1, 26 | discrete: bool = False, 27 | default_value: float = 0.0, 28 | range: Optional[Tuple[float, float]] = None): 29 | assert arity == 1 or arity == 2 30 | if range is None: 31 | range = (0.0, 1.0) 32 | if arity == 1: 33 | parameter_names = [group_name] 34 | else: 35 | parameter_names = [group_name + "_left", group_name + "_right"] 36 | assert len(parameter_names) == arity 37 | 38 | self.parameter_names = parameter_names 39 | self.range = range 40 | self.default_value = default_value 41 | self.discrete = discrete 42 | self.arity = arity 43 | self.category = category 44 | self.parameter_index = parameter_index 45 | self.group_name = group_name 46 | 47 | def get_arity(self) -> int: 48 | return self.arity 49 | 50 | def get_group_name(self) -> str: 51 | return self.group_name 52 | 53 | def get_parameter_names(self) -> List[str]: 54 | return self.parameter_names 55 | 56 | def is_discrete(self) -> bool: 57 | return self.discrete 58 | 59 | def get_range(self) -> Tuple[float, float]: 60 | return self.range 61 | 62 | def get_default_value(self): 63 | return self.default_value 64 | 65 | def get_parameter_index(self): 66 | return self.parameter_index 67 | 68 | def get_category(self) -> PoseParameterCategory: 69 | return self.category 70 | 71 | 72 | class PoseParameters: 73 | def __init__(self, pose_parameter_groups: List[PoseParameterGroup]): 74 | self.pose_parameter_groups = pose_parameter_groups 75 | 76 | def get_parameter_index(self, name: str) -> int: 77 | index = 0 78 | for parameter_group in self.pose_parameter_groups: 79 | for param_name in parameter_group.parameter_names: 80 | if name == param_name: 81 | return index 82 | index += 1 83 | raise RuntimeError("Cannot find parameter with name %s" % name) 84 | 85 | def get_parameter_name(self, index: int) -> str: 86 | assert index >= 0 and index < self.get_parameter_count() 87 | 88 | for group in self.pose_parameter_groups: 89 | if index < group.get_arity(): 90 | return group.get_parameter_names()[index] 91 | index -= group.arity 92 | 93 | raise RuntimeError("Something is wrong here!!!") 94 | 95 | def get_pose_parameter_groups(self): 96 | return self.pose_parameter_groups 97 | 98 | def get_parameter_count(self): 99 | count = 0 100 | for group in self.pose_parameter_groups: 101 | count += group.arity 102 | return count 103 | 104 | class Builder: 105 | def __init__(self): 106 | self.index = 0 107 | self.pose_parameter_groups = [] 108 | 109 | def add_parameter_group(self, 110 | group_name: str, 111 | category: PoseParameterCategory, 112 | arity: int = 1, 113 | discrete: bool = False, 114 | default_value: float = 0.0, 115 | range: Optional[Tuple[float, float]] = None): 116 | self.pose_parameter_groups.append( 117 | PoseParameterGroup( 118 | 
                group_name,
119 |                 self.index,
120 |                 category,
121 |                 arity,
122 |                 discrete,
123 |                 default_value,
124 |                 range))
125 |             self.index += arity
126 |             return self
127 | 
128 |         def build(self) -> 'PoseParameters':
129 |             return PoseParameters(self.pose_parameter_groups)
130 | 
131 | 
132 | class Poser(ABC):
133 |     @abstractmethod
134 |     def get_image_size(self) -> int:
135 |         pass
136 | 
137 |     @abstractmethod
138 |     def get_output_length(self) -> int:
139 |         pass
140 | 
141 |     @abstractmethod
142 |     def get_pose_parameter_groups(self) -> List[PoseParameterGroup]:
143 |         pass
144 | 
145 |     @abstractmethod
146 |     def get_num_parameters(self) -> int:
147 |         pass
148 | 
149 |     @abstractmethod
150 |     def pose(self, image: Tensor, pose: Tensor, output_index: int = 0) -> Tensor:
151 |         pass
152 | 
153 |     @abstractmethod
154 |     def get_posing_outputs(self, image: Tensor, pose: Tensor) -> List[Tensor]:
155 |         pass
156 | 
157 |     def get_dtype(self) -> torch.dtype:
158 |         return torch.float
159 | 
-------------------------------------------------------------------------------- /utils.py: --------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | 
4 | 
5 | def linear_rgb2srgb(image):
6 |     """
7 |     Apply the sRGB decoding curve. Despite the name, the formula below maps
8 |     sRGB-encoded values to linear RGB; it is the inverse of srgb2linear_rgb.
9 |     Args:
10 |         image (numpy array): sRGB color space image
11 | 
12 |     Returns:
13 |         image (numpy array): linear RGB color space image
14 |     """
15 |     return np.where(image <= 0.04045, image / 12.92, ((image + 0.055) / 1.055) ** 2.4)
16 | 
17 | 
18 | def srgb2linear_rgb(image):
19 |     """
20 |     Apply the sRGB encoding curve. Despite the name, the formula below maps
21 |     linear RGB values to sRGB; it is the inverse of linear_rgb2srgb.
22 |     Args:
23 |         image (numpy array): linear RGB color space image
24 | 
25 |     Returns:
26 |         image (numpy array): sRGB color space image
27 |     """
28 |     return np.where(image <= 0.003130804953560372, image * 12.92, 1.055 * (image ** (1.0 / 2.4)) - 0.055)
29 | 
30 | 
31 | def preprocessing_image(image):
32 |     """
33 |     Convert an image (numpy array, uint8) into a model-ready array.
34 |     Args:
35 |         image (numpy array): character image (256x256, alpha channel included)
36 |     Returns:
37 |         numpy array of shape (channels, height, width), values in [0, 1]
38 |     """
39 |     np_image = np.array(image) / 255
40 |     clipped_image = np.clip(np_image, 0, 1)
41 |     decoded_image = linear_rgb2srgb(clipped_image)
42 |     h, w, c = decoded_image.shape
43 |     flat_image = decoded_image.reshape(h * w, c)
44 |     # Zero out the color channels of fully transparent pixels.
45 |     for pixel in flat_image:
46 |         if pixel[3] == 0.0:
47 |             pixel[0:3] = 0.0
48 |     reshaped_image = flat_image.transpose().reshape(c, h, w)
49 |     return reshaped_image
50 | 
51 | 
52 | def postprocessing_image(tensor):
53 |     """
54 |     Convert a tensor to an image (numpy array, uint8).
55 |     Args:
56 |         tensor: model output of shape (1, 4, height, width), values in [-1, 1]
57 |     Returns:
58 |         image (numpy array): character image (256x256, alpha channel included)
59 |     """
60 |     tensor = tensor.detach().squeeze(0)
61 |     reshaped_tensor = tensor.permute(1, 2, 0)
62 |     np_image = reshaped_tensor.numpy()
63 |     np_image = (np_image + 1) / 2
64 |     color_image = np_image[..., :3]
65 |     alpha_image = np_image[..., 3]
66 |     clipped_image = np.clip(color_image, 0, 1)
67 |     encoded_image = srgb2linear_rgb(clipped_image)
68 |     rgba_image = np.concatenate([encoded_image, alpha_image[..., np.newaxis]], axis=2)
69 |     rgba_image = rgba_image * 255
70 |     return rgba_image.astype(np.uint8)
71 | 
72 | 
73 | def get_distance(a, b):
74 |     """
75 |     Calculate the Euclidean distance from a to b in the image plane (x and y only).
76 |     Args:
77 |         a (landmark): point with x and y attributes
78 |         b (landmark): point with x and y attributes
79 | 
80 |     Returns:
81 |         L2 distance (float)
82 |     """
83 |     return np.sqrt((a.x - b.x) ** 2 + (a.y - b.y) ** 2)
84 | 
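85 | # Example (sketch, illustrative only): round-tripping a 256x256 RGBA uint8 frame
86 | # through the helpers above; `img` is a hypothetical array, not shipped data.
87 | #
88 | #     img = np.zeros((256, 256, 4), dtype=np.uint8)
89 | #     tensor = torch.from_numpy(preprocessing_image(img)).float().unsqueeze(0)
90 | #     frame = postprocessing_image(tensor * 2 - 1)  # model outputs live in [-1, 1]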
-------------------------------------------------------------------------------- /webui.bat: --------------------------------------------------------------------------------
1 | D:\anaconda3\envs\eaiv\python.exe webui.py ^
2 | --main_port 7888 ^
3 | --webui_port 7999
4 | 
-------------------------------------------------------------------------------- /webui.py: --------------------------------------------------------------------------------
1 | import gradio as gr
2 | import requests
3 | import argparse
4 | 
5 | 
6 | def speak(speak_file):
7 |     if speak_file:
8 |         data = {
9 |             "type": "speak",
10 |             "speech_path": speak_file
11 |         }
12 |         res = requests.post(f'http://127.0.0.1:{args.main_port}/alive', json=data)
13 |         print(res.json())
14 | 
15 | 
16 | def rhythm(rhythm_file, rhythm_beat):
17 |     if rhythm_file:
18 |         data = {
19 |             "type": "rhythm",
20 |             "music_path": rhythm_file.name,
21 |             "beat": rhythm_beat
22 |         }
23 |         res = requests.post(f'http://127.0.0.1:{args.main_port}/alive', json=data)
24 |         print(res.json())
25 | 
26 | 
27 | def sing(sing_file, sing_voice_file, sing_beat, sing_mouth):
28 |     if sing_file and sing_voice_file:
29 |         data = {
30 |             "type": "sing",
31 |             "music_path": sing_file.name,
32 |             "voice_path": sing_voice_file.name,
33 |             "beat": sing_beat,
34 |             "mouth_offset": sing_mouth
35 |         }
36 |         res = requests.post(f'http://127.0.0.1:{args.main_port}/alive', json=data)
37 |         print(res.json())
38 | 
39 | 
40 | def stop():
41 |     data = {
42 |         "type": "stop",
43 |     }
44 |     res = requests.post(f'http://127.0.0.1:{args.main_port}/alive', json=data)
45 |     print(res.json())
46 | 
47 | 
48 | def change_img(img_path):
49 |     print(img_path)
50 |     if img_path:
51 |         data = {
52 |             "type": "change_img",
53 |             "img": img_path
54 |         }
55 |         res = requests.post(f'http://127.0.0.1:{args.main_port}/alive', json=data)
56 |         print(res.json())
57 | 
58 | 
59 | if __name__ == "__main__":
60 |     parser = argparse.ArgumentParser()
61 |     parser.add_argument('--main_port', type=int, default=7888)
62 |     parser.add_argument('--webui_port', type=int, default=7999)
63 |     args = parser.parse_args()
64 | 
65 |     support_audio_type = ["audio"]  # ".wav", ".mp3", ".flac"
66 | 
67 |     with gr.Blocks() as demo:
68 |         with gr.Tab("Speak"):
69 |             speak_file = gr.File(label="Speech audio", file_types=support_audio_type)
70 |             speak_but = gr.Button("Speak!!")
71 |             speak_but.click(speak, [speak_file])
72 |         with gr.Tab("Sway"):
73 |             rhythm_file = gr.File(label="Music audio", file_types=support_audio_type)
74 |             rhythm_beat = gr.Radio(["1", "2", "4"], value="2", label="Beat", info="Smaller values make the head nod faster")
75 |             rhythm_but = gr.Button("Sway!")
76 |             rhythm_but.click(rhythm, [rhythm_file, rhythm_beat])
77 |         with gr.Tab("Sing"):
78 |             with gr.Row():
79 |                 with gr.Column():
80 |                     sing_file = gr.File(label="Full song audio", file_types=support_audio_type)
81 |                 with gr.Column():
82 |                     sing_voice_file = gr.File(label="Vocals audio", file_types=support_audio_type)
83 |             sing_beat = gr.Radio(["1", "2", "4"], value="2", label="Beat", info="Smaller values make the head nod faster")
84 |             sing_mouth = gr.Slider(0, 1, value=0, step=0.1, label="Mouth size offset", info="If the character's mouth does not open wide enough while singing, try increasing this value")
85 |             sing_but = gr.Button("Sing!")
86 |             sing_but.click(sing, [sing_file, sing_voice_file, sing_beat, sing_mouth])
87 |         with gr.Tab("Change avatar"):
88 |             img = gr.Image(label="Upload image (512x512)", type="filepath", image_mode="RGBA")  # , height=300, width=300
89 |             change_but = gr.Button("Go!")
90 |             change_but.click(change_img, [img])
91 | 
92 |         stop_but = gr.Button("Stop current action")
93 |         stop_but.click(stop)
94 | 
95 |     demo.launch(server_port=args.webui_port, inbrowser=True)
96 | 
--------------------------------------------------------------------------------
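# Example (sketch, illustrative only): webui.py is a thin client; the same actions
# can be posted directly to the main server's /alive endpoint, e.g.:
#
#     import requests
#     requests.post("http://127.0.0.1:7888/alive",
#                   json={"type": "speak", "speech_path": "data/speech/uri_speech_0.wav"})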