├── .gitignore ├── README.md ├── main.html ├── readmeimage ├── image_2pzOQApOwH.png ├── image_3yF-LIoRPT.png ├── image_B15y4lD2SH.png ├── image_CBR-0Tc6ho.png ├── image_CKiRw9kizf.png ├── image_EMZWL7ClKz.png ├── image_J3kgj37Vnt.png ├── image_LV7lyDVdOx.png ├── image_LiEyqE9pgT.png ├── image_M19e48kRiX.png ├── image_QL9jMYMHD0.png ├── image_T0_ckg9_LV.png ├── image_U3viOiVe1Y.png ├── image_UwBZo8EelV.png ├── image_Wo_FrNscxf.png ├── image_YOHFI_YZ-8.png ├── image__xC_yabZfZ.png ├── image_aZK-HU_3W8.png ├── image_dyJYoeYgSh.png ├── image_hf_csisjp9.png ├── image_k7MWc1aRsf.png ├── image_klCmpEUaqI.png ├── image_qXYsLclsOZ.png ├── image_qsm6WL32ZC.png ├── image_s3Gj42KCar.png └── image_slayh9wlFO.png ├── server ├── __init__.py ├── api │ ├── __init__.py │ ├── inference_params_manager.py │ └── ras_api.py ├── bean │ ├── __init__.py │ ├── base_model.py │ ├── finished_product │ │ ├── __init__.py │ │ ├── finished_product_manager.py │ │ └── product_param_config_template.py │ ├── inference_task │ │ ├── __init__.py │ │ ├── gpt_model.py │ │ ├── obj_inference_task.py │ │ ├── obj_inference_task_audio.py │ │ ├── obj_inference_task_compare_params.py │ │ ├── obj_inference_task_text.py │ │ ├── task_cell.py │ │ └── vits_model.py │ ├── long_text_inference │ │ └── __init__.py │ ├── reference_audio │ │ ├── __init__.py │ │ ├── obj_inference_category.py │ │ ├── obj_reference_audio.py │ │ ├── obj_reference_audio_compare_detail.py │ │ └── obj_reference_audio_compare_task.py │ ├── result_evaluation │ │ ├── __init__.py │ │ └── obj_inference_task_result_audio.py │ ├── sound_fusion │ │ ├── __init__.py │ │ ├── obj_inference_task_sound_fusion_audio.py │ │ └── obj_sound_fusion_audio.py │ ├── system │ │ ├── __init__.py │ │ ├── role.py │ │ ├── role_category.py │ │ ├── sys_cache.py │ │ └── sys_cache_constants.py │ ├── text │ │ ├── __init__.py │ │ └── obj_inference_text.py │ └── tts_correction │ │ ├── __init__.py │ │ ├── obj_tts_correction_task.py │ │ └── obj_tts_correction_task_detail.py ├── common 
│ ├── __init__.py │ ├── config_manager.py │ ├── config_params.py │ ├── custom_exception.py │ ├── filter.py │ ├── log_config.py │ ├── ras_api_monitor.py │ ├── response_result.py │ └── time_util.py ├── config.ini ├── controller │ ├── __init__.py │ ├── finished_product │ │ ├── __init__.py │ │ └── finished_product_controller.py │ ├── inference_task │ │ ├── __init__.py │ │ └── inference_task_controller.py │ ├── long_text_inference │ │ ├── __init__.py │ │ └── long_text_inference_controller.py │ ├── reference_audio │ │ ├── __init__.py │ │ └── reference_audio_controller.py │ ├── result_evaluation │ │ ├── __init__.py │ │ └── result_evaluation_controller.py │ ├── sound_fusion │ │ ├── __init__.py │ │ └── sound_fusion_controller.py │ ├── system │ │ ├── __init__.py │ │ └── system_controller.py │ ├── text │ │ ├── __init__.py │ │ └── text_controller.py │ └── tts_correction │ │ ├── __init__.py │ │ └── tts_correction_controller.py ├── dao │ ├── __init__.py │ ├── data_base_manager.py │ ├── finished_product │ │ ├── __init__.py │ │ └── finished_product_dao.py │ ├── inference_task │ │ ├── __init__.py │ │ ├── inference_task_dao.py │ │ └── inference_text_dao.py │ ├── init_master_table.py │ ├── init_slave_table.py │ ├── reference_audio │ │ ├── __init__.py │ │ ├── reference_audio_compare_dao.py │ │ ├── reference_audio_dao.py │ │ └── reference_category_dao.py │ ├── result_evaluation │ │ ├── __init__.py │ │ └── result_evaluation_dao.py │ ├── sound_fusion │ │ ├── __init__.py │ │ └── sound_fusion_dao.py │ ├── system │ │ ├── __init__.py │ │ └── system_dao.py │ └── tts_correction │ │ ├── __init__.py │ │ └── tts_correction_dao.py ├── service │ ├── __init__.py │ ├── finished_product │ │ ├── __init__.py │ │ └── finished_product_service.py │ ├── inference_task │ │ ├── __init__.py │ │ ├── inference_task_service.py │ │ ├── inference_text_service.py │ │ └── model_manager_service.py │ ├── reference_audio │ │ ├── __init__.py │ │ ├── reference_audio_compare_sevice.py │ │ ├── reference_audio_service.py │ │ 
└── reference_category_service.py │ ├── result_evaluation │ │ ├── __init__.py │ │ └── result_evaluation_service.py │ ├── sound_fusion │ │ ├── __init__.py │ │ └── sound_fusion_service.py │ ├── system │ │ ├── __init__.py │ │ └── system_service.py │ └── tts_correction │ │ ├── __init__.py │ │ └── tts_correction_service.py ├── tool │ ├── __init__.py │ ├── asr │ │ ├── config.py │ │ ├── fasterwhisper_asr.py │ │ ├── funasr_asr.py │ │ ├── inference_task_asr.py │ │ └── models │ │ │ └── .gitignore │ ├── speaker_verification │ │ ├── __init__.py │ │ ├── audio_compare.py │ │ ├── inference_task_voice_similarity.py │ │ ├── models │ │ │ └── speech_campplus_sv_zh-cn_16k-common │ │ │ │ └── 保留 │ │ └── voice_similarity.py │ └── text_comparison │ │ ├── __init__.py │ │ ├── asr_text_process.py │ │ ├── models │ │ └── .gitignore │ │ └── text_comparison.py └── util │ ├── __init__.py │ └── util.py ├── server_api.bat ├── server_api.py └── web ├── config.js ├── image └── top_p top_k temperauter.png ├── js ├── darkmode │ └── darkmode-js.min.js ├── echarts │ └── echarts.min.js ├── jquery │ └── jquery-3.7.1.min.js ├── layui │ ├── css │ │ ├── layui.css │ │ └── layui.css.map │ ├── font │ │ ├── iconfont.eot │ │ ├── iconfont.svg │ │ ├── iconfont.ttf │ │ ├── iconfont.woff │ │ └── iconfont.woff2 │ ├── layui.js │ └── layui.js.map ├── smart_wizard │ ├── jquery.smartWizard.min.js │ └── smart_wizard_all.min.css ├── sortable │ └── Sortable.min.js ├── tippy │ ├── light-border.css │ ├── light.css │ ├── material.css │ ├── popper.min.js │ ├── scale-extreme.css │ ├── scale-subtle.css │ ├── scale.css │ ├── tippy-bundle.umd.js │ ├── tippy.css │ └── translucent.css ├── wavefile │ └── wavefile.js ├── wavesurfer │ ├── hover.min.js │ ├── regions.min.js │ ├── spectrogram.min.js │ ├── timeline.min.js │ ├── wavesurfer.min.js │ └── zoom.min.js └── xm_select │ └── xm-select.js └── pages ├── bean └── common_bean.js ├── business ├── finished_product │ ├── finished_product.html │ ├── finished_product_edit.html │ └── 
finished_product_speak.html ├── home.html ├── inference_task │ ├── inference_task.html │ ├── inference_task_result_audio_list.html │ ├── inference_task_sound_fusion_select.html │ ├── reference_select.html │ ├── task_detail.html │ └── text_manager.html ├── long_text_inference │ ├── inference_params_edit.html │ ├── long_text_deal_with.html │ ├── long_text_inference.html │ ├── long_text_refer_audio_select.html │ ├── long_text_result_audio_select.html │ └── long_text_select.html ├── ras_setting.html ├── reference_audio │ ├── inference_audio_result_list.html │ ├── reference_audio.html │ ├── reference_audio_compare.html │ ├── reference_audio_edit.html │ ├── reference_audio_split.html │ ├── reference_audio_split_item.html │ └── report_result.html ├── result_evaluation │ ├── result_audio_all_detail.html │ ├── result_audio_list.html │ └── result_evaluation.html ├── sound_fusion │ ├── sound_fusion_edit.html │ ├── sound_fusion_manager.html │ └── sound_fusion_select.html ├── template.html └── tts_correction │ ├── tts_correction_create.html │ ├── tts_correction_inference.html │ └── tts_correction_list.html ├── common ├── audio.js ├── common_jquery.js ├── ras_api.js ├── util.js └── wavesurfer.js ├── json └── reference_audio.json └── style └── common-style.css /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | __pycache__/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RefAudioSelectorV2-BaseOn-GptSoVits 2 | 3 | 1. 为什么开发这个项目 4 | 5 | 去年五一的时候发布了一个v1版本,用来简化参考音频的筛选。但是那个功能做的比较粗糙,在后面做崩坏三第一部模型训练的过程中,发现了很多不足之处,所以基于当时的体验,我设计了这个v2版本,进一步降低筛选参考音频的时间成本。 6 | 2. 先前做参考筛选时的主要痛点 7 | 1. 缺少对参考音频的管理,不方便记录以及切换参考音频的相关参数 8 | 2. 对比维度单一,模型推理参数除了参考音频,还有Gpt、SoVits模型轮数、top_K、top_P等其他众多参数,过去的方式无法对多种参数进行效果比较 9 | 3. 对推理结果的分析,过于依靠人耳,对耳朵的压力很大 10 | 4. 对推理结果的记录不方便,过去只能新建一个txt或excel手工记录,不灵活 11 | 5. 对10s以上的优质音频不便用于参考推理比较可惜 12 | 6. 
缺少对参考音频高效的比较准确的分类方法 13 | 3. 针对以上痛点的解决措施 14 | 1. 对参考音频做了集中管理,无论是后续做人工校准,还是切换参考音频,都很方便 15 | 16 | ![](readmeimage/image_Wo_FrNscxf.png) 17 | 2. 添加更全面的参数对比,除了参考音频对比,还添加了Gpt模型、SoVits模型、topK、topP、temperature、文本分隔符、音频语速、融合音频等单变量对比,以及 组合模型、kpt三参数等多变量对比 18 | 19 | ![](readmeimage/image_slayh9wlFO.png) 20 | 3. 结果音频按时长排序,不同参考音频对模型推理的语速效果是存在影响的,根据推理结果音频的排序,可以快速确定一批需要重点关注的音频对象 21 | 22 | ![](readmeimage/image_LiEyqE9pgT.png) 23 | 4. 音频结果可视化,添加结果音频频谱图,可以直观的发现大段电音、复读、吞句、以及频繁换气之类的问题,减轻耳朵的负担 24 | 25 | ![](readmeimage/image_dyJYoeYgSh.png) 26 | 5. 提供了结果音频打分功能,可以记录自己对此推理结果的评分,便于后续筛选 27 | 6. 音频分类,对于参考音频片段多的角色,比如一千以上的参考音频片段,提供了基于阿里的说话识别模型,进行音频分类的功能 28 | 29 | ![](readmeimage/image_klCmpEUaqI.png) 30 | 7. 参考音频切分,对于10s以上的参考音频,以及整体不错,但是存在部分瑕疵,需要微调的参考音频,可以利用音频切分功能进行拆分 31 | 32 | ![](readmeimage/image_LV7lyDVdOx.png) 33 | 4. 主体流程 34 | 1. 下载整合包: 35 | 通过百度网盘分享的文件:RAS2整合包 36 | 链接:https://pan.baidu.com/s/1R-T_1y1Nyqbq8pBaHxk9Cw?pwd=i8ep 37 | 提取码:i8ep 38 | 此整合包,内嵌了fasterwhisper-large-v3模型,所以体积较大 39 | 说明文档(github不开代理无法显示图片):https://www.wolai.com/x1AVSzBUCn4EF4HYNvdfrF 40 | 2. 解压后,在RefAudioSelectorV2-BaseOn-GptSoVits目录下,双击server\_api.bat文件启动项目 41 | 3. 按照【分类-名称】创建角色,比如,男性-张三 42 | 43 | ![](readmeimage/image_EMZWL7ClKz.png) 44 | 4. 将GptSoVits模型生成的list文件导入到本系统中 45 | 46 | ![](readmeimage/image_aZK-HU_3W8.png) 47 | 5. (可选)对导入的参考音频进行分类 48 | 49 | ![](readmeimage/image_QL9jMYMHD0.png) 50 | 51 | ![](readmeimage/image_T0_ckg9_LV.png) 52 | 53 | ![](readmeimage/image_hf_csisjp9.png) 54 | 55 | ![](readmeimage/image_klCmpEUaqI.png) 56 | 57 | ![](readmeimage/image_M19e48kRiX.png) 58 | 6. 创建基于参考音频参数的对比任务 59 | 60 | ![](readmeimage/image_qXYsLclsOZ.png) 61 | 62 | ![](readmeimage/image_slayh9wlFO.png) 63 | 7. 推理结果音频 64 | 65 | ![](readmeimage/image_2pzOQApOwH.png) 66 | 8. 在结果评测界面依次评测音频,并打分 67 | 68 | ![](readmeimage/image_YOHFI_YZ-8.png) 69 | 70 | ![](readmeimage/image_dyJYoeYgSh.png) 71 | 9. 
在长文测试环节,挑选最高评分音频进行测试并打分 72 | 73 | ![](readmeimage/image__xC_yabZfZ.png) 74 | 75 | ![](readmeimage/image_k7MWc1aRsf.png) 76 | 77 | ![](readmeimage/image_CBR-0Tc6ho.png) 78 | 79 | ![](readmeimage/image_CKiRw9kizf.png) 80 | 81 | ![](readmeimage/image_3yF-LIoRPT.png) 82 | 10. 将最符合期望的音频放入成品管理 83 | 84 | ![](readmeimage/image_qsm6WL32ZC.png) 85 | 86 | ![](readmeimage/image_J3kgj37Vnt.png) 87 | 11. 如果没有找到合适的参考音频,可以前往推理任务界面调整对比参数,比如降低Gpt模型轮数,重新启动流程。 88 | 12. 如果绝大部分结果音频的质量都非常糟糕,应考虑寻找更高质量的基础音频重新训练模型 89 | 13. 如果发现某个参考音频非常契合期望,但是存在部分瑕疵,可以对此音频进行分割,挑选子音频进行推理测试,或许能找到更完美的推理结果 90 | 91 | ![](readmeimage/image_LV7lyDVdOx.png) 92 | 5. 技术结构 93 | 1. 以SQLite作为数据库 94 | 2. 用layui设计前端界面 95 | 3. 后端采用python 96 | 4. 依赖GptSoVits项目和其运行环境 97 | 5. 对GptSoVits项目的少部分代码做了调整,主要是切换模型加载的路径(我将GSV项目的工作目录切换到了本项目下,因此GSV模型加载路径需要调整),以及处理一个阿里说话人识别模型在windows环境下运行存在的兼容性问题 98 | 1. 找GptSoVits项目下的GPT\_SoVITS/text/chinese2.py文件,用下面这段代码,替换掉原第三十行的g2pw模型加载代码。 99 | 100 | model\_dir = os.environ.get( 101 | 102 | "g2pw\_model\_dir", "GPT\_SoVITS/text/G2PWModel" 103 | 104 | ) 105 | 106 | model\_source = os.environ.get( 107 | 108 | "g2pw\_model\_source", "GPT\_SoVITS/pretrained\_models/chinese-roberta-wwm-ext-large" 109 | 110 | ) 111 | 112 | g2pw = G2PWPinyin(model\_dir=model\_dir,model\_source=model\_source,v\_to\_u=False, neutral\_tone\_with\_five=True) 113 | 114 | ![](readmeimage/image_s3Gj42KCar.png) 115 | 2. 
windows不支持torchaudio\_extension中的sox,百度到的解决办法 116 | 117 | 修改GptSoVits的runtime\lib\site-packages\modelscope\pipelines\audio\speaker\_verification\_light\_pipeline.py中的类 SpeakerVerificationPipeline中函数preprocess下的 118 | 119 | data, fs = torchaudio.sox\_effects.apply\_effects\_tensor( 120 | 121 | data, 122 | 123 | fs, 124 | 125 | effects=\[\[ 126 | 127 | 'rate', 128 | 129 | str(self.model\_config\['sample\_rate']) 130 | 131 | ]]) 132 | 133 | 为 134 | 135 | resampler = torchaudio.transforms.Resample(orig\_freq=fs, new\_freq=self.model\_config\['sample\_rate']) 136 | 137 | data = resampler(data) 138 | 139 | ![](readmeimage/image_UwBZo8EelV.png) 140 | 6. 文件存储结构,分为主库和分库两部分,主库存放推理文本等公用内容,分库则各自存放不同角色的数据,如果需要删除某个角色,直接在分库删除此角色目录即可 141 | 142 | ![](readmeimage/image_U3viOiVe1Y.png) 143 | 6. 潜在问题 144 | 1. 在执行推理任务时,为了加快推理速度,添加了多进程推理,但是会遇到一些偶发问题,比如GPU占用100%但是任务没有推进;或者直接闪退,没有任何错误信息。这些在多进程并发推理时,有时会发生有时不会,保险起见,我默认只设置了推理进程为1。如果要加速的,就提高推理进程数,但更容易遇到上面的问题 145 | 146 | ![](readmeimage/image_B15y4lD2SH.png) 147 | -------------------------------------------------------------------------------- /readmeimage/image_2pzOQApOwH.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image_2pzOQApOwH.png -------------------------------------------------------------------------------- /readmeimage/image_3yF-LIoRPT.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image_3yF-LIoRPT.png -------------------------------------------------------------------------------- /readmeimage/image_B15y4lD2SH.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image_B15y4lD2SH.png -------------------------------------------------------------------------------- /readmeimage/image_CBR-0Tc6ho.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image_CBR-0Tc6ho.png -------------------------------------------------------------------------------- /readmeimage/image_CKiRw9kizf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image_CKiRw9kizf.png -------------------------------------------------------------------------------- /readmeimage/image_EMZWL7ClKz.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image_EMZWL7ClKz.png -------------------------------------------------------------------------------- /readmeimage/image_J3kgj37Vnt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image_J3kgj37Vnt.png -------------------------------------------------------------------------------- /readmeimage/image_LV7lyDVdOx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image_LV7lyDVdOx.png 
-------------------------------------------------------------------------------- /readmeimage/image_LiEyqE9pgT.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image_LiEyqE9pgT.png -------------------------------------------------------------------------------- /readmeimage/image_M19e48kRiX.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image_M19e48kRiX.png -------------------------------------------------------------------------------- /readmeimage/image_QL9jMYMHD0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image_QL9jMYMHD0.png -------------------------------------------------------------------------------- /readmeimage/image_T0_ckg9_LV.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image_T0_ckg9_LV.png -------------------------------------------------------------------------------- /readmeimage/image_U3viOiVe1Y.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image_U3viOiVe1Y.png -------------------------------------------------------------------------------- /readmeimage/image_UwBZo8EelV.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image_UwBZo8EelV.png -------------------------------------------------------------------------------- /readmeimage/image_Wo_FrNscxf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image_Wo_FrNscxf.png -------------------------------------------------------------------------------- /readmeimage/image_YOHFI_YZ-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image_YOHFI_YZ-8.png -------------------------------------------------------------------------------- /readmeimage/image__xC_yabZfZ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image__xC_yabZfZ.png -------------------------------------------------------------------------------- /readmeimage/image_aZK-HU_3W8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image_aZK-HU_3W8.png -------------------------------------------------------------------------------- /readmeimage/image_dyJYoeYgSh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image_dyJYoeYgSh.png 
-------------------------------------------------------------------------------- /readmeimage/image_hf_csisjp9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image_hf_csisjp9.png -------------------------------------------------------------------------------- /readmeimage/image_k7MWc1aRsf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image_k7MWc1aRsf.png -------------------------------------------------------------------------------- /readmeimage/image_klCmpEUaqI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image_klCmpEUaqI.png -------------------------------------------------------------------------------- /readmeimage/image_qXYsLclsOZ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image_qXYsLclsOZ.png -------------------------------------------------------------------------------- /readmeimage/image_qsm6WL32ZC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image_qsm6WL32ZC.png -------------------------------------------------------------------------------- /readmeimage/image_s3Gj42KCar.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image_s3Gj42KCar.png -------------------------------------------------------------------------------- /readmeimage/image_slayh9wlFO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/readmeimage/image_slayh9wlFO.png -------------------------------------------------------------------------------- /server/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/__init__.py -------------------------------------------------------------------------------- /server/api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/api/__init__.py -------------------------------------------------------------------------------- /server/api/inference_params_manager.py: -------------------------------------------------------------------------------- 1 | from server.util.util import str_to_int, str_to_float 2 | 3 | 4 | class InferenceParams: 5 | def __init__(self, 6 | refer_wav_path: str = None, 7 | prompt_text: str = None, 8 | prompt_language: str = None, 9 | cut_punc: str = None, 10 | top_k: int = None, 11 | top_p: float = None, 12 | temperature: float = None, 13 | speed: float = None, 14 | sample_steps: int = None, 15 | if_sr: bool = None, 16 | inp_refs: list = None): 17 | self.refer_wav_path = refer_wav_path 18 | self.prompt_text = prompt_text 19 | self.prompt_language = prompt_language 20 | self.cut_punc = cut_punc 21 | self.top_k = 
str_to_int(top_k, None) 22 | self.top_p = str_to_float(top_p, None) 23 | self.temperature = str_to_float(temperature, None) 24 | self.speed = str_to_float(speed, None) 25 | self.sample_steps = sample_steps 26 | self.if_sr = if_sr 27 | self.inp_refs = inp_refs 28 | 29 | def __str__(self): 30 | return (f"refer_wav_path:{self.refer_wav_path}, " 31 | f"prompt_text:{self.prompt_text}, " 32 | f"prompt_language:{self.prompt_language}, " 33 | f"cut_punc:{self.cut_punc}, " 34 | f"top_k:{self.top_k}, " 35 | f"top_p:{self.top_p}, " 36 | f"temperature:{self.temperature}, " 37 | f"speed:{self.speed}, " 38 | f"sample_steps:{self.sample_steps}, " 39 | f"if_sr:{self.if_sr}, " 40 | f"inp_refs:{self.inp_refs}") 41 | 42 | 43 | class InferenceParamsManager: 44 | def __init__(self): 45 | self.default_params = InferenceParams() 46 | 47 | def set_default_params(self, default_params: InferenceParams): 48 | if default_params.refer_wav_path is not None: 49 | self.default_params.refer_wav_path = default_params.refer_wav_path 50 | if default_params.prompt_text is not None: 51 | self.default_params.prompt_text = default_params.prompt_text 52 | if default_params.prompt_language is not None: 53 | self.default_params.prompt_language = default_params.prompt_language 54 | if default_params.cut_punc is not None: 55 | self.default_params.cut_punc = default_params.cut_punc 56 | if default_params.top_k is not None: 57 | self.default_params.top_k = default_params.top_k 58 | if default_params.top_p is not None: 59 | self.default_params.top_p = default_params.top_p 60 | if default_params.temperature is not None: 61 | self.default_params.temperature = default_params.temperature 62 | if default_params.speed is not None: 63 | self.default_params.speed = default_params.speed 64 | if default_params.sample_steps is not None: 65 | self.default_params.sample_steps = default_params.sample_steps 66 | if default_params.if_sr is not None: 67 | self.default_params.if_sr = default_params.if_sr 68 | if 
default_params.inp_refs is not None: 69 | self.default_params.inp_refs = default_params.inp_refs 70 | 71 | def get_real_params(self, web_params: InferenceParams): 72 | return InferenceParams( 73 | refer_wav_path=get_params(web_params.refer_wav_path, self.default_params.refer_wav_path, None), 74 | prompt_text=get_params(web_params.prompt_text, self.default_params.prompt_text, None), 75 | prompt_language=get_params(web_params.prompt_language, self.default_params.prompt_language, None), 76 | cut_punc=get_params(web_params.cut_punc, self.default_params.cut_punc, None), 77 | top_k=get_params(web_params.top_k, self.default_params.top_k, 10), 78 | top_p=get_params(web_params.top_p, self.default_params.top_p, 1.0), 79 | temperature=get_params(web_params.temperature, self.default_params.temperature, 1.0), 80 | speed=get_params(web_params.speed, self.default_params.speed, 1.0), 81 | sample_steps=get_params(web_params.sample_steps, self.default_params.sample_steps, 32), 82 | if_sr=get_params(web_params.if_sr, self.default_params.if_sr, False), 83 | inp_refs=get_params(web_params.inp_refs, self.default_params.inp_refs, []), 84 | ) 85 | 86 | 87 | def get_params(web_params, exists_params, default_params): 88 | if web_params is not None: 89 | return web_params 90 | if exists_params is not None: 91 | return exists_params 92 | return default_params 93 | -------------------------------------------------------------------------------- /server/bean/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/bean/__init__.py -------------------------------------------------------------------------------- /server/bean/base_model.py: -------------------------------------------------------------------------------- 1 | class BaseModel: 2 | @staticmethod 3 | def snake_to_camel(snake_str): 4 | """Converts a snake_case string 
to camelCase.""" 5 | components = snake_str.split('_') 6 | # 如果字符串只有一个部分,则保持不变;否则首字母大写并拼接 7 | return components[0] + ''.join(x.title() for x in components[1:]) 8 | 9 | def to_camel_case_dict(self): 10 | """Converts the instance attributes to a dictionary with camelCase keys.""" 11 | return {self.snake_to_camel(k): to_camel_case_dict_if_is_base_model(v) for k, v in self.__dict__.items()} 12 | 13 | 14 | def to_camel_case_dict_if_is_base_model(item): 15 | if isinstance(item, BaseModel) and hasattr(item, 'to_camel_case_dict'): 16 | return item.to_camel_case_dict() 17 | if isinstance(item, list): 18 | return convert_list_to_camel_case_dicts(item) 19 | else: 20 | return item 21 | 22 | 23 | def convert_list_to_camel_case_dicts(list): 24 | result = [] 25 | for item in list: 26 | if isinstance(item, BaseModel): 27 | result.append(item.to_camel_case_dict()) 28 | else: 29 | result.append(item) 30 | return result 31 | -------------------------------------------------------------------------------- /server/bean/finished_product/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/bean/finished_product/__init__.py -------------------------------------------------------------------------------- /server/bean/finished_product/finished_product_manager.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from server.bean.base_model import BaseModel 4 | from server.bean.inference_task.gpt_model import GptModel 5 | from server.bean.inference_task.vits_model import VitsModel 6 | from server.bean.sound_fusion.obj_sound_fusion_audio import ObjSoundFusionAudio 7 | from server.common.filter import Filter 8 | from server.util.util import ValidationUtils 9 | 10 | 11 | class ObjFinishedProductManager(BaseModel): 12 | """ 13 | 成品管理类,对应数据库表 
tab_obj_finished_product_manager 14 | """ 15 | 16 | def __init__(self, 17 | id: int = None, # 自增编号 18 | name: str = None, # 成品名称 19 | category: str = None, # 分类 20 | gpt_sovits_version: str = None, # 模型版本 21 | gpt_model_name: str = None, # GPT模型名称 22 | gpt_model_path: str = None, # GPT模型路径 23 | vits_model_name: str = None, # Vits模型名称 24 | vits_model_path: str = None, # Vits模型路径 25 | audio_id: int = None, # 音频id 26 | audio_name: str = None, # 音频名称 27 | audio_path: str = None, # 音频路径 28 | content: str = None, # 音频内容 29 | language: str = None, # 语言 30 | audio_length: int = None, # 音频长度 31 | top_k: int = None, # top_k值 32 | top_p: float = None, # top_p值 33 | temperature: float = None, # 温度 34 | text_delimiter: str = None, # 文本分隔符 35 | speed: float = None, # 语速 36 | sample_steps: int = None, # 采样步数 37 | if_sr: int = None, # 是否超分 38 | inp_refs: str = None, # 融合音频,json字符串 39 | score: int = None, # 评分 40 | remark: str = None, # 备注 41 | create_time=None): # 创建时间 42 | self.id = id # 自增编号 43 | self.name = name # 成品名称 44 | self.category = category # 分类 45 | self.gpt_sovits_version = gpt_sovits_version # 模型版本 46 | self.gpt_model_name = gpt_model_name # GPT模型名称 47 | self.gpt_model_path = gpt_model_path # GPT模型路径 48 | self.vits_model_name = vits_model_name # Vits模型名称 49 | self.vits_model_path = vits_model_path # Vits模型路径 50 | self.audio_id = audio_id # 音频id 51 | self.audio_name = audio_name # 音频名称 52 | self.audio_path = audio_path # 音频路径 53 | self.content = content # 音频内容 54 | self.language = language # 语言 55 | self.audio_length = audio_length # 音频长度 56 | self.top_k = top_k # top_k值 57 | self.top_p = top_p # top_p值 58 | self.temperature = temperature # 温度 59 | self.text_delimiter = text_delimiter # 文本分隔符 60 | self.speed = speed # 语速 61 | self.sample_steps = sample_steps # 采样步数 62 | self.if_sr = if_sr # 是否超分 63 | self.inp_refs = inp_refs # 融合音频,json字符串 64 | self.sound_fusion_list = [] # 融合音频 65 | self.score = score # 评分 66 | self.remark = remark # 备注 67 | self.create_time = 
create_time # 创建时间 68 | self.set_sound_fusion_list_from_json(self.inp_refs) 69 | 70 | def get_text_delimiter_safe_for_json(self): 71 | if not self.text_delimiter: 72 | return '' 73 | return self.text_delimiter.replace('"', '\\"') 74 | 75 | def set_sound_fusion_list(self, sound_fusion_list: list[ObjSoundFusionAudio]): 76 | self.sound_fusion_list = sound_fusion_list 77 | if sound_fusion_list is not None: 78 | self.inp_refs = json.dumps([x.to_dict() for x in sound_fusion_list]) 79 | else: 80 | self.inp_refs = None 81 | 82 | def get_audio_extension(self): 83 | if self.audio_path: 84 | _, ext = os.path.splitext(self.audio_path) 85 | return ext[1:] if ext else None # 去掉扩展名前的点 86 | return None 87 | 88 | def set_sound_fusion_list_from_json(self, inp_refs: str): 89 | if inp_refs is not None: 90 | dict_list = json.loads(inp_refs) 91 | self.sound_fusion_list = [ObjSoundFusionAudio.from_json_string(d) for d in dict_list] 92 | else: 93 | self.sound_fusion_list = [] 94 | 95 | def get_vits_model(self): 96 | return VitsModel(version=self.gpt_sovits_version, name=self.vits_model_name, path=self.vits_model_path) 97 | 98 | def get_gpt_model(self): 99 | return GptModel(version=self.gpt_sovits_version, name=self.gpt_model_name, path=self.gpt_model_path) 100 | 101 | 102 | class ObjFinishedProductManagerFilter(Filter): 103 | def __init__(self, form_data): 104 | super().__init__(form_data) 105 | self.id = form_data.get('id') 106 | self.ids = form_data.get('ids') 107 | self.name = form_data.get('name') 108 | self.category = form_data.get('category') 109 | self.category_list_str = form_data.get('category_list_str') 110 | 111 | def make_sql(self) -> []: 112 | sql = '' 113 | condition = [] 114 | if not ValidationUtils.is_empty(self.id): 115 | sql += f" and id = ? " 116 | condition.append(f"{self.id}") 117 | if not ValidationUtils.is_empty(self.ids): 118 | sql += f" and id in ({self.ids}) " 119 | if not ValidationUtils.is_empty(self.name): 120 | sql += f" and name like ? 
" 121 | condition.append(f"%{self.name}%") 122 | if not ValidationUtils.is_empty(self.category): 123 | sql += f" and category = ? " 124 | condition.append(f"%{self.category}%") 125 | if not ValidationUtils.is_empty(self.category_list_str): 126 | sql += f" and category in ({self.category_list_str}) " 127 | 128 | return sql, tuple(condition) 129 | -------------------------------------------------------------------------------- /server/bean/inference_task/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/bean/inference_task/__init__.py -------------------------------------------------------------------------------- /server/bean/inference_task/gpt_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | from server.bean.base_model import BaseModel 3 | from server.dao.data_base_manager import db_config 4 | 5 | 6 | class GptModel(BaseModel): 7 | def __init__(self, version=None, name=None, path=None): 8 | self.version = version 9 | self.name = name 10 | self.path = path 11 | 12 | def equals(self, version, name): 13 | return self.version == version and self.name == name 14 | 15 | @staticmethod 16 | def create_dir(): 17 | os.makedirs(GptModel.get_base_v1_dir(), exist_ok=True) 18 | os.makedirs(GptModel.get_base_v2_dir(), exist_ok=True) 19 | os.makedirs(GptModel.get_base_v3_dir(), exist_ok=True) 20 | 21 | @staticmethod 22 | def get_base_v1_dir(): 23 | return f'{db_config.get_role_model_dir()}/GPT_weights' 24 | 25 | @staticmethod 26 | def get_base_v2_dir(): 27 | return f'{db_config.get_role_model_dir()}/GPT_weights_v2' 28 | 29 | @staticmethod 30 | def get_base_v3_dir(): 31 | return f'{db_config.get_role_model_dir()}/GPT_weights_v3' 32 | -------------------------------------------------------------------------------- 
/server/bean/inference_task/obj_inference_task.py: -------------------------------------------------------------------------------- 1 | from server.bean.base_model import BaseModel 2 | from server.bean.inference_task.obj_inference_task_audio import ObjInferenceTaskAudio 3 | from server.bean.inference_task.obj_inference_task_compare_params import ObjInferenceTaskCompareParams 4 | from server.bean.inference_task.obj_inference_task_text import ObjInferenceTaskText 5 | from server.bean.sound_fusion.obj_inference_task_sound_fusion_audio import ObjInferenceTaskSoundFusionAudio 6 | from server.common.filter import Filter 7 | from server.util.util import ValidationUtils, str_to_int 8 | 9 | 10 | class ObjInferenceTask(BaseModel): 11 | def __init__(self, id=None, task_name=None, compare_type=None, gpt_sovits_version=None, 12 | gpt_model_name=None, vits_model_name=None, top_k=None, 13 | top_p=None, temperature=None, text_delimiter=None, 14 | speed=None, sample_steps=None, if_sr=None, 15 | other_parameters=None, create_time=None, 16 | inference_status=0, execute_text_similarity=0, execute_audio_similarity=0, 17 | conclusion: str = None, 18 | audio_list: list[ObjInferenceTaskAudio] = None, param_list: list[ObjInferenceTaskCompareParams] = None, 19 | text_list: list[ObjInferenceTaskText] = None, 20 | inp_refs_list: list[ObjInferenceTaskSoundFusionAudio] = None): 21 | self.id = id # 主键ID,允许从外部传入 22 | self.task_name = task_name # 任务名称 23 | self.compare_type = compare_type # 比较类型 24 | self.gpt_sovits_version = gpt_sovits_version # 模型版本 25 | self.gpt_model_name = gpt_model_name # GPT模型名称 26 | self.vits_model_name = vits_model_name # Vits模型名称 27 | self.top_k = top_k # top_k值 28 | self.top_p = top_p # top_p值 29 | self.temperature = temperature # 温度 30 | self.text_delimiter = text_delimiter # 文本分隔符 31 | self.speed = speed # 语速 32 | self.sample_steps = sample_steps # 采样步数 33 | self.if_sr = if_sr # 是否超分 34 | self.other_parameters = other_parameters # 其余参数 35 | self.inference_status = 
inference_status # 推理状态 0 待推理 1 推理中 2 推理完成 36 | self.execute_text_similarity = execute_text_similarity # 是否已执行文本相似度 0 否 1 是 37 | self.execute_audio_similarity = execute_audio_similarity # 是否已执行音频相似度 0 否 1 是 38 | self.conclusion = conclusion # 任务结论 39 | self.create_time = create_time # 创建时间,默认为当前时间 40 | self.audio_list = audio_list 41 | self.param_list = param_list 42 | self.text_list = text_list 43 | self.inp_refs_list = inp_refs_list 44 | self.result_audio_count = 0 45 | 46 | def __str__(self): 47 | return (f"Id: {self.id}, TaskName: {self.task_name}, CompareType: {self.compare_type}, " 48 | f"GptSovitsVersion: {self.gpt_sovits_version}, " 49 | f"GptModelName: {self.gpt_model_name}, " 50 | f"VitsModelName: {self.vits_model_name}, " 51 | f"TopK: {self.top_k}, TopP: {self.top_p}, " 52 | f"Temperature: {self.temperature}, TextDelimiter: {self.text_delimiter}, " 53 | f"Speed: {self.speed}, OtherParameters: {self.other_parameters}, " 54 | f"InferenceStatus: {self.inference_status}, ExecuteTextSimilarity: {self.execute_text_similarity}, " 55 | f"ExecuteAudioSimilarity: {self.execute_audio_similarity}, " 56 | f"CreateTime: {self.create_time}") 57 | 58 | 59 | class ObjInferenceTaskFilter(Filter): 60 | def __init__(self, form_data): 61 | super().__init__(form_data) 62 | self.id = form_data.get('id') 63 | self.ids = form_data.get('ids') 64 | self.task_name = form_data.get('task_name') 65 | self.compare_type = form_data.get('compare_type') 66 | self.inference_status = str_to_int(form_data.get('inference_status')) 67 | 68 | def make_sql(self) -> []: 69 | sql = '' 70 | condition = [] 71 | if not ValidationUtils.is_empty(self.id): 72 | sql += f" and id = ? " 73 | condition.append(f"{self.id}") 74 | 75 | if not ValidationUtils.is_empty(self.ids): 76 | sql += f" and id in ({self.ids}) " 77 | 78 | if not ValidationUtils.is_empty(self.task_name): 79 | sql += f" and TaskName like ? 
" 80 | condition.append(f"%{self.task_name}%") 81 | 82 | if not ValidationUtils.is_empty(self.compare_type): 83 | sql += f" and CompareType = ? " 84 | condition.append(f"{self.compare_type}") 85 | 86 | if self.inference_status is not None and self.inference_status > -1: 87 | sql += f" and InferenceStatus = ? " 88 | condition.append(f"{self.inference_status}") 89 | 90 | return sql, tuple(condition) 91 | -------------------------------------------------------------------------------- /server/bean/inference_task/obj_inference_task_audio.py: -------------------------------------------------------------------------------- 1 | from server.bean.base_model import BaseModel 2 | from server.common.filter import Filter 3 | from server.util.util import ValidationUtils 4 | 5 | 6 | class ObjInferenceTaskAudio(BaseModel): 7 | def __init__(self, id=None, task_id=None, audio_id=None, audio_name=None,audio_length=0, 8 | audio_path=None, audio_content=None, audio_language=None, audio_category=None, create_time=None): 9 | self.id = id # 主键ID,允许从外部传入 10 | self.task_id = task_id # 推理任务id 11 | self.audio_id = audio_id # 音频id 12 | self.audio_name = audio_name # 音频名称 13 | self.audio_path = audio_path # 音频路径 14 | self.audio_content = audio_content # 音频内容 15 | self.audio_language = audio_language # 音频语种 16 | self.audio_category = audio_category # 音频分类 17 | self.audio_length = audio_length # 音频时长 18 | self.create_time = create_time # 创建时间,默认为当前时间 19 | 20 | def __str__(self): 21 | return (f"Id: {self.id}, TaskId: {self.task_id}, " 22 | f"AudioId: {self.audio_id}, AudioName: {self.audio_name}, AudioLength: {self.audio_length}," 23 | f"AudioPath: {self.audio_path}, AudioContent: {self.audio_content}, AudioCategory: {self.audio_category}," 24 | f"AudioLanguage: {self.audio_language}, CreateTime: {self.create_time}") 25 | 26 | class ObjInferenceTaskAudioFilter(Filter): 27 | def __init__(self, form_data): 28 | super().__init__(form_data) 29 | self.id = form_data.get('id') 30 | self.ids = 
form_data.get('ids') 31 | self.task_id = form_data.get('task_id') 32 | self.result_audio_id = form_data.get('result_audio_id') 33 | 34 | def make_sql(self) -> []: 35 | sql = '' 36 | condition = [] 37 | if not ValidationUtils.is_empty(self.id): 38 | sql += f" and id = ? " 39 | condition.append(f"{self.id}") 40 | 41 | if not ValidationUtils.is_empty(self.ids): 42 | sql += f" and id in ({self.ids}) " 43 | 44 | if not ValidationUtils.is_empty(self.task_id): 45 | sql += f" and taskId = ? " 46 | condition.append(f"{self.task_id}") 47 | 48 | if not ValidationUtils.is_empty(self.result_audio_id): 49 | sql += f" and exists (select 1 from tab_obj_inference_task_result_audio where AudioId = ta.Id and Id = ? ) " 50 | condition.append(f"{self.result_audio_id}") 51 | 52 | return sql, tuple(condition) -------------------------------------------------------------------------------- /server/bean/inference_task/obj_inference_task_compare_params.py: -------------------------------------------------------------------------------- 1 | from server.bean.base_model import BaseModel 2 | from server.bean.sound_fusion.obj_inference_task_sound_fusion_audio import ObjInferenceTaskSoundFusionAudio 3 | from server.common.filter import Filter 4 | from server.util.util import ValidationUtils 5 | 6 | 7 | class ObjInferenceTaskCompareParams(BaseModel): 8 | def __init__(self, id=None, task_id=None, audio_category=None, gpt_sovits_version=None, 9 | gpt_model_name=None, vits_model_name=None, top_k=None, 10 | top_p=None, temperature=None, text_delimiter=None, 11 | speed=None, sample_steps=None, if_sr=None, 12 | other_parameters=None, create_time=None, 13 | inp_refs_list: list[ObjInferenceTaskSoundFusionAudio] = None): 14 | self.id = id # 主键ID,允许从外部传入 15 | self.task_id = task_id # 任务id 16 | self.audio_category = audio_category # 音频分类 17 | self.gpt_sovits_version = gpt_sovits_version # 模型版本 18 | self.gpt_model_name = gpt_model_name # GPT模型名称 19 | self.vits_model_name = vits_model_name # Vits模型名称 20 | 
self.top_k = top_k # top_k值 21 | self.top_p = top_p # top_p值 22 | self.temperature = temperature # 温度 23 | self.text_delimiter = text_delimiter # 文本分隔符 24 | self.speed = speed # 语速 25 | self.sample_steps = sample_steps # 采样步数 26 | self.if_sr = if_sr # 是否超分 27 | self.other_parameters = other_parameters # 其余参数 28 | self.create_time = create_time # 创建时间,默认为当前时间 29 | self.inp_refs_list = inp_refs_list 30 | self.index = 0 # 分组索引 31 | 32 | def __str__(self): 33 | return (f"Id: {self.id}, TaskId: {self.task_id}, AudioCategory: {self.audio_category}," 34 | f"GptSovitsVersion: {self.gpt_sovits_version}, " 35 | f"GptModelName: {self.gpt_model_name}, " 36 | f"VitsModelName: {self.vits_model_name}, " 37 | f"TopK: {self.top_k}, TopP: {self.top_p}, " 38 | f"Temperature: {self.temperature}, TextDelimiter: {self.text_delimiter}, " 39 | f"Speed: {self.speed}, OtherParameters: {self.other_parameters}, " 40 | f"CreateTime: {self.create_time}") 41 | 42 | 43 | class ObjInferenceTaskCompareParamsFilter(Filter): 44 | def __init__(self, form_data): 45 | super().__init__(form_data) 46 | self.id = form_data.get('id') 47 | self.ids = form_data.get('ids') 48 | self.task_id = form_data.get('task_id') 49 | 50 | def make_sql(self) -> []: 51 | sql = '' 52 | condition = [] 53 | if not ValidationUtils.is_empty(self.id): 54 | sql += f" and id = ? " 55 | condition.append(f"{self.id}") 56 | 57 | if not ValidationUtils.is_empty(self.ids): 58 | sql += f" and id in ({self.ids}) " 59 | 60 | if not ValidationUtils.is_empty(self.task_id): 61 | sql += f" and taskId = ? 
" 62 | condition.append(f"{self.task_id}") 63 | 64 | return sql, tuple(condition) -------------------------------------------------------------------------------- /server/bean/inference_task/obj_inference_task_text.py: -------------------------------------------------------------------------------- 1 | from server.bean.base_model import BaseModel 2 | from server.common.filter import Filter 3 | from server.util.util import ValidationUtils 4 | 5 | 6 | class ObjInferenceTaskText(BaseModel): 7 | def __init__(self, id=None, task_id=None, text_id=None, text_content=None, category=None, 8 | text_language=None, create_time=None): 9 | self.id = id # 主键ID,允许从外部传入 10 | self.task_id = task_id # 推理任务id 11 | self.text_id = text_id # 推理文本id 12 | self.category = category # 文本分类 13 | self.text_content = text_content # 推理文本 14 | self.text_language = text_language # 文本语种 15 | self.create_time = create_time # 创建时间,默认为当前时间 16 | 17 | def __str__(self): 18 | return (f"Id: {self.id}, TaskId: {self.task_id}, Category: {self.category}, " 19 | f"TextId: {self.text_id}, TextContent: {self.text_content}, " 20 | f"TextLanguage: {self.text_language}, CreateTime: {self.create_time}") 21 | 22 | 23 | class ObjInferenceTaskTextFilter(Filter): 24 | def __init__(self, form_data): 25 | super().__init__(form_data) 26 | self.id = form_data.get('id') 27 | self.ids = form_data.get('ids') 28 | self.task_id = form_data.get('task_id') 29 | 30 | def make_sql(self) -> []: 31 | sql = '' 32 | condition = [] 33 | if not ValidationUtils.is_empty(self.id): 34 | sql += f" and id = ? " 35 | condition.append(f"{self.id}") 36 | 37 | if not ValidationUtils.is_empty(self.ids): 38 | sql += f" and id in ({self.ids}) " 39 | 40 | if not ValidationUtils.is_empty(self.task_id): 41 | sql += f" and taskId = ? 
" 42 | condition.append(f"{self.task_id}") 43 | 44 | return sql, tuple(condition) -------------------------------------------------------------------------------- /server/bean/inference_task/task_cell.py: -------------------------------------------------------------------------------- 1 | from server.bean.inference_task.gpt_model import GptModel 2 | from server.bean.inference_task.vits_model import VitsModel 3 | from server.bean.result_evaluation.obj_inference_task_result_audio import ObjInferenceTaskResultAudio 4 | 5 | 6 | class TaskCell: 7 | def __init__(self, gpt_model: GptModel = None, vits_model: VitsModel = None, 8 | task_result_audio_list: list[ObjInferenceTaskResultAudio] = None): 9 | self.gpt_model = gpt_model 10 | self.vits_model = vits_model 11 | self.task_result_audio_list = task_result_audio_list 12 | -------------------------------------------------------------------------------- /server/bean/inference_task/vits_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from server.bean.base_model import BaseModel 4 | from server.dao.data_base_manager import db_config 5 | 6 | 7 | class VitsModel(BaseModel): 8 | def __init__(self, version=None, name=None, path=None): 9 | self.version = version 10 | self.name = name 11 | self.path = path 12 | 13 | def equals(self, version, name): 14 | return self.version == version and self.name == name 15 | 16 | @staticmethod 17 | def create_dir(): 18 | os.makedirs(VitsModel.get_base_v1_dir(), exist_ok=True) 19 | os.makedirs(VitsModel.get_base_v2_dir(), exist_ok=True) 20 | os.makedirs(VitsModel.get_base_v3_dir(), exist_ok=True) 21 | 22 | @staticmethod 23 | def get_base_v1_dir(): 24 | return f'{db_config.get_role_model_dir()}/SoVITS_weights' 25 | 26 | @staticmethod 27 | def get_base_v2_dir(): 28 | return f'{db_config.get_role_model_dir()}/SoVITS_weights_v2' 29 | 30 | @staticmethod 31 | def get_base_v3_dir(): 32 | return 
f'{db_config.get_role_model_dir()}/SoVITS_weights_v3' 33 | -------------------------------------------------------------------------------- /server/bean/long_text_inference/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/bean/long_text_inference/__init__.py -------------------------------------------------------------------------------- /server/bean/reference_audio/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/bean/reference_audio/__init__.py -------------------------------------------------------------------------------- /server/bean/reference_audio/obj_inference_category.py: -------------------------------------------------------------------------------- 1 | from server.bean.base_model import BaseModel 2 | 3 | 4 | class ObjInferenceCategory(BaseModel): 5 | def __init__(self, id=None, name=None, create_time=None): 6 | self.id = id 7 | self.name = name 8 | self.create_time = create_time # 创建时间,默认为当前时间 -------------------------------------------------------------------------------- /server/bean/reference_audio/obj_reference_audio.py: -------------------------------------------------------------------------------- 1 | from server.bean.base_model import BaseModel 2 | from server.common.filter import Filter 3 | from server.util.util import ValidationUtils, str_to_int 4 | 5 | 6 | class ObjReferenceAudio(BaseModel): 7 | def __init__(self, id=None, audio_name=None, audio_path=None, content=None, 8 | language=None, category=None, audio_length=None, 9 | valid_or_not=None, md5_value=None, is_manual_calib=None, file_size=None, 10 | score=None, long_text_score=None, remark=None, 11 | create_time=None): 12 | self.id = id # 
主键ID,允许从外部传入 13 | self.audio_name = audio_name # 音频名称 14 | self.audio_path = audio_path # 音频路径 15 | self.content = content # 音频内容 16 | self.language = language # 音频语种 17 | self.category = category # 音频分类 18 | self.audio_length = audio_length # 音频时长 19 | self.valid_or_not = valid_or_not # 是否有效 1 有效 0 无效 20 | self.md5_value = md5_value # md5值 21 | self.is_manual_calib = is_manual_calib # 是否人工校准 1 是; 0 否 22 | self.file_size = file_size # 文件大小 23 | self.score = score # 评分 24 | self.long_text_score = long_text_score # 长文评分 25 | self.remark = remark # 备注 26 | self.create_time = create_time # 创建时间,默认为当前时间 27 | 28 | def __str__(self): 29 | return (f"Id: {self.id}, AudioName: {self.audio_name}, " 30 | f"AudioPath: {self.audio_path}, Content: {self.content}, " 31 | f"Language: {self.language}, Category: {self.category}," 32 | f"ValidOrNot: {self.valid_or_not}, Md5Value: {self.md5_value}, " 33 | f"IsManualCalib: {self.is_manual_calib}, FileSize: {self.file_size}, " 34 | f"Score: {self.score}, LongTextScore: {self.long_text_score}, remark: {self.remark}, " 35 | f"AudioLength: {self.audio_length}, CreateTime: {self.create_time}") 36 | 37 | 38 | class ObjReferenceAudioFilter(Filter): 39 | def __init__(self, form_data): 40 | super().__init__(form_data) 41 | self.id = form_data.get('id') 42 | self.audio_ids_str = form_data.get('audio_ids_str') 43 | self.audio_name = form_data.get('audio_name') 44 | self.content = form_data.get('content') 45 | self.category = form_data.get('category') 46 | self.isManual_calib = str_to_int(form_data.get('isManualCalib'), -1) 47 | self.categories = form_data.get('categories') 48 | self.valid = str_to_int(form_data.get('valid'), -1) 49 | self.language = form_data.get('language') 50 | self.category_list_str = form_data.get('category_list_str') 51 | 52 | def make_sql(self) -> []: 53 | sql = '' 54 | condition = [] 55 | if not ValidationUtils.is_empty(self.id): 56 | sql += f" and id = ? 
" 57 | condition.append(f"{self.id}") 58 | if not ValidationUtils.is_empty(self.audio_ids_str): 59 | sql += f" and id in ({self.audio_ids_str}) " 60 | if not ValidationUtils.is_empty(self.audio_name): 61 | sql += f" and AudioName like ? " 62 | condition.append(f"%{self.audio_name}%") 63 | if not ValidationUtils.is_empty(self.content): 64 | sql += f" and content like ? " 65 | condition.append(f"%{self.content}%") 66 | if not ValidationUtils.is_empty(self.category): 67 | sql += f" and category = ? " 68 | condition.append(f"{self.category}") 69 | if self.isManual_calib > -1: 70 | sql += f" and IsManualCalib = ? " 71 | condition.append(f"{self.isManual_calib}") 72 | if not ValidationUtils.is_empty(self.categories): 73 | sql += f" and category in ({self.categories}) " 74 | if self.valid > -1: 75 | sql += f" and ValidOrNot = ? " 76 | condition.append(f"{self.valid}") 77 | if not ValidationUtils.is_empty(self.category_list_str): 78 | sql += f" and category in ({self.category_list_str}) " 79 | if not ValidationUtils.is_empty(self.language): 80 | sql += f" and language = ? 
" 81 | condition.append(f"{self.language}") 82 | return sql, tuple(condition) 83 | -------------------------------------------------------------------------------- /server/bean/reference_audio/obj_reference_audio_compare_detail.py: -------------------------------------------------------------------------------- 1 | from server.bean.base_model import BaseModel 2 | from server.bean.reference_audio.obj_reference_audio import ObjReferenceAudio 3 | 4 | 5 | class ObjReferenceAudioCompareDetail(BaseModel): 6 | def __init__(self, id=None, task_id=None, compare_audio_id=None, score=None, create_time=None, 7 | compare_audio: ObjReferenceAudio = None): 8 | self.id = id # 自增编号 9 | self.task_id = task_id # 比对任务id 10 | self.compare_audio_id = compare_audio_id # 被比较的音频id 11 | self.score = score # 相似度分值 12 | self.create_time = create_time # 创建时间,默认为当前时间 13 | self.compare_audio = compare_audio 14 | 15 | def __repr__(self): 16 | return f"ReferenceAudioCompareDetail(id={self.id}, task_id={self.task_id}, " \ 17 | f"compare_audio_id={self.compare_audio_id}, score={self.score}, " \ 18 | f"create_time='{self.create_time}')" 19 | -------------------------------------------------------------------------------- /server/bean/reference_audio/obj_reference_audio_compare_task.py: -------------------------------------------------------------------------------- 1 | from server.bean.base_model import BaseModel 2 | 3 | 4 | class ObjReferenceAudioCompareTask(BaseModel): 5 | def __init__(self, id=None, audio_id=None, category_names=None, status=0, remark='', create_time=None): 6 | self.id = id # 自增编号 7 | self.audio_id = audio_id # 音频id 8 | self.category_names = category_names # 比对目录名称 9 | self.status = status # 任务状态:0 待执行 1 执行中 2 已完成 3 失败 10 | self.remark = remark # 备注 11 | self.create_time = create_time # 创建时间,默认为当前时间 12 | 13 | def __repr__(self): 14 | return f"ReferenceAudioCompareTask(id={self.id}, audio_id={self.audio_id}, status={self.status}, remark='{self.remark}', " \ 15 | 
f"category_names='{self.category_names}', create_time='{self.create_time}')" 16 | -------------------------------------------------------------------------------- /server/bean/result_evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/bean/result_evaluation/__init__.py -------------------------------------------------------------------------------- /server/bean/sound_fusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/bean/sound_fusion/__init__.py -------------------------------------------------------------------------------- /server/bean/sound_fusion/obj_inference_task_sound_fusion_audio.py: -------------------------------------------------------------------------------- 1 | from server.bean.base_model import BaseModel 2 | 3 | 4 | class ObjInferenceTaskSoundFusionAudio(BaseModel): 5 | def __init__(self, id=None, task_id=0, compare_param_id=0, audio_id=0, 6 | role_name='', audio_name='', audio_path='', content='', 7 | language='', category='', audio_length=0, remark='', 8 | create_time=None): 9 | self.id = id # 自增编号 10 | self.task_id = task_id # 任务ID 11 | self.compare_param_id = compare_param_id # 对比参数ID 12 | self.audio_id = audio_id # 融合音频ID 13 | self.role_name = role_name # 角色名称 14 | self.audio_name = audio_name # 音频名称 15 | self.audio_path = audio_path # 音频路径 16 | self.content = content # 音频内容 17 | self.language = language # 音频语种 18 | self.category = category # 音频分类 19 | self.audio_length = audio_length # 音频时长 20 | self.remark = remark # 备注 21 | self.create_time = create_time # 创建时间 22 | 23 | def __str__(self): 24 | return f"TabObjInferenceTaskSoundFusionAudio(id={self.id}, task_id={self.task_id}, " 
\ 25 | f"compare_param_id={self.compare_param_id}, audio_id={self.audio_id}, " \ 26 | f"role_name='{self.role_name}', audio_name='{self.audio_name}', " \ 27 | f"audio_path='{self.audio_path}', content='{self.content}', " \ 28 | f"language='{self.language}', category='{self.category}', " \ 29 | f"audio_length={self.audio_length}, remark='{self.remark}', " \ 30 | f"create_time={self.create_time})" 31 | -------------------------------------------------------------------------------- /server/bean/sound_fusion/obj_sound_fusion_audio.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | from server.bean.base_model import BaseModel 5 | from server.common.filter import Filter 6 | from server.util.util import ValidationUtils 7 | 8 | 9 | class ObjSoundFusionAudio(BaseModel): 10 | def __init__(self, id=None, role_name='', audio_name='', audio_path='', 11 | content='', language='', category='', audio_length=0, 12 | remark='', create_time=None): 13 | self.id = id # 自增编号 14 | self.role_name = role_name # 角色名称 15 | self.audio_name = audio_name # 音频名称 16 | self.audio_path = audio_path # 音频路径 17 | self.content = content # 音频内容 18 | self.language = language # 音频语种 19 | self.category = category # 音频分类 20 | self.audio_length = audio_length # 音频时长 21 | self.remark = remark # 备注 22 | self.create_time = create_time # 创建时间 23 | 24 | def to_dict(self): 25 | return { 26 | 'id': self.id, 27 | 'role_name': self.role_name, 28 | 'audio_name': self.audio_name, 29 | 'audio_path': self.audio_path, 30 | 'content': self.content, 31 | 'language': self.language, 32 | 'category': self.category, 33 | 'audio_length': self.audio_length, 34 | 'remark': self.remark, 35 | 'create_time': self.create_time 36 | } 37 | 38 | def get_audio_extension(self): 39 | if self.audio_path: 40 | _, ext = os.path.splitext(self.audio_path) 41 | return ext[1:] if ext else None # 去掉扩展名前的点 42 | return None 43 | 44 | def to_json_string(self): 45 | return 
json.dumps(self.to_dict()) 46 | 47 | @classmethod 48 | def from_json_string(cls, json_str): 49 | data = json.loads(json_str) 50 | return cls( 51 | id=data.get('id'), 52 | role_name=data.get('role_name'), 53 | audio_name=data.get('audio_name'), 54 | audio_path=data.get('audio_path'), 55 | content=data.get('content'), 56 | language=data.get('language'), 57 | category=data.get('category'), 58 | audio_length=data.get('audio_length'), 59 | remark=data.get('remark'), 60 | create_time=data.get('create_time') 61 | ) 62 | 63 | def __str__(self): 64 | return f"TabObjSoundFusionAudio(id={self.id}, role_name='{self.role_name}', " \ 65 | f"audio_name='{self.audio_name}', audio_path='{self.audio_path}', " \ 66 | f"content='{self.content}', language='{self.language}', " \ 67 | f"category='{self.category}', audio_length={self.audio_length}, " \ 68 | f"remark='{self.remark}', create_time={self.create_time})" 69 | 70 | 71 | class ObjSoundFusionAudioFilter(Filter): 72 | def __init__(self, form_data): 73 | super().__init__(form_data) 74 | self.id = form_data.get('id') 75 | self.audio_ids_str = form_data.get('audio_ids_str') 76 | self.role_name = form_data.get('role_name') 77 | self.audio_name = form_data.get('audio_name') 78 | self.content = form_data.get('content') 79 | self.category = form_data.get('category') 80 | self.language = form_data.get('language') 81 | self.category_list_str = form_data.get('category_list_str') 82 | 83 | def make_sql(self) -> []: 84 | sql = '' 85 | condition = [] 86 | if not ValidationUtils.is_empty(self.id): 87 | sql += f" and id = ? " 88 | condition.append(f"{self.id}") 89 | if not ValidationUtils.is_empty(self.audio_ids_str): 90 | sql += f" and id in ({self.audio_ids_str}) " 91 | if not ValidationUtils.is_empty(self.role_name): 92 | sql += f" and RoleName like ? " 93 | condition.append(f"%{self.role_name}%") 94 | if not ValidationUtils.is_empty(self.audio_name): 95 | sql += f" and AudioName like ? 
" 96 | condition.append(f"%{self.audio_name}%") 97 | 98 | if not ValidationUtils.is_empty(self.content): 99 | sql += f" and content like ? " 100 | condition.append(f"%{self.content}%") 101 | 102 | if not ValidationUtils.is_empty(self.category): 103 | sql += f" and category = ? " 104 | condition.append(f"{self.category}") 105 | if not ValidationUtils.is_empty(self.category_list_str): 106 | sql += f" and category in ({self.category_list_str}) " 107 | 108 | if not ValidationUtils.is_empty(self.language): 109 | sql += f" and language = ? " 110 | condition.append(f"{self.language}") 111 | 112 | return sql, tuple(condition) 113 | -------------------------------------------------------------------------------- /server/bean/system/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/bean/system/__init__.py -------------------------------------------------------------------------------- /server/bean/system/role.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from server.bean.base_model import BaseModel 4 | 5 | 6 | class Role(BaseModel): 7 | def __init__(self, category: str = '默认', name: str = None): 8 | self.category = category # 分类 9 | self.name = name # 名称 10 | 11 | def to_dict(self): 12 | return { 13 | 'category': self.category, 14 | 'name': self.name 15 | } 16 | 17 | def to_json_string(self): 18 | return json.dumps(self.to_dict()) 19 | 20 | @classmethod 21 | def from_json_string(cls, json_str): 22 | data = json.loads(json_str) 23 | return cls(category=data['category'], name=data['name']) 24 | 25 | def __repr__(self): 26 | return f"Role(category='{self.category}', name='{self.name}')" 27 | -------------------------------------------------------------------------------- /server/bean/system/role_category.py: 
-------------------------------------------------------------------------------- 1 | from server.bean.base_model import BaseModel 2 | from server.bean.system.role import Role 3 | 4 | 5 | class RoleCategory(BaseModel): 6 | def __init__(self, category: str = '默认', role_list: list[Role] = None): 7 | self.category = category # 分类 8 | self.role_list = role_list # 角色列表 9 | 10 | def __repr__(self): 11 | return f"Role(category='{self.category}')" 12 | -------------------------------------------------------------------------------- /server/bean/system/sys_cache.py: -------------------------------------------------------------------------------- 1 | class SysCache: 2 | def __init__(self, type=None, key_name=None, value=None): 3 | self.type = type # 类型 4 | self.key_name = key_name # key 5 | self.value = value # 值 6 | 7 | def __repr__(self): 8 | return f"TabSysCache(type='{self.type}', key_name='{self.key_name}', value='{self.value}')" -------------------------------------------------------------------------------- /server/bean/system/sys_cache_constants.py: -------------------------------------------------------------------------------- 1 | class SystemConstants: 2 | CACHE_TYPE = 'system' 3 | CACHE_KEY_ROLE = 'role' 4 | 5 | class TextConstants: 6 | CACHE_TYPE = 'text' 7 | CACHE_KEY_LAST_SELECTED_ID = 'last_selected_id' -------------------------------------------------------------------------------- /server/bean/text/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/bean/text/__init__.py -------------------------------------------------------------------------------- /server/bean/text/obj_inference_text.py: -------------------------------------------------------------------------------- 1 | from server.bean.base_model import BaseModel 2 | from server.common.filter import Filter 3 | from 
server.util.util import ValidationUtils 4 | 5 | 6 | class ObjInferenceText(BaseModel): 7 | def __init__(self, id=None, category=None, text_content=None, text_language=None, create_time=None): 8 | self.id = id 9 | self.category = category # 文本分类 10 | self.text_content = text_content # 推理文本 11 | self.text_language = text_language # 文本语种 12 | self.create_time = create_time 13 | 14 | def __repr__(self): 15 | return f", " \ 17 | f"TextLanguage='{self.text_language}', CreateTime='{self.create_time}')>" 18 | 19 | 20 | class ObjInferenceTextFilter(Filter): 21 | def __init__(self, form_data): 22 | super().__init__(form_data) 23 | self.id = form_data.get('id') 24 | self.ids = form_data.get('ids') 25 | self.category = form_data.get('category') 26 | self.text_content = form_data.get('text_content') 27 | self.text_language = form_data.get('text_language') 28 | 29 | def make_sql(self) -> []: 30 | sql = '' 31 | condition = [] 32 | if not ValidationUtils.is_empty(self.id): 33 | sql += f" and id = ? " 34 | condition.append(f"{self.id}") 35 | if not ValidationUtils.is_empty(self.ids): 36 | sql += f" and id in ({self.ids}) " 37 | if not ValidationUtils.is_empty(self.text_content): 38 | sql += f" and TextContent like ? " 39 | condition.append(f"%{self.text_content}%") 40 | 41 | if not ValidationUtils.is_empty(self.category): 42 | sql += f" and category = ? " 43 | condition.append(f"{self.category}") 44 | 45 | if not ValidationUtils.is_empty(self.text_language): 46 | sql += f" and TextLanguage = ? 
" 47 | condition.append(f"{self.text_language}") 48 | 49 | return sql, tuple(condition) -------------------------------------------------------------------------------- /server/bean/tts_correction/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/bean/tts_correction/__init__.py -------------------------------------------------------------------------------- /server/bean/tts_correction/obj_tts_correction_task.py: -------------------------------------------------------------------------------- 1 | from server.bean.base_model import BaseModel 2 | from server.bean.finished_product.finished_product_manager import ObjFinishedProductManager 3 | from server.bean.text.obj_inference_text import ObjInferenceText 4 | from server.bean.tts_correction.obj_tts_correction_task_detail import ObjTtsCorrectionTaskDetail 5 | from server.common.filter import Filter 6 | from server.util.util import str_to_int, ValidationUtils 7 | 8 | 9 | class ObjTtsCorrectionTask(BaseModel): 10 | def __init__(self, id=0, task_name=None, text_id=None, product_id=None, inference_status=None, remark=None, 11 | create_time=None,detail_list: list[ObjTtsCorrectionTaskDetail] = None, 12 | product: ObjFinishedProductManager = None, text_obj: ObjInferenceText = None): 13 | self.id = id # 自增编号 14 | self.task_name = task_name # 任务名称 15 | self.text_id = text_id # 推理文本id 16 | self.product_id = product_id # 成品Id 17 | self.inference_status = inference_status # 推理状态 0 待推理 1 推理中 2 推理完成 18 | self.remark = remark # 备注 19 | self.create_time = create_time # 创建时间 20 | self.detail_count = 0 21 | self.detail_list = detail_list 22 | self.product = product 23 | self.text_obj = text_obj 24 | 25 | def __str__(self): 26 | return (f"TabObjTtsCorrectionTask(id={self.id}, task_name='{self.task_name}', " 27 | f"text_id={self.text_id}, 
product_id={self.product_id}, " 28 | f"inference_status={self.inference_status}, remark='{self.remark}', " 29 | f"create_time='{self.create_time}')") 30 | 31 | 32 | class ObjTtsCorrectionTaskFilter(Filter): 33 | def __init__(self, form_data): 34 | super().__init__(form_data) 35 | self.id = form_data.get('id') 36 | self.task_name = form_data.get('task_name') 37 | self.inference_status = str_to_int(form_data.get('inference_status')) 38 | 39 | def make_sql(self) -> []: 40 | sql = '' 41 | condition = [] 42 | if not ValidationUtils.is_empty(self.id): 43 | sql += f" and id = ? " 44 | condition.append(f"{self.id}") 45 | 46 | if not ValidationUtils.is_empty(self.task_name): 47 | sql += f" and TaskName like ? " 48 | condition.append(f"%{self.task_name}%") 49 | 50 | if self.inference_status is not None and self.inference_status > -1: 51 | sql += f" and InferenceStatus = ? " 52 | condition.append(f"{self.inference_status}") 53 | 54 | return sql, tuple(condition) 55 | -------------------------------------------------------------------------------- /server/bean/tts_correction/obj_tts_correction_task_detail.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from server.common.ras_api_monitor import InferenceParams 4 | from server.bean.base_model import BaseModel 5 | from server.bean.finished_product.finished_product_manager import ObjFinishedProductManager 6 | from server.bean.text.obj_inference_text import ObjInferenceText 7 | from server.common.filter import Filter 8 | from server.dao.data_base_manager import db_config 9 | from server.util.util import ValidationUtils 10 | 11 | 12 | class ObjTtsCorrectionTaskDetail(BaseModel): 13 | def __init__(self, id=0, task_id=None, text_content=None, text_index=None, status=None, 14 | audio_path=None,audio_length=None, asr_text=None, asr_text_similarity=None, audio_status=None, create_time=None): 15 | self.id = id # 自增编号 16 | self.task_id = task_id # 任务id 17 | self.text_content = 
text_content # 待推理的文本内容 18 | self.text_index = text_index # 文本序号 19 | self.status = status # 推理状态 0 待推理;1 推理中;2 已完成;3 失败 20 | self.audio_path = audio_path # 音频路径 21 | self.audio_length = audio_length # 音频时长 22 | self.asr_text = asr_text # asr文本 23 | self.asr_text_similarity = asr_text_similarity # 文本相似度 24 | self.audio_status = audio_status # 音频状态 0 未校验;1 推理正确;2 推理不正确 25 | self.create_time = create_time # 创建时间 26 | 27 | def get_audio_directory(self) -> str: 28 | return os.path.join(db_config.get_role_work_dir(), "tts_correction_task", f'task_{self.task_id}') 29 | 30 | def get_audio_file_path(self) -> str: 31 | return os.path.join(self.get_audio_directory(), f'detail_{self.id}.wav') 32 | 33 | def get_inference_params(self, product: ObjFinishedProductManager, text_obj: ObjInferenceText): 34 | 35 | return InferenceParams( 36 | refer_wav_path=product.audio_path, 37 | prompt_text=product.content, 38 | prompt_language=product.language, 39 | text=self.text_content, 40 | text_language=text_obj.text_language, 41 | cut_punc=product.text_delimiter, 42 | top_k=product.top_k, 43 | top_p=product.top_p, 44 | temperature=product.temperature, 45 | speed=product.speed, 46 | inp_refs=[p.audio_path for p in product.sound_fusion_list] if product.sound_fusion_list else [] 47 | ) 48 | 49 | def __str__(self): 50 | return (f"TabObjTtsCorrectionTaskDetail(id={self.id}, task_id={self.task_id}, " 51 | f"text_content='{self.text_content}', text_index={self.text_index}, " 52 | f"status={self.status}, audio_path='{self.audio_path}', " 53 | f"asr_text='{self.asr_text}', asr_text_similarity={self.asr_text_similarity}, " 54 | f"audio_status={self.audio_status}, create_time='{self.create_time}')") 55 | 56 | 57 | class ObjTtsCorrectionTaskDetailFilter(Filter): 58 | def __init__(self, form_data): 59 | super().__init__(form_data) 60 | self.task_id = form_data.get('task_id') 61 | 62 | def make_sql(self) -> []: 63 | sql = '' 64 | condition = [] 65 | if not ValidationUtils.is_empty(self.task_id): 66 | sql 
+= f" and task_id = ? " 67 | condition.append(f"{self.task_id}") 68 | 69 | return sql, tuple(condition) 70 | -------------------------------------------------------------------------------- /server/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/common/__init__.py -------------------------------------------------------------------------------- /server/common/config_manager.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | import os 3 | import server.util.util as util 4 | 5 | 6 | class ParamReadWriteManager: 7 | def __init__(self): 8 | pass 9 | 10 | 11 | class ConfigManager: 12 | def __init__(self): 13 | self.config_path = 'server/config.ini' 14 | self.config = configparser.ConfigParser() 15 | self.config.read(self.config_path, encoding='utf-8') 16 | 17 | def get_log(self, key): 18 | return self.config.get('Log',key) 19 | 20 | def get_base(self, key): 21 | return self.config.get('Base', key) 22 | 23 | def print(self): 24 | # 打印所有配置 25 | for section in self.config.sections(): 26 | print('[{}]'.format(section)) 27 | for key in self.config[section]: 28 | print('{} = {}'.format(key, self.config[section][key])) 29 | print() 30 | 31 | 32 | _config = ConfigManager() 33 | _param_read_write_manager = ParamReadWriteManager() 34 | 35 | 36 | def get_config(): 37 | return _config 38 | 39 | 40 | def get_rw_param(): 41 | return _param_read_write_manager 42 | 43 | 44 | if __name__ == '__main__': 45 | print(_config.print()) 46 | -------------------------------------------------------------------------------- /server/common/config_params.py: -------------------------------------------------------------------------------- 1 | import server.common.config_manager as config_manager 2 | from server.util.util import str_to_int 3 | 4 | config = 
config_manager.get_config() 5 | 6 | # [Base] 7 | # 版本号 8 | version = config.get_base('version') 9 | # Gpt-Sovits2项目路径 10 | gsv2_dir = config.get_base('gsv2_dir') 11 | # 推理任务并发进程数量 12 | inference_process_num = str_to_int(config.get_base('inference_process_num'), 1) 13 | # service port 14 | service_port = str_to_int(config.get_base('service_port'), 8000) 15 | # api port 16 | api_port = str_to_int(config.get_base('api_port'), 8001) 17 | 18 | # [Log] 19 | # 日志保存目录路径 20 | log_dir = config.get_log('log_dir') 21 | # 日志级别 CRITICAL、FATAL、ERROR、WARNING、WARN、INFO、DEBUG、NOTSET、 22 | log_level = config.get_log('log_level') 23 | # 函数时间消耗日志打印类型 file 打印到文件; close 关闭 24 | time_log_print_type = config.get_log('time_log_print_type') 25 | # 函数时间消耗日志保存目录路径 26 | time_log_print_dir = config.get_log('time_log_print_dir') 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /server/common/custom_exception.py: -------------------------------------------------------------------------------- 1 | class CustomException(Exception): 2 | """自定义异常类""" 3 | 4 | def __init__(self, message): 5 | super().__init__(message) # 调用基类的构造函数 6 | self.message = message 7 | -------------------------------------------------------------------------------- /server/common/filter.py: -------------------------------------------------------------------------------- 1 | from server.util.util import str_to_int, ValidationUtils 2 | 3 | 4 | class Filter: 5 | def __init__(self, form_data): 6 | self.page = str_to_int(form_data.get('page'), 0) 7 | self.limit = str_to_int(form_data.get('limit'), 0) 8 | self.order_by = form_data.get('order') 9 | self.order_by_desc = form_data.get('desc') 10 | 11 | def get_order_by_sql(self) -> str: 12 | if ValidationUtils.is_empty(self.order_by): 13 | return "" 14 | return f" ORDER BY {self.order_by} {self.order_by_desc}" 15 | 16 | def get_limit_sql(self) -> str: 17 | if self.page < 1 or self.limit < 1: 18 | return "" 19 | return f" LIMIT 
{self.limit} OFFSET {(self.page - 1) * self.limit}" 20 | -------------------------------------------------------------------------------- /server/common/log_config.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import datetime 4 | import server.common.config_params as params 5 | 6 | 7 | def create_general_logger(): 8 | # 获取当前日期,用于文件名和日志内容 9 | current_date = datetime.datetime.now().strftime('%Y-%m-%d') 10 | 11 | # 创建一个用于控制台输出的处理器,并设置日志级别 12 | console_handler = logging.StreamHandler() 13 | # console_handler.setLevel(logging.INFO) 14 | # 可以设置控制台输出的格式 15 | console_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') 16 | console_handler.setFormatter(console_formatter) 17 | console_handler.encoding = 'utf-8' # 设置字符编码为utf-8 18 | 19 | os.makedirs(params.log_dir, exist_ok=True) 20 | 21 | # 创建一个用于常规日志的处理器 22 | general_handler = logging.FileHandler(f"{params.log_dir}/{current_date}.log", mode='a', encoding='utf-8') 23 | # general_handler.setLevel(logging.INFO) 24 | general_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') 25 | general_handler.setFormatter(general_formatter) 26 | 27 | # 配置一个常规的logger 28 | general_logger = logging.getLogger('general') 29 | level = logging.getLevelName(params.log_level) 30 | general_logger.setLevel(level) 31 | general_logger.addHandler(console_handler) 32 | general_logger.addHandler(general_handler) 33 | 34 | # 配置根logger,以防万一 35 | logging.basicConfig(level=logging.WARNING, handlers=[general_handler]) 36 | 37 | return general_logger 38 | 39 | 40 | def create_performance_logger(): 41 | # 获取当前日期,用于文件名和日志内容 42 | current_date = datetime.datetime.now().strftime('%Y-%m-%d') 43 | 44 | os.makedirs(params.time_log_print_dir, exist_ok=True) 45 | 46 | # 创建一个专用于性能监控日志的处理器 47 | performance_handler = logging.FileHandler( 48 | f"{params.time_log_print_dir}/{current_date}.log", mode='a', encoding='utf-8') 49 | # 
performance_handler.setLevel(logging.INFO) 50 | performance_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') 51 | performance_handler.setFormatter(performance_formatter) 52 | 53 | # 配置一个专门用于性能监控的logger 54 | performance_logger = logging.getLogger('performance') 55 | performance_logger.setLevel(logging.INFO) 56 | performance_logger.addHandler(performance_handler) 57 | 58 | return performance_logger 59 | 60 | 61 | def setup_logging(): 62 | return create_general_logger(), create_performance_logger() 63 | 64 | 65 | logger, p_logger = setup_logging() 66 | -------------------------------------------------------------------------------- /server/common/response_result.py: -------------------------------------------------------------------------------- 1 | from server.bean.base_model import BaseModel, convert_list_to_camel_case_dicts 2 | 3 | 4 | def convert_dict_to_camel_case_dicts(dict_obj: dict): 5 | 6 | result = {} 7 | for key, value in dict_obj.items(): 8 | if isinstance(value, BaseModel): 9 | result[key] = value.to_camel_case_dict() 10 | elif isinstance(value, list): 11 | result[key] = convert_list_to_camel_case_dicts(value) 12 | else: 13 | result[key] = value 14 | return result 15 | 16 | 17 | class ResponseResult: 18 | def __init__(self, code=0, msg="success", count=0, data=None): 19 | self.code = code 20 | self.msg = msg 21 | self.count = count 22 | self.data = None 23 | if isinstance(data, list): 24 | self.data = convert_list_to_camel_case_dicts(data if data is not None else []) 25 | else: 26 | if data is not None: 27 | if isinstance(data, BaseModel): 28 | self.data = data.to_camel_case_dict() 29 | elif isinstance(data, dict): 30 | self.data = convert_dict_to_camel_case_dicts(data) 31 | else: 32 | self.data = data 33 | 34 | def to_dict(self): 35 | return { 36 | "code": self.code, 37 | "msg": self.msg, 38 | "count": self.count, 39 | "data": self.data 40 | } 41 | 42 | def __str__(self): 43 | return f"ResponseResult(code={self.code}, 
msg='{self.msg}', count={self.count}, data={self.data})" 44 | -------------------------------------------------------------------------------- /server/common/time_util.py: -------------------------------------------------------------------------------- 1 | import time 2 | import os 3 | from server.common.log_config import p_logger 4 | import server.common.config_params as params 5 | 6 | 7 | def timeit_decorator(func): 8 | """ 9 | 装饰器,用于计算被装饰函数的执行时间。 10 | 11 | 参数: 12 | func (function): 要计时的函数。 13 | 14 | 返回: 15 | function: 包含计时功能的新函数。 16 | """ 17 | 18 | def wrapper(*args, **kwargs): 19 | if params.time_log_print_type != 'file': 20 | return func(*args, **kwargs) 21 | 22 | start_time = time.perf_counter() # 使用 perf_counter 获取高精度计时起点 23 | 24 | func_result = func(*args, **kwargs) # 执行原函数 25 | 26 | end_time = time.perf_counter() # 获取计时终点 27 | elapsed_time = end_time - start_time # 计算执行耗时 28 | 29 | # 记录日志内容 30 | log_message = f"进程ID: {os.getpid()}, {func.__name__} 执行耗时: {elapsed_time:.6f} 秒" 31 | p_logger.info(log_message) 32 | 33 | return func_result 34 | 35 | return wrapper 36 | 37 | 38 | def time_monitor(func): 39 | """ 40 | 返回结果,追加时间 41 | """ 42 | 43 | def wrapper(*args, **kwargs): 44 | 45 | start_time = time.perf_counter() # 使用 perf_counter 获取高精度计时起点 46 | 47 | func_result = func(*args, **kwargs) # 执行原函数 48 | 49 | end_time = time.perf_counter() # 获取计时终点 50 | elapsed_time = end_time - start_time # 计算执行耗时 51 | 52 | return elapsed_time, func_result 53 | 54 | return wrapper 55 | 56 | 57 | # 使用装饰器 58 | @timeit_decorator 59 | def example_function(n): 60 | time.sleep(n) # 假设这是需要计时的函数,这里模拟耗时操作 61 | return n * 2 62 | 63 | 64 | def example_function2(n): 65 | time.sleep(n) # 假设这是需要计时的函数,这里模拟耗时操作 66 | return n * 2 67 | 68 | 69 | if __name__ == "__main__": 70 | # 调用经过装饰的函数 71 | # result = example_function(2) 72 | print(time_monitor(example_function2)(2)) 73 | -------------------------------------------------------------------------------- /server/config.ini: 
-------------------------------------------------------------------------------- 1 | # config.ini 2 | 3 | [Base] 4 | # 版本号 5 | version = 1.0.0 6 | # Gpt-Sovits2项目路径 7 | gsv2_dir = ../GPT-SoVITS-v3lora-20250228 8 | # 推理任务并发进程数量,增加可以提高推理任务的速度,但是可能会遇到奇怪的bug, 9 | inference_process_num = 1 10 | # RAST服务端口 11 | service_port = 9000 12 | # api服务端口 13 | api_port = 8002 14 | 15 | 16 | [Log] 17 | # 日志保存目录路径 18 | log_dir = log/general 19 | # 日志级别 CRITICAL、FATAL、ERROR、WARNING、WARN、INFO、DEBUG、NOTSET、 20 | log_level = INFO 21 | # 函数时间消耗日志打印类型 file 打印到文件; close 关闭 22 | time_log_print_type = file 23 | # 函数时间消耗日志保存目录路径 24 | time_log_print_dir = log/performance -------------------------------------------------------------------------------- /server/controller/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/controller/__init__.py -------------------------------------------------------------------------------- /server/controller/finished_product/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/controller/finished_product/__init__.py -------------------------------------------------------------------------------- /server/controller/inference_task/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/controller/inference_task/__init__.py -------------------------------------------------------------------------------- /server/controller/long_text_inference/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/controller/long_text_inference/__init__.py -------------------------------------------------------------------------------- /server/controller/long_text_inference/long_text_inference_controller.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Request 2 | 3 | from server.common.ras_api_monitor import RasApiMonitor 4 | from server.common.response_result import ResponseResult 5 | from server.service.inference_task.model_manager_service import ModelManagerService 6 | from server.util.util import str_to_int 7 | 8 | router = APIRouter(prefix="/inference") 9 | 10 | 11 | @router.post("/start_ras_api") 12 | async def start_ras_api(request: Request): 13 | data_form = await request.form() 14 | stream_mode = str_to_int(data_form.get('streamMode'),0) 15 | media_type = data_form.get('mediaType') 16 | if RasApiMonitor.start_service(stream_mode == 1, media_type): 17 | return ResponseResult(msg="api服务已启动") 18 | return ResponseResult(msg="api服务启动失败") 19 | 20 | 21 | @router.post("/stop_ras_api") 22 | async def stop_ras_api(): 23 | if RasApiMonitor.stop_service(): 24 | return ResponseResult(msg="api服务已关闭") 25 | return ResponseResult(msg="api服务关闭失败") 26 | 27 | 28 | @router.post("/load_models") 29 | async def load_models(request: Request): 30 | 31 | gpt_model_list = ModelManagerService.get_gpt_model_list() 32 | vits_model_list = ModelManagerService.get_vits_model_list() 33 | 34 | return ResponseResult(data={ 35 | "gptModels": gpt_model_list, 36 | "vitsModels": vits_model_list 37 | }) -------------------------------------------------------------------------------- /server/controller/reference_audio/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/controller/reference_audio/__init__.py -------------------------------------------------------------------------------- /server/controller/result_evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/controller/result_evaluation/__init__.py -------------------------------------------------------------------------------- /server/controller/result_evaluation/result_evaluation_controller.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Request 2 | 3 | from server.bean.result_evaluation.obj_inference_task_result_audio import ObjInferenceTaskResultAudioFilter, \ 4 | ObjInferenceTaskResultAudio 5 | from server.common.response_result import ResponseResult 6 | from server.service.inference_task.inference_task_service import InferenceTaskService 7 | from server.service.result_evaluation.result_evaluation_service import ResultEvaluationService 8 | from server.util.util import ValidationUtils 9 | 10 | router = APIRouter(prefix="/evaluation") 11 | 12 | 13 | @router.post("/get_result_evaluation_of_task_list") 14 | async def get_result_evaluation_of_task_list(request: Request): 15 | form_data = await request.form() 16 | audio_filter = ObjInferenceTaskResultAudioFilter(form_data) 17 | 18 | if audio_filter.task_id is None: 19 | return ResponseResult(code=1, msg="task_id is required") 20 | 21 | count = ResultEvaluationService.find_count(audio_filter) 22 | audio_list = ResultEvaluationService.find_list(audio_filter) 23 | task = InferenceTaskService.find_whole_inference_task_by_id(audio_filter.task_id) 24 | 25 | return ResponseResult(data={ 26 | "task": task, 27 | "audioList": audio_list 28 | }, 
count=count) 29 | 30 | 31 | @router.post("/get_result_evaluation_list") 32 | async def get_result_evaluation_list(request: Request): 33 | form_data = await request.form() 34 | audio_filter = ObjInferenceTaskResultAudioFilter(form_data) 35 | 36 | audio_filter.status = 1 37 | count = ResultEvaluationService.find_count(audio_filter) 38 | # audio_list = ResultEvaluationService.find_whole_list(audio_filter) 39 | audio_list = ResultEvaluationService.find_list2(audio_filter) 40 | 41 | return ResponseResult(data=audio_list, count=count) 42 | 43 | 44 | @router.post("/get_inference_task_result_audio_list") 45 | async def get_inference_task_result_audio_list(request: Request): 46 | form_data = await request.form() 47 | audio_filter = ObjInferenceTaskResultAudioFilter(form_data) 48 | 49 | count = ResultEvaluationService.find_count(audio_filter) 50 | audio_list = ResultEvaluationService.find_list(audio_filter) 51 | 52 | return ResponseResult(data=audio_list, count=count) 53 | 54 | 55 | @router.post("/get_inference_task_result_audio_list2") 56 | async def get_inference_task_result_audio_list2(request: Request): 57 | form_data = await request.form() 58 | audio_filter = ObjInferenceTaskResultAudioFilter(form_data) 59 | 60 | count = ResultEvaluationService.find_count(audio_filter) 61 | audio_list = ResultEvaluationService.find_list2(audio_filter) 62 | 63 | return ResponseResult(data=audio_list, count=count) 64 | 65 | 66 | @router.post("/update_result_audio_score") 67 | async def update_result_audio_score(request: Request): 68 | form_data = await request.form() 69 | result_audio_id = form_data.get('id') 70 | score = form_data.get('score') 71 | 72 | if ValidationUtils.is_empty(result_audio_id): 73 | return ResponseResult(code=1, msg="id is required") 74 | if ValidationUtils.is_empty(score): 75 | return ResponseResult(code=1, msg="score is required") 76 | 77 | ResultEvaluationService.update_result_audio_score(result_audio_id, score) 78 | 79 | return ResponseResult() 80 | 81 | 82 | 
@router.post("/update_result_audio_long_text_score") 83 | async def update_result_audio_long_text_score(request: Request): 84 | form_data = await request.form() 85 | result_audio_id = form_data.get('id') 86 | long_text_score = form_data.get('long_text_score') 87 | 88 | if ValidationUtils.is_empty(result_audio_id): 89 | return ResponseResult(code=1, msg="id is required") 90 | if ValidationUtils.is_empty(long_text_score): 91 | return ResponseResult(code=1, msg="long_text_score is required") 92 | 93 | ResultEvaluationService.update_result_audio_long_text_score(result_audio_id, long_text_score) 94 | 95 | return ResponseResult() 96 | 97 | 98 | @router.post("/update_result_audio_remark") 99 | async def update_result_audio_remark(request: Request): 100 | form_data = await request.form() 101 | result_audio_id = form_data.get('id') 102 | remark = form_data.get('remark') 103 | 104 | if ValidationUtils.is_empty(result_audio_id): 105 | return ResponseResult(code=1, msg="id is required") 106 | 107 | ResultEvaluationService.update_result_audio_remark(result_audio_id, remark) 108 | 109 | return ResponseResult() 110 | 111 | 112 | @router.post("/get_inference_task_result_audio_detail") 113 | async def get_inference_task_result_audio_detail(request: Request): 114 | form_data = await request.form() 115 | result_audio_id = form_data.get('id') 116 | 117 | if ValidationUtils.is_empty(result_audio_id): 118 | return ResponseResult(code=1, msg="id is required") 119 | 120 | result_audio = ResultEvaluationService.find_one_by_id(result_audio_id) 121 | 122 | if not result_audio: 123 | return ResponseResult(code=1, msg="无此记录") 124 | 125 | task = InferenceTaskService.find_whole_inference_task_by_id(result_audio.task_id) 126 | 127 | result_audio.obj_task = task 128 | result_audio.obj_text = next((text for text in task.text_list if result_audio.text_id == text.id), None) 129 | result_audio.obj_audio = next((audio for audio in task.audio_list if result_audio.audio_id == audio.id), None) 130 | 
result_audio.obj_param = next((param for param in task.param_list if result_audio.compare_param_id == param.id), 131 | None) 132 | 133 | task.text_list = [] 134 | task.audio_list = [] 135 | task.param_list = [] 136 | 137 | return ResponseResult(data=result_audio) 138 | -------------------------------------------------------------------------------- /server/controller/sound_fusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/controller/sound_fusion/__init__.py -------------------------------------------------------------------------------- /server/controller/sound_fusion/sound_fusion_controller.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import librosa 3 | 4 | from fastapi import APIRouter, Request 5 | 6 | from server.bean.reference_audio.obj_reference_audio import ObjReferenceAudioFilter 7 | from server.bean.sound_fusion.obj_sound_fusion_audio import ObjSoundFusionAudioFilter, ObjSoundFusionAudio 8 | from server.common.response_result import ResponseResult 9 | from server.dao.data_base_manager import db_config 10 | from server.service.reference_audio.reference_audio_service import ReferenceAudioService 11 | from server.service.sound_fusion.sound_fusion_service import SoundFusionService 12 | from server.util.util import str_to_int, save_file 13 | 14 | router = APIRouter(prefix="/fusion") 15 | 16 | 17 | @router.post("/get_sound_fusion_audio_list") 18 | async def get_sound_fusion_audio_list(request: Request): 19 | form_data = await request.form() 20 | audio_filter = ObjSoundFusionAudioFilter(form_data) 21 | 22 | count = SoundFusionService.find_count(audio_filter) 23 | audio_list = SoundFusionService.find_list(audio_filter) 24 | 25 | return ResponseResult(data=audio_list, count=count) 26 | 27 | 28 | 
@router.post("/batch_add_sound_fusion_audio") 29 | async def batch_add_sound_fusion_audio(request: Request): 30 | form_data = await request.form() 31 | ref_audio_ids = form_data.get('refAudioIds') 32 | ref_audio_list = ReferenceAudioService.find_list(ObjReferenceAudioFilter({'audio_ids_str': ref_audio_ids})) 33 | if ref_audio_list is None and len(ref_audio_list) <= 0: 34 | return ResponseResult(code=1, msg='音频不存在') 35 | 36 | sound_fusion_audio_list = [] 37 | 38 | for audio in ref_audio_list: 39 | new_path = SoundFusionService.get_new_sound_fusion_path() 40 | shutil.copy2(audio.audio_path, new_path) 41 | sound = ObjSoundFusionAudio( 42 | role_name=db_config.role.name, audio_name=audio.audio_name, audio_path=new_path, 43 | content=audio.content, 44 | language=audio.language, category=audio.category, audio_length=audio.audio_length 45 | ) 46 | sound_fusion_audio_list.append(sound) 47 | 48 | SoundFusionService.batch_add_sound_fusion_audio(sound_fusion_audio_list) 49 | return ResponseResult() 50 | 51 | 52 | @router.post("/add_sound_fusion_audio") 53 | async def add_sound_fusion_audio(request: Request): 54 | form_data = await request.form() 55 | 56 | file = form_data.get('file') 57 | 58 | audio = ObjSoundFusionAudio( 59 | role_name=form_data.get('roleName'), 60 | audio_name=form_data.get('audioName'), 61 | # audio_path=form_data.get('audioPath'), 62 | content=form_data.get('content'), 63 | language=form_data.get('language'), 64 | category=form_data.get('category'), 65 | # audio_length=form_data.get('audioLength'), 66 | remark=form_data.get('remark') 67 | ) 68 | 69 | new_path = SoundFusionService.get_new_sound_fusion_path() 70 | 71 | await save_file(file, new_path) 72 | 73 | audio.audio_path = new_path 74 | 75 | # 直接计算音频文件的时长(单位:秒) 76 | audio.audio_length = librosa.get_duration(filename=new_path) 77 | 78 | SoundFusionService.add_sound_fusion_audio(audio) 79 | return ResponseResult() 80 | 81 | 82 | @router.post("/update_sound_fusion_audio") 83 | async def 
update_sound_fusion_audio(request: Request): 84 | form_data = await request.form() 85 | audio = ObjSoundFusionAudio( 86 | id=str_to_int(form_data.get('id'), 0), 87 | role_name=form_data.get('roleName'), 88 | audio_name=form_data.get('audioName'), 89 | # audio_path=form_data.get('audioPath'), 90 | content=form_data.get('content'), 91 | language=form_data.get('language'), 92 | category=form_data.get('category'), 93 | # audio_length=form_data.get('audioLength'), 94 | remark=form_data.get('remark') 95 | ) 96 | 97 | if audio.id <= 0: 98 | return ResponseResult(code=1, msg='音频Id异常') 99 | 100 | SoundFusionService.update_sound_fusion_audio(audio) 101 | 102 | return ResponseResult() 103 | 104 | 105 | @router.post("/delete_sound_fusion_audio") 106 | async def delete_sound_fusion_audio(request: Request): 107 | form_data = await request.form() 108 | audio_id = str_to_int(form_data.get('audioId'), 0) 109 | if audio_id <= 0: 110 | return ResponseResult(code=1, msg='音频Id异常') 111 | 112 | SoundFusionService.delete_sound_fusion_audio_by_id(audio_id) 113 | 114 | return ResponseResult() 115 | -------------------------------------------------------------------------------- /server/controller/system/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/controller/system/__init__.py -------------------------------------------------------------------------------- /server/controller/system/system_controller.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Request 2 | 3 | from server.bean.system.role import Role 4 | from server.bean.system.sys_cache_constants import SystemConstants 5 | from server.common.response_result import ResponseResult 6 | from server.dao.data_base_manager import db_config 7 | from server.service.system.system_service import 
SystemService 8 | from server.util.util import ValidationUtils 9 | 10 | router = APIRouter(prefix="/system") 11 | 12 | 13 | @router.post("/load_last_role_name") 14 | async def load_last_role_name(): 15 | role = SystemService.get_valid_role() 16 | 17 | role_list = SystemService.get_role_list() 18 | 19 | return ResponseResult(data={ 20 | "role": role, 21 | "roleList": role_list 22 | }) 23 | 24 | 25 | @router.post("/switch_role_workspace") 26 | async def switch_role_workspace(request: Request): 27 | form_data = await request.form() 28 | role_name = form_data.get("roleName") 29 | role_category = form_data.get("roleCategory") 30 | if ValidationUtils.is_empty(role_name): 31 | return ResponseResult(code=1, msg="roleName is empty") 32 | if ValidationUtils.is_empty(role_category): 33 | return ResponseResult(code=1, msg="roleCategory is empty") 34 | role = Role(category=role_category, name=role_name) 35 | db_config.update_db_path(role) 36 | SystemService.update_sys_cache(SystemConstants.CACHE_TYPE, SystemConstants.CACHE_KEY_ROLE, role.to_json_string()) 37 | return ResponseResult(code=0, msg="success") 38 | -------------------------------------------------------------------------------- /server/controller/text/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/controller/text/__init__.py -------------------------------------------------------------------------------- /server/controller/text/text_controller.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Request 2 | 3 | from server.bean.system.sys_cache_constants import TextConstants 4 | from server.bean.text.obj_inference_text import ObjInferenceTextFilter, ObjInferenceText 5 | from server.common.response_result import ResponseResult 6 | from 
server.service.inference_task.inference_text_service import InferenceTextService 7 | from server.service.system.system_service import SystemService 8 | from server.util.util import ValidationUtils, str_to_int 9 | 10 | router = APIRouter(prefix="/text") 11 | 12 | 13 | 14 | @router.post("/get_last_select_inference_text") 15 | async def get_last_select_inference_text(request: Request): 16 | form_data = await request.form() 17 | 18 | text = None 19 | 20 | last_selected_id = SystemService.get_sys_cache(TextConstants.CACHE_TYPE, TextConstants.CACHE_KEY_LAST_SELECTED_ID) 21 | 22 | if last_selected_id: 23 | text = InferenceTextService.find_one_by_id(last_selected_id) 24 | 25 | return ResponseResult(data=text) 26 | 27 | 28 | @router.post("/update_last_select_inference_text_id") 29 | async def update_last_select_inference_text_id(request: Request): 30 | form_data = await request.form() 31 | 32 | text_id = form_data.get('text_id') 33 | 34 | SystemService.update_sys_cache(TextConstants.CACHE_TYPE, TextConstants.CACHE_KEY_LAST_SELECTED_ID, text_id) 35 | 36 | return ResponseResult() 37 | 38 | 39 | @router.post("/get_inference_text_list") 40 | async def get_inference_text_list(request: Request): 41 | form_data = await request.form() 42 | text_filter = ObjInferenceTextFilter(form_data) 43 | 44 | if ValidationUtils.is_empty(text_filter.order_by): 45 | text_filter.order_by = "id" 46 | if ValidationUtils.is_empty(text_filter.order_by_desc): 47 | text_filter.order_by_desc = "desc" 48 | 49 | count = InferenceTextService.find_count(text_filter) 50 | text_list = InferenceTextService.find_list(text_filter) 51 | return ResponseResult(data=text_list, count=count) 52 | 53 | 54 | @router.post("/save_inference_text") 55 | async def update_inference_text(request: Request): 56 | form_data = await request.form() 57 | text = ObjInferenceText( 58 | id=str_to_int(form_data.get('id')), 59 | category=form_data.get('category'), 60 | text_content=form_data.get('textContent'), 61 | 
text_language=form_data.get('textLanguage') 62 | ) 63 | if text.id > 0: 64 | InferenceTextService.update_inference_text_by_id(text) 65 | text_id = text.id 66 | else: 67 | text_id = InferenceTextService.insert_inference_text(text) 68 | 69 | return ResponseResult(data={"text_id": text_id}) 70 | 71 | 72 | @router.post("/delete_inference_text") 73 | async def delete_inference_text(request: Request): 74 | form_data = await request.form() 75 | text_id = str_to_int(form_data.get('text_id')) 76 | if text_id < 1: 77 | return ResponseResult(code=1, msg="text_id is invalid") 78 | 79 | result = InferenceTextService.delete_inference_text_by_id(text_id) 80 | 81 | return ResponseResult(data={"result": result}) 82 | -------------------------------------------------------------------------------- /server/controller/tts_correction/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/controller/tts_correction/__init__.py -------------------------------------------------------------------------------- /server/controller/tts_correction/tts_correction_controller.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from fastapi import APIRouter, Request 4 | from fastapi.responses import StreamingResponse 5 | 6 | from server.bean.tts_correction.obj_tts_correction_task import ObjTtsCorrectionTaskFilter, ObjTtsCorrectionTask 7 | from server.bean.tts_correction.obj_tts_correction_task_detail import ObjTtsCorrectionTaskDetailFilter, \ 8 | ObjTtsCorrectionTaskDetail 9 | from server.common import config_params 10 | from server.common.custom_exception import CustomException 11 | from server.common.log_config import logger 12 | from server.common.response_result import ResponseResult 13 | from server.service.tts_correction.tts_correction_service import 
TtsCorrectionService 14 | 15 | router = APIRouter(prefix="/correction") 16 | 17 | 18 | @router.post("/get_tts_correction_task_list") 19 | async def get_tts_correction_task_list(request: Request): 20 | form_data = await request.form() 21 | task_filter = ObjTtsCorrectionTaskFilter(form_data) 22 | 23 | count = TtsCorrectionService.find_count(task_filter) 24 | task_list = TtsCorrectionService.find_list(task_filter) 25 | 26 | if task_list is not None and len(task_list) > 0: 27 | for task in task_list: 28 | task.detail_count = TtsCorrectionService.find_detail_count(ObjTtsCorrectionTaskDetailFilter({ 29 | 'task_id': task.id 30 | })) 31 | 32 | return ResponseResult(data=task_list, count=count) 33 | 34 | 35 | @router.post("/get_tts_correction_task_by_id") 36 | async def get_tts_correction_task_by_id(request: Request): 37 | form_data = await request.form() 38 | 39 | id = str_to_int(form_data.get('id')) 40 | 41 | if id < 1: 42 | return ResponseResult(code=1, msg="id is invalid") 43 | 44 | task = TtsCorrectionService.find_task_by_id(id) 45 | 46 | return ResponseResult(data=task) 47 | 48 | 49 | def get_tts_correction_from_json(form_data: dict) -> ObjTtsCorrectionTask: 50 | task = ObjTtsCorrectionTask( 51 | task_name=form_data.get('taskName'), 52 | text_id=form_data.get('textId'), 53 | product_id=form_data.get('productId'), 54 | remark=form_data.get('remark') 55 | ) 56 | 57 | detail_list = form_data.get('taskDetailList') 58 | 59 | task_detail_list = [] 60 | for detail in detail_list: 61 | task_detail_list.append(ObjTtsCorrectionTaskDetail( 62 | text_content=detail.get('textContent'), 63 | text_index=detail.get('textIndex') 64 | )) 65 | task.detail_list = task_detail_list 66 | 67 | return task 68 | 69 | 70 | @router.post("/add_tts_correction_task") 71 | async def add_tts_correction_task(request: Request): 72 | form_data = await request.form() 73 | 74 | task = get_tts_correction_from_json(form_data) 75 | task_detail_list = task.detail_list 76 | 77 | task_id = 
TtsCorrectionService.add_tts_correction_task(task) 78 | 79 | if task_id < 1: 80 | return ResponseResult(code=1, msg="add tts correction task failed") 81 | 82 | for detail in task_detail_list: 83 | detail.task_id = task_id 84 | detail.status = 0 85 | detail.audio_path = '' 86 | detail.asr_text = '' 87 | detail.asr_text_similarity = 0 88 | detail.audio_status = 0 89 | 90 | TtsCorrectionService.batch_add_tts_correction_task_detail(task_detail_list) 91 | 92 | return ResponseResult(data=task) 93 | 94 | 95 | @router.post("/start_execute_tts_correction_task") 96 | async def start_execute_tts_correction_task(request: Request): 97 | form_data = await request.form() 98 | task_id = str_to_int(form_data.get('task_id')) 99 | if task_id < 0: 100 | raise CustomException("task_id is invalid") 101 | task = TtsCorrectionService.find_task_by_id(task_id) 102 | if task is None: 103 | raise CustomException("未找到task") 104 | 105 | start_time = time.perf_counter() # 使用 perf_counter 获取高精度计时起点 106 | 107 | TtsCorrectionService.start_execute_tts_correction_task(task, config_params.inference_process_num) 108 | 109 | end_time = time.perf_counter() # 获取计时终点 110 | elapsed_time = end_time - start_time # 计算执行耗时 111 | 112 | # 记录日志内容 113 | log_message = f"执行耗时: {elapsed_time:.6f} 秒" 114 | logger.info(log_message) 115 | 116 | return ResponseResult() 117 | 118 | 119 | @router.post("/export_tts_correction_task_audio") 120 | async def export_tts_correction_task_audio(request: Request): 121 | form_data = await request.form() 122 | task_id = str_to_int(form_data.get('task_id')) 123 | if task_id < 0: 124 | raise CustomException("task_id is invalid") 125 | task = TtsCorrectionService.find_task_by_id(task_id) 126 | if task is None: 127 | raise CustomException("未找到task") 128 | 129 | zip_in_memory = TtsCorrectionService.generate_zip(task) 130 | 131 | # 将指针移动到开始位置 132 | zip_in_memory.seek(0) 133 | 134 | # 设置响应头 135 | headers = { 136 | 'Content-Disposition': 'attachment; filename=example.zip', 137 | 
'Content-Type': 'application/zip' 138 | } 139 | 140 | # 使用 StreamingResponse 返回 BytesIO 对象 141 | return StreamingResponse(zip_in_memory, headers=headers) 142 | -------------------------------------------------------------------------------- /server/dao/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/dao/__init__.py -------------------------------------------------------------------------------- /server/dao/finished_product/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/dao/finished_product/__init__.py -------------------------------------------------------------------------------- /server/dao/finished_product/finished_product_dao.py: -------------------------------------------------------------------------------- 1 | from server.bean.finished_product.finished_product_manager import ObjFinishedProductManagerFilter, \ 2 | ObjFinishedProductManager 3 | from server.dao.data_base_manager import DBSlaveSQLExecutor 4 | 5 | 6 | class FinishedProductDao: 7 | @staticmethod 8 | def find_count(filter: ObjFinishedProductManagerFilter) -> int: 9 | # 查询所有记录的SQL语句 10 | select_sql = ''' 11 | SELECT COUNT(1) FROM tab_obj_finished_product_manager where 1=1 12 | ''' 13 | 14 | condition_sql, condition = filter.make_sql() 15 | 16 | select_sql += condition_sql 17 | 18 | count = DBSlaveSQLExecutor.get_count(select_sql, condition) 19 | 20 | return count 21 | 22 | @staticmethod 23 | def find_list(audio_filter: ObjFinishedProductManagerFilter) -> list[ObjFinishedProductManager]: 24 | # 查询所有记录的SQL语句 25 | select_sql = ''' 26 | SELECT * FROM tab_obj_finished_product_manager where 1=1 27 | ''' 28 | 29 | condition_sql, condition = 
audio_filter.make_sql() 30 | 31 | select_sql += condition_sql 32 | 33 | select_sql += audio_filter.get_order_by_sql() 34 | 35 | select_sql += audio_filter.get_limit_sql() 36 | 37 | records = DBSlaveSQLExecutor.execute_query(select_sql, condition) 38 | 39 | list = [] 40 | 41 | for data in records: 42 | list.append(ObjFinishedProductManager( 43 | id=data.get('Id'), 44 | name=data.get('Name'), 45 | category=data.get('Category'), 46 | gpt_sovits_version=data.get('GptSovitsVersion'), 47 | gpt_model_name=data.get('GptModelName'), 48 | gpt_model_path=data.get('GptModelPath'), 49 | vits_model_name=data.get('VitsModelName'), 50 | vits_model_path=data.get('VitsModelPath'), 51 | audio_id=data.get('AudioId'), 52 | audio_name=data.get('AudioName'), 53 | audio_path=data.get('AudioPath'), 54 | content=data.get('Content'), 55 | language=data.get('Language'), 56 | audio_length=data.get('AudioLength'), 57 | top_k=data.get('TopK'), 58 | top_p=data.get('TopP'), 59 | temperature=data.get('Temperature'), 60 | text_delimiter=data.get('TextDelimiter'), 61 | speed=data.get('Speed'), 62 | sample_steps=data.get('sample_steps'), 63 | if_sr=data.get('if_sr'), 64 | inp_refs=data.get('InpRefs'), 65 | score=data.get('Score'), 66 | remark=data.get('Remark'), 67 | create_time=data.get('CreateTime') 68 | )) 69 | return list 70 | 71 | @staticmethod 72 | def add_finished_product(product: ObjFinishedProductManager) -> int: 73 | sql = ''' 74 | INSERT INTO tab_obj_finished_product_manager(Name,Category,GptSovitsVersion,GptModelName,GptModelPath,VitsModelName,VitsModelPath,AudioId,AudioName,AudioPath,Content,Language,AudioLength,TopK,TopP,Temperature,TextDelimiter,Speed,SampleSteps,IfSr,InpRefs,Score,Remark,CreateTime) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,datetime('now')) 75 | ''' 76 | return DBSlaveSQLExecutor.insert(sql, ( 77 | product.name, 78 | product.category, 79 | product.gpt_sovits_version, 80 | product.gpt_model_name, 81 | product.gpt_model_path, 82 | product.vits_model_name, 83 
| product.vits_model_path, 84 | product.audio_id, 85 | product.audio_name, 86 | product.audio_path, 87 | product.content, 88 | product.language, 89 | product.audio_length, 90 | product.top_k, 91 | product.top_p, 92 | product.temperature, 93 | product.text_delimiter, 94 | product.speed, 95 | product.sample_steps, 96 | product.if_sr, 97 | product.inp_refs, 98 | product.score, 99 | product.remark 100 | )) 101 | 102 | @staticmethod 103 | def update_finished_product(product: ObjFinishedProductManager) -> int: 104 | sql = ''' 105 | UPDATE tab_obj_finished_product_manager SET Name=?,Category=?,GptSovitsVersion=?,GptModelName=?,GptModelPath=?,VitsModelName=?,VitsModelPath=?,AudioId=?,AudioName=?,AudioPath=?,Content=?,Language=?,AudioLength=?,TopK=?,TopP=?,Temperature=?,TextDelimiter=?,Speed=?,SampleSteps=?,IfSr=?,InpRefs=?,Score=?,Remark=? WHERE Id = ? 106 | ''' 107 | return DBSlaveSQLExecutor.execute_update(sql, ( 108 | product.name, 109 | product.category, 110 | product.gpt_sovits_version, 111 | product.gpt_model_name, 112 | product.gpt_model_path, 113 | product.vits_model_name, 114 | product.vits_model_path, 115 | product.audio_id, 116 | product.audio_name, 117 | product.audio_path, 118 | product.content, 119 | product.language, 120 | product.audio_length, 121 | product.top_k, 122 | product.top_p, 123 | product.temperature, 124 | product.text_delimiter, 125 | product.speed, 126 | product.sample_steps, 127 | product.if_sr, 128 | product.inp_refs, 129 | product.score, 130 | product.remark, 131 | product.id 132 | )) 133 | -------------------------------------------------------------------------------- /server/dao/inference_task/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/dao/inference_task/__init__.py -------------------------------------------------------------------------------- 
/server/dao/inference_task/inference_text_dao.py: -------------------------------------------------------------------------------- 1 | from server.bean.text.obj_inference_text import ObjInferenceTextFilter, ObjInferenceText 2 | from server.dao.data_base_manager import DBMasterSQLExecutor 3 | 4 | 5 | class InferenceTextDao: 6 | @staticmethod 7 | def find_count(text_filter: ObjInferenceTextFilter) -> int: 8 | # 查询所有记录的SQL语句 9 | select_sql = ''' 10 | SELECT COUNT(1) FROM tab_obj_inference_text where 1=1 11 | ''' 12 | 13 | condition_sql, condition = text_filter.make_sql() 14 | 15 | select_sql += condition_sql 16 | 17 | count = DBMasterSQLExecutor.get_count(select_sql, condition) 18 | 19 | return count 20 | 21 | @staticmethod 22 | def find_list(text_filter: ObjInferenceTextFilter) -> list[ObjInferenceText]: 23 | # 查询所有记录的SQL语句 24 | select_sql = ''' 25 | SELECT * FROM tab_obj_inference_text where 1=1 26 | ''' 27 | 28 | condition_sql, condition = text_filter.make_sql() 29 | 30 | select_sql += condition_sql 31 | 32 | select_sql += text_filter.get_order_by_sql() 33 | 34 | select_sql += text_filter.get_limit_sql() 35 | 36 | records = DBMasterSQLExecutor.execute_query(select_sql, condition) 37 | 38 | text_list = [] 39 | 40 | for data in records: 41 | text_list.append(ObjInferenceText( 42 | id=data.get('Id'), 43 | category=data.get('Category'), 44 | text_content=data.get('TextContent'), 45 | text_language=data.get('TextLanguage'), 46 | create_time=data.get('CreateTime') 47 | )) 48 | return text_list 49 | 50 | @staticmethod 51 | def insert_inference_text(text): 52 | sql = ''' 53 | INSERT INTO tab_obj_inference_text(Category,TextContent,TextLanguage,CreateTime) VALUES (?,?,?,datetime('now')) 54 | ''' 55 | return DBMasterSQLExecutor.insert(sql, ( 56 | text.category, 57 | text.text_content, 58 | text.text_language 59 | )) 60 | 61 | @staticmethod 62 | def delete_inference_text_by_id(text_id): 63 | sql = ''' 64 | DELETE FROM tab_obj_inference_text WHERE Id = ? 
65 | ''' 66 | return DBMasterSQLExecutor.execute_update(sql, (text_id,)) 67 | 68 | @staticmethod 69 | def update_inference_text_by_id(text): 70 | sql = ''' 71 | UPDATE tab_obj_inference_text SET 72 | Category = ?, 73 | TextContent = ?, 74 | TextLanguage = ? 75 | WHERE Id = ? 76 | ''' 77 | return DBMasterSQLExecutor.execute_update(sql, ( 78 | text.category, 79 | text.text_content, 80 | text.text_language, 81 | text.id 82 | )) 83 | -------------------------------------------------------------------------------- /server/dao/init_master_table.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | 3 | 4 | def init_master_table(db_path): 5 | # 连接到SQLite数据库,如果不存在则创建 6 | conn = sqlite3.connect(db_path) 7 | 8 | # 创建一个游标对象用于执行SQL命令 9 | cursor = conn.cursor() 10 | 11 | # 音色融合 12 | cursor.execute(''' 13 | CREATE TABLE IF NOT EXISTS tab_obj_sound_fusion_audio ( 14 | Id INTEGER PRIMARY KEY AUTOINCREMENT, -- 自增编号 15 | RoleName TEXT DEFAULT '',-- 角色名称 16 | AudioName TEXT DEFAULT '',-- 音频名称 17 | AudioPath TEXT DEFAULT '',-- 音频路径 18 | Content TEXT DEFAULT '',-- 音频内容 19 | Language TEXT DEFAULT '',-- 音频语种 20 | Category TEXT DEFAULT '',-- 音频分类 21 | AudioLength INTEGER DEFAULT 0 ,-- 音频时长 22 | Remark TEXT DEFAULT '' ,-- 备注 23 | CreateTime TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP -- 创建时间 24 | ); 25 | ''') 26 | 27 | # 创建一个新表 28 | cursor.execute(''' 29 | CREATE TABLE IF NOT EXISTS tab_obj_inference_text ( 30 | Id INTEGER PRIMARY KEY AUTOINCREMENT, -- SQLite使用INTEGER PRIMARY KEY AUTOINCREMENT来实现自增功能 31 | Category TEXT, -- 分类 32 | TextContent TEXT, -- 文本 33 | TextLanguage TEXT, -- 语种 34 | CreateTime TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP -- SQLite中默认的时间戳格式 35 | ); 36 | ''') 37 | 38 | # 创建一个新表 39 | cursor.execute(''' 40 | CREATE TABLE IF NOT EXISTS tab_sys_cache ( 41 | Id INTEGER PRIMARY KEY AUTOINCREMENT, -- SQLite使用INTEGER PRIMARY KEY AUTOINCREMENT来实现自增功能 42 | Type TEXT, -- 类型 43 | KeyName TEXT, -- 文本 44 | Value TEXT, -- 语种 45 | 
CreateTime TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP -- SQLite中默认的时间戳格式 46 | ); 47 | ''') 48 | 49 | # 提交事务(如果没有这一步,则不会保存更改) 50 | conn.commit() 51 | 52 | # 关闭连接 53 | conn.close() 54 | -------------------------------------------------------------------------------- /server/dao/reference_audio/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/dao/reference_audio/__init__.py -------------------------------------------------------------------------------- /server/dao/reference_audio/reference_audio_compare_dao.py: -------------------------------------------------------------------------------- 1 | from server.bean.reference_audio.obj_reference_audio_compare_detail import ObjReferenceAudioCompareDetail 2 | from server.bean.reference_audio.obj_reference_audio_compare_task import ObjReferenceAudioCompareTask 3 | from server.dao.data_base_manager import DBSlaveSQLExecutor 4 | 5 | 6 | class ReferenceAudioCompareDao: 7 | @staticmethod 8 | def insert_task(task: ObjReferenceAudioCompareTask) -> int: 9 | sql = ''' 10 | INSERT INTO tab_obj_reference_audio_compare_task(AudioId,CategoryNames,Status,Remark,CreateTime) VALUES (?,?,?,?,datetime('now')) 11 | ''' 12 | return DBSlaveSQLExecutor.insert(sql, ( 13 | task.audio_id, 14 | task.category_names, 15 | task.status, 16 | task.remark 17 | )) 18 | 19 | @staticmethod 20 | def get_task_by_id(task_id: int) -> ObjReferenceAudioCompareTask: 21 | # 查询所有记录的SQL语句 22 | select_sql = ''' 23 | SELECT * FROM tab_obj_reference_audio_compare_task where id = ? 
LIMIT 1 24 | ''' 25 | 26 | records = DBSlaveSQLExecutor.execute_query(select_sql, (task_id,)) 27 | 28 | task_list = [] 29 | 30 | for data in records: 31 | task_list.append(ObjReferenceAudioCompareTask( 32 | id=data.get('Id'), 33 | audio_id=data.get('AudioId'), 34 | category_names=data.get('CategoryNames'), 35 | status=data.get('Status'), 36 | remark=data.get('Remark'), 37 | create_time=data.get('CreateTime') 38 | )) 39 | if len(task_list) == 0: 40 | return None 41 | return task_list[0] 42 | 43 | @staticmethod 44 | def update_task_status(task_id: int, status: int) -> int: 45 | sql = ''' 46 | UPDATE tab_obj_reference_audio_compare_task SET Status = ? WHERE Id = ? 47 | ''' 48 | return DBSlaveSQLExecutor.execute_update(sql, ( 49 | status, 50 | task_id 51 | )) 52 | 53 | @staticmethod 54 | def batch_insert_task_detail(detail_list: list[ObjReferenceAudioCompareDetail]) -> int: 55 | sql = ''' 56 | INSERT INTO tab_obj_reference_audio_compare_detail(TaskId,CompareAudioId,Score,CreateTime) VALUES (?,?,?,datetime('now')) 57 | ''' 58 | return DBSlaveSQLExecutor.batch_execute(sql, [( 59 | x.task_id, 60 | x.compare_audio_id, 61 | x.score 62 | ) for x in detail_list]) 63 | 64 | @staticmethod 65 | def get_last_finish_task_by_audio_id(audio_id: int) -> ObjReferenceAudioCompareTask: 66 | # 查询所有记录的SQL语句 67 | select_sql = ''' 68 | SELECT * FROM tab_obj_reference_audio_compare_task where AudioId = ? 
AND Status = 2 ORDER BY Id DESC LIMIT 1 69 | ''' 70 | 71 | records = DBSlaveSQLExecutor.execute_query(select_sql, (audio_id,)) 72 | 73 | task_list = [] 74 | 75 | for data in records: 76 | task_list.append(ObjReferenceAudioCompareTask( 77 | id=data.get('Id'), 78 | audio_id=data.get('AudioId'), 79 | category_names=data.get('CategoryNames'), 80 | status=data.get('Status'), 81 | remark=data.get('Remark'), 82 | create_time=data.get('CreateTime') 83 | )) 84 | if len(task_list) == 0: 85 | return None 86 | return task_list[0] 87 | 88 | @staticmethod 89 | def get_compare_detail_list_by_task_id(task_id: int) -> list[ObjReferenceAudioCompareDetail]: 90 | # 查询所有记录的SQL语句 91 | select_sql = ''' 92 | SELECT * FROM tab_obj_reference_audio_compare_detail where TaskId = ? ORDER BY Score DESC 93 | ''' 94 | 95 | records = DBSlaveSQLExecutor.execute_query(select_sql, (task_id,)) 96 | 97 | task_list = [] 98 | 99 | for data in records: 100 | task_list.append(ObjReferenceAudioCompareDetail( 101 | id=data.get('Id'), 102 | task_id=data.get('TaskId'), 103 | compare_audio_id=data.get('CompareAudioId'), 104 | score=data.get('Score'), 105 | create_time=data.get('CreateTime') 106 | )) 107 | return task_list 108 | 109 | -------------------------------------------------------------------------------- /server/dao/reference_audio/reference_category_dao.py: -------------------------------------------------------------------------------- 1 | from server.bean.reference_audio.obj_inference_category import ObjInferenceCategory 2 | from server.dao.data_base_manager import DBSlaveSQLExecutor 3 | 4 | 5 | class ReferenceCategoryDao: 6 | @staticmethod 7 | def exists_category_name(category: str) -> int: 8 | # 查询所有记录的SQL语句 9 | select_sql = ''' 10 | SELECT COUNT(1) FROM tab_obj_inference_category where Name = ? 
11 | ''' 12 | 13 | count = DBSlaveSQLExecutor.get_count(select_sql, (category,)) 14 | 15 | return count 16 | 17 | @staticmethod 18 | def insert_category(category: ObjInferenceCategory) -> int: 19 | sql = ''' 20 | INSERT INTO tab_obj_inference_category(Name,CreateTime) VALUES (?,datetime('now')) 21 | ''' 22 | return DBSlaveSQLExecutor.insert(sql, ( 23 | category.name, 24 | )) 25 | 26 | @staticmethod 27 | def get_category_list() -> list[ObjInferenceCategory]: 28 | # 查询所有记录的SQL语句 29 | select_sql = ''' 30 | SELECT * FROM tab_obj_inference_category 31 | ''' 32 | 33 | records = DBSlaveSQLExecutor.execute_query(select_sql, ()) 34 | 35 | task_list = [] 36 | 37 | for data in records: 38 | task_list.append(ObjInferenceCategory( 39 | id=data.get('Id'), 40 | name=data.get('Name'), 41 | create_time=data.get('CreateTime') 42 | )) 43 | return task_list 44 | -------------------------------------------------------------------------------- /server/dao/result_evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/dao/result_evaluation/__init__.py -------------------------------------------------------------------------------- /server/dao/sound_fusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/dao/sound_fusion/__init__.py -------------------------------------------------------------------------------- /server/dao/sound_fusion/sound_fusion_dao.py: -------------------------------------------------------------------------------- 1 | from server.bean.sound_fusion.obj_sound_fusion_audio import ObjSoundFusionAudioFilter, ObjSoundFusionAudio 2 | from server.dao.data_base_manager import DBMasterSQLExecutor 3 | 4 | 5 | class 
SoundFusionDao: 6 | @staticmethod 7 | def find_count(audio_filter: ObjSoundFusionAudioFilter) -> int: 8 | # 查询所有记录的SQL语句 9 | select_sql = ''' 10 | SELECT COUNT(1) FROM tab_obj_sound_fusion_audio where 1=1 11 | ''' 12 | 13 | condition_sql, condition = audio_filter.make_sql() 14 | 15 | select_sql += condition_sql 16 | 17 | count = DBMasterSQLExecutor.get_count(select_sql, condition) 18 | 19 | return count 20 | 21 | @staticmethod 22 | def find_list(audio_filter: ObjSoundFusionAudioFilter) -> list[ObjSoundFusionAudio]: 23 | # 查询所有记录的SQL语句 24 | select_sql = ''' 25 | SELECT * FROM tab_obj_sound_fusion_audio where 1=1 26 | ''' 27 | 28 | condition_sql, condition = audio_filter.make_sql() 29 | 30 | select_sql += condition_sql 31 | 32 | select_sql += audio_filter.get_order_by_sql() 33 | 34 | select_sql += audio_filter.get_limit_sql() 35 | 36 | records = DBMasterSQLExecutor.execute_query(select_sql, condition) 37 | 38 | list = [] 39 | 40 | for data in records: 41 | list.append(ObjSoundFusionAudio( 42 | id=data['Id'], 43 | role_name=data['RoleName'], 44 | audio_name=data['AudioName'], 45 | audio_path=data['AudioPath'], 46 | content=data['Content'], 47 | language=data['Language'], 48 | category=data['Category'], 49 | audio_length=data['AudioLength'], 50 | remark=data['Remark'], 51 | create_time=data['CreateTime'] 52 | )) 53 | return list 54 | 55 | @staticmethod 56 | def batch_add_sound_fusion_audio(sound_fusion_audio_list: list[ObjSoundFusionAudio]): 57 | sql = ''' 58 | INSERT INTO tab_obj_sound_fusion_audio(RoleName,AudioName,AudioPath,Content,Language,Category,AudioLength,Remark,CreateTime) VALUES (?,?,?,?,?,?,?,?,datetime('now')) 59 | ''' 60 | return DBMasterSQLExecutor.batch_execute(sql, [( 61 | x.role_name, 62 | x.audio_name, 63 | x.audio_path, 64 | x.content, 65 | x.language, 66 | x.category, 67 | x.audio_length, 68 | x.remark 69 | ) for x in sound_fusion_audio_list]) 70 | 71 | @staticmethod 72 | def update_sound_fusion_audio(audio: ObjSoundFusionAudio): 73 | sql = f''' 
74 | UPDATE tab_obj_sound_fusion_audio SET RoleName=?,AudioName=?,Content=?,Language=?,Category=?,Remark=? WHERE Id = ? 75 | ''' 76 | return DBMasterSQLExecutor.execute_update(sql, ( 77 | audio.role_name, 78 | audio.audio_name, 79 | audio.content, 80 | audio.language, 81 | audio.category, 82 | audio.remark, 83 | audio.id 84 | )) 85 | 86 | @staticmethod 87 | def delete_sound_fusion_audio_by_id(audio_id: int): 88 | sql = f''' 89 | DELETE FROM tab_obj_sound_fusion_audio WHERE Id = ? 90 | ''' 91 | return DBMasterSQLExecutor.execute_update(sql, (audio_id,)) 92 | -------------------------------------------------------------------------------- /server/dao/system/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/dao/system/__init__.py -------------------------------------------------------------------------------- /server/dao/system/system_dao.py: -------------------------------------------------------------------------------- 1 | from server.bean.system.sys_cache import SysCache 2 | from server.dao.data_base_manager import DBMasterSQLExecutor 3 | 4 | 5 | class SystemDao: 6 | @staticmethod 7 | def get_sys_cache(cache_type, cache_key) -> SysCache: 8 | # 查询所有记录的SQL语句 9 | select_sql = ''' 10 | SELECT * FROM tab_sys_cache where Type = ? AND KeyName = ? 
LIMIT 1 11 | ''' 12 | 13 | records = DBMasterSQLExecutor.execute_query(select_sql, (cache_type, cache_key)) 14 | 15 | task_list = [] 16 | 17 | for data in records: 18 | task_list.append(SysCache( 19 | type=data['Type'], 20 | key_name=data['KeyName'], 21 | value=data['Value'] 22 | )) 23 | if len(task_list) == 0: 24 | return None 25 | return task_list[0] 26 | 27 | @staticmethod 28 | def insert_sys_cache(cache): 29 | sql = ''' 30 | INSERT INTO tab_sys_cache(Type,KeyName,Value,CreateTime) VALUES (?,?,?,datetime('now')) 31 | ''' 32 | return DBMasterSQLExecutor.insert(sql, ( 33 | cache.type, 34 | cache.key_name, 35 | cache.value 36 | )) 37 | 38 | @staticmethod 39 | def update_sys_cache(cache): 40 | sql = ''' 41 | UPDATE tab_sys_cache SET Value = ? WHERE Type = ? AND KeyName = ? 42 | ''' 43 | return DBMasterSQLExecutor.execute_update(sql, ( 44 | cache.value, 45 | cache.type, 46 | cache.key_name 47 | )) 48 | -------------------------------------------------------------------------------- /server/dao/tts_correction/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/dao/tts_correction/__init__.py -------------------------------------------------------------------------------- /server/service/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/service/__init__.py -------------------------------------------------------------------------------- /server/service/finished_product/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/service/finished_product/__init__.py -------------------------------------------------------------------------------- /server/service/finished_product/finished_product_service.py: -------------------------------------------------------------------------------- 1 | from server.bean.finished_product.finished_product_manager import ObjFinishedProductManager, \ 2 | ObjFinishedProductManagerFilter 3 | from server.dao.finished_product.finished_product_dao import FinishedProductDao 4 | 5 | 6 | class FinishedProductService: 7 | @staticmethod 8 | def find_count(audio_filter: ObjFinishedProductManagerFilter) -> int: 9 | return FinishedProductDao.find_count(audio_filter) 10 | 11 | @staticmethod 12 | def find_list(audio_filter: ObjFinishedProductManagerFilter) -> list[ObjFinishedProductManager]: 13 | return FinishedProductDao.find_list(audio_filter) 14 | 15 | @staticmethod 16 | def find_by_id(product_id: int) -> ObjFinishedProductManager: 17 | product_list = FinishedProductService.find_list(ObjFinishedProductManagerFilter({'id': product_id})) 18 | return product_list[0] if len(product_list) > 0 else None 19 | 20 | @staticmethod 21 | def add_finished_product(product: ObjFinishedProductManager) -> int: 22 | return FinishedProductDao.add_finished_product(product) 23 | 24 | @staticmethod 25 | def update_finished_product(product: ObjFinishedProductManager) -> int: 26 | return FinishedProductDao.update_finished_product(product) 27 | -------------------------------------------------------------------------------- /server/service/inference_task/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/service/inference_task/__init__.py 
-------------------------------------------------------------------------------- /server/service/inference_task/inference_text_service.py: -------------------------------------------------------------------------------- 1 | from server.bean.text.obj_inference_text import ObjInferenceTextFilter, ObjInferenceText 2 | from server.dao.inference_task.inference_text_dao import InferenceTextDao 3 | 4 | 5 | class InferenceTextService: 6 | @staticmethod 7 | def find_count(audio_filter: ObjInferenceTextFilter) -> int: 8 | return InferenceTextDao.find_count(audio_filter) 9 | 10 | @staticmethod 11 | def find_list(audio_filter: ObjInferenceTextFilter) -> list[ObjInferenceText]: 12 | return InferenceTextDao.find_list(audio_filter) 13 | 14 | @staticmethod 15 | def find_one_by_id(text_id: int) -> ObjInferenceText: 16 | text_list = InferenceTextDao.find_list(ObjInferenceTextFilter({ 17 | "id": text_id 18 | })) 19 | 20 | if len(text_list) > 0: 21 | return text_list[0] 22 | 23 | return None 24 | 25 | @staticmethod 26 | def insert_inference_text(text: ObjInferenceText) -> int: 27 | return InferenceTextDao.insert_inference_text(text) 28 | 29 | @staticmethod 30 | def delete_inference_text_by_id(text_id) -> int: 31 | return InferenceTextDao.delete_inference_text_by_id(text_id) 32 | 33 | @staticmethod 34 | def update_inference_text_by_id(text: ObjInferenceText): 35 | return InferenceTextDao.update_inference_text_by_id(text) 36 | 37 | -------------------------------------------------------------------------------- /server/service/inference_task/model_manager_service.py: -------------------------------------------------------------------------------- 1 | import os 2 | from server.bean.inference_task.gpt_model import GptModel 3 | from server.bean.inference_task.vits_model import VitsModel 4 | import server.common.config_params as params 5 | 6 | 7 | class ModelManagerService: 8 | @staticmethod 9 | def get_gpt_model_list() -> list[GptModel]: 10 | GptModel.create_dir() 11 | v1_file_list = 
read_files_with_suffix(GptModel.get_base_v1_dir(), '.ckpt') 12 | v2_file_list = read_files_with_suffix(GptModel.get_base_v2_dir(), '.ckpt') 13 | v3_file_list = read_files_with_suffix(GptModel.get_base_v3_dir(), '.ckpt') 14 | gpt_model_list = [GptModel(version='v1', name=os.path.basename(file_path), path=file_path) for file_path in 15 | v1_file_list] 16 | gpt_model_list = gpt_model_list + [GptModel(version='v2', name=os.path.basename(file_path), path=file_path) for 17 | file_path in v2_file_list] 18 | 19 | current_dir = os.getcwd() 20 | api_dir = os.path.join(current_dir, params.gsv2_dir) 21 | pretrained_models_dir = os.path.join(api_dir, 'GPT_SoVITS/pretrained_models') 22 | 23 | v3 = GptModel(version='v3', name='s1v3.ckpt', path=os.path.join(pretrained_models_dir, 's1v3.ckpt')) 24 | if os.path.exists(v3.path): 25 | gpt_model_list.append(v3) 26 | 27 | gpt_model_list = gpt_model_list + [GptModel(version='v3', name=os.path.basename(file_path), path=file_path) for 28 | file_path in v3_file_list] 29 | 30 | return gpt_model_list 31 | 32 | @staticmethod 33 | def get_vits_model_list() -> list[VitsModel]: 34 | VitsModel.create_dir() 35 | v1_file_list = read_files_with_suffix(VitsModel.get_base_v1_dir(), '.pth') 36 | v2_file_list = read_files_with_suffix(VitsModel.get_base_v2_dir(), '.pth') 37 | v3_file_list = read_files_with_suffix(VitsModel.get_base_v3_dir(), '.pth') 38 | vits_model_list = [VitsModel(version='v1', name=os.path.basename(file_path), path=file_path) for file_path in 39 | v1_file_list] 40 | vits_model_list = vits_model_list + [VitsModel(version='v2', name=os.path.basename(file_path), path=file_path) 41 | for file_path in v2_file_list] 42 | 43 | current_dir = os.getcwd() 44 | api_dir = os.path.join(current_dir, params.gsv2_dir) 45 | pretrained_models_dir = os.path.join(api_dir, 'GPT_SoVITS/pretrained_models') 46 | 47 | v3 = VitsModel(version='v3', name='s2Gv3.pth', path=os.path.join(pretrained_models_dir, 's2Gv3.pth')) 48 | if os.path.exists(v3.path): 49 | 
vits_model_list.append(v3) 50 | 51 | vits_model_list = vits_model_list + [VitsModel(version='v3', name=os.path.basename(file_path), path=file_path) 52 | for file_path in v3_file_list] 53 | return vits_model_list 54 | 55 | @staticmethod 56 | def get_vits_model_by_name(gpt_sovits_version, vits_model_name): 57 | return next(filter(lambda model: model.equals(gpt_sovits_version, vits_model_name), 58 | ModelManagerService.get_vits_model_list())) 59 | 60 | @staticmethod 61 | def get_gpt_model_by_name(gpt_sovits_version, gpt_model_name): 62 | return next(filter(lambda model: model.equals(gpt_sovits_version, gpt_model_name), 63 | ModelManagerService.get_gpt_model_list())) 64 | 65 | 66 | def read_files_with_suffix(directory, suffix): 67 | """ 68 | 读取指定目录下符合指定后缀名称的文件。 69 | 参数: 70 | directory (str): 要搜索的目录路径。 71 | suffix (str): 要查找的文件后缀名。 72 | 返回: 73 | list: 包含所有匹配后缀的文件路径的列表。 74 | """ 75 | matching_files = [] 76 | 77 | # 遍历目录中的所有文件和子目录 78 | for root, dirs, files in os.walk(directory): 79 | for file in files: 80 | # 检查文件是否匹配指定的后缀 81 | if file.endswith(suffix): 82 | # 构建完整的文件路径 83 | file_path = os.path.join(root, file) 84 | matching_files.append(file_path) 85 | 86 | return matching_files 87 | -------------------------------------------------------------------------------- /server/service/reference_audio/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/service/reference_audio/__init__.py -------------------------------------------------------------------------------- /server/service/reference_audio/reference_audio_compare_sevice.py: -------------------------------------------------------------------------------- 1 | from server.bean.reference_audio.obj_reference_audio import ObjReferenceAudioFilter 2 | from server.bean.reference_audio.obj_reference_audio_compare_detail import 
ObjReferenceAudioCompareDetail 3 | from server.bean.reference_audio.obj_reference_audio_compare_task import ObjReferenceAudioCompareTask 4 | from server.dao.reference_audio.reference_audio_compare_dao import ReferenceAudioCompareDao 5 | from server.service.reference_audio.reference_audio_service import ReferenceAudioService 6 | from server.service.reference_audio.reference_category_service import ReferenceCategoryService 7 | 8 | 9 | class ReferenceAudioCompareService: 10 | @staticmethod 11 | def insert_task(task: ObjReferenceAudioCompareTask) -> int: 12 | return ReferenceAudioCompareDao.insert_task(task) 13 | 14 | @staticmethod 15 | def get_task_by_id(task_id: int) -> ObjReferenceAudioCompareTask: 16 | return ReferenceAudioCompareDao.get_task_by_id(task_id) 17 | 18 | @staticmethod 19 | def update_task_to_fail(task_id: int) -> int: 20 | return ReferenceAudioCompareDao.update_task_status(task_id, 3) 21 | 22 | @staticmethod 23 | def update_task_to_start(task_id: int) -> int: 24 | return ReferenceAudioCompareDao.update_task_status(task_id, 1) 25 | 26 | @staticmethod 27 | def update_task_to_finish(task_id: int) -> int: 28 | return ReferenceAudioCompareDao.update_task_status(task_id, 2) 29 | 30 | @staticmethod 31 | def batch_insert_task_detail(detail_list: list[ObjReferenceAudioCompareDetail]) -> int: 32 | return ReferenceAudioCompareDao.batch_insert_task_detail(detail_list) 33 | 34 | @staticmethod 35 | def get_last_finish_task_by_audio_id(audio_id: int) -> ObjReferenceAudioCompareTask: 36 | return ReferenceAudioCompareDao.get_last_finish_task_by_audio_id(audio_id) 37 | 38 | @staticmethod 39 | def get_compare_detail_list_by_task_id(task_id: int) -> list[ObjReferenceAudioCompareDetail]: 40 | detail_list = ReferenceAudioCompareDao.get_compare_detail_list_by_task_id(task_id) 41 | if len(detail_list) == 0: 42 | return detail_list 43 | audio_ids_str = ','.join(str(x.compare_audio_id) for x in detail_list) 44 | audio_list = 
ReferenceAudioService.find_list(ObjReferenceAudioFilter({'audio_ids_str': audio_ids_str})) 45 | for detail in detail_list: 46 | detail.compare_audio = next(filter(lambda x: x.id == detail.compare_audio_id, audio_list), None) 47 | return detail_list 48 | 49 | @staticmethod 50 | def change_audio_category(task_id: int, target_category: str, limit_score: float) -> int: 51 | ReferenceCategoryService.add_category(target_category) 52 | compare_audio_list = ReferenceAudioCompareService.get_compare_detail_list_by_task_id(task_id) 53 | change_audio_list = [] 54 | for compare_audio in compare_audio_list: 55 | if compare_audio.score >= limit_score and compare_audio.compare_audio.category != target_category: 56 | change_audio_list.append(compare_audio.compare_audio) 57 | change_audio_id_str = ','.join(str(x.id) for x in change_audio_list) 58 | return ReferenceAudioService.update_audio_category(change_audio_id_str, target_category) 59 | -------------------------------------------------------------------------------- /server/service/reference_audio/reference_category_service.py: -------------------------------------------------------------------------------- 1 | from server.bean.reference_audio.obj_inference_category import ObjInferenceCategory 2 | from server.dao.reference_audio.reference_category_dao import ReferenceCategoryDao 3 | 4 | 5 | class ReferenceCategoryService: 6 | @staticmethod 7 | def add_category(target_category: str) -> None: 8 | exists = ReferenceCategoryService.exists_category_name(target_category) 9 | if exists > 0: 10 | return 11 | if target_category == 'default': 12 | return 13 | ReferenceCategoryDao.insert_category(ObjInferenceCategory(name=target_category)) 14 | 15 | @staticmethod 16 | def exists_category_name(target_category: str) -> int: 17 | return ReferenceCategoryDao.exists_category_name(target_category) 18 | 19 | @staticmethod 20 | def get_category_list() -> list[ObjInferenceCategory]: 21 | category_list = [ 22 | ObjInferenceCategory(name='default'), 
23 | ObjInferenceCategory(name='无效') 24 | ] 25 | category_list = category_list + ReferenceCategoryDao.get_category_list() 26 | return category_list 27 | -------------------------------------------------------------------------------- /server/service/result_evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/service/result_evaluation/__init__.py -------------------------------------------------------------------------------- /server/service/sound_fusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/service/sound_fusion/__init__.py -------------------------------------------------------------------------------- /server/service/sound_fusion/sound_fusion_service.py: -------------------------------------------------------------------------------- 1 | import os 2 | import uuid 3 | import time 4 | from fastapi import UploadFile 5 | 6 | from server.bean.sound_fusion.obj_sound_fusion_audio import ObjSoundFusionAudioFilter, ObjSoundFusionAudio 7 | from server.dao.data_base_manager import db_config 8 | from server.dao.sound_fusion.sound_fusion_dao import SoundFusionDao 9 | 10 | 11 | class SoundFusionService: 12 | @staticmethod 13 | def find_count(audio_filter: ObjSoundFusionAudioFilter) -> int: 14 | return SoundFusionDao.find_count(audio_filter) 15 | 16 | @staticmethod 17 | def find_list(audio_filter: ObjSoundFusionAudioFilter) -> list[ObjSoundFusionAudio]: 18 | return SoundFusionDao.find_list(audio_filter) 19 | 20 | @staticmethod 21 | def batch_add_sound_fusion_audio(sound_fusion_audio_list: list[ObjSoundFusionAudio]): 22 | return SoundFusionDao.batch_add_sound_fusion_audio(sound_fusion_audio_list) 23 | 24 | 
@staticmethod 25 | def add_sound_fusion_audio(audio: ObjSoundFusionAudio): 26 | return SoundFusionDao.batch_add_sound_fusion_audio([audio]) 27 | 28 | @staticmethod 29 | def update_sound_fusion_audio(audio: ObjSoundFusionAudio): 30 | return SoundFusionDao.update_sound_fusion_audio(audio) 31 | 32 | @staticmethod 33 | def delete_sound_fusion_audio_by_id(audio_id: int): 34 | return SoundFusionDao.delete_sound_fusion_audio_by_id(audio_id) 35 | 36 | @staticmethod 37 | def get_new_sound_fusion_path(): 38 | output_dir = f'{db_config.get_master_db_dir()}\\sound_fusion_audio' 39 | if not os.path.exists(output_dir): 40 | os.makedirs(output_dir) 41 | unique_id_time_based = uuid.uuid1() 42 | new_filename = str(unique_id_time_based) + '.wav' 43 | new_path = os.path.join(output_dir, new_filename) 44 | return new_path 45 | -------------------------------------------------------------------------------- /server/service/system/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/service/system/__init__.py -------------------------------------------------------------------------------- /server/service/system/system_service.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from server.bean.system.role import Role 4 | from server.bean.system.role_category import RoleCategory 5 | from server.bean.system.sys_cache import SysCache 6 | from server.bean.system.sys_cache_constants import SystemConstants 7 | from server.common.log_config import logger 8 | from server.dao.data_base_manager import db_config 9 | from server.dao.system.system_dao import SystemDao 10 | 11 | 12 | class SystemService: 13 | @staticmethod 14 | def get_sys_cache(cache_type: str, cache_key: str) -> str: 15 | cache = SystemDao.get_sys_cache(cache_type, cache_key) 16 | if cache is None: 17 | 
return None 18 | return cache.value 19 | 20 | @staticmethod 21 | def update_sys_cache(cache_type: str, cache_key: str, cache_value: str): 22 | cache = SystemDao.get_sys_cache(cache_type, cache_key) 23 | if cache is None: 24 | cache = SysCache(type=cache_type, key_name=cache_key, value=cache_value) 25 | SystemDao.insert_sys_cache(cache) 26 | else: 27 | cache.value = cache_value 28 | SystemDao.update_sys_cache(cache) 29 | 30 | @staticmethod 31 | def get_role_list() -> list[RoleCategory]: 32 | directory_path = db_config.get_slave_dir() # 假设db_config.get_slave_dir()返回的是正确的目录路径 33 | subdirectories = [] 34 | try: 35 | # 使用os.listdir获取目录下的所有条目 36 | entries = os.listdir(directory_path) 37 | 38 | # 遍历所有条目,检查是否为子目录 39 | for entry in entries: 40 | full_path = os.path.join(directory_path, entry) 41 | if os.path.isdir(full_path): 42 | 43 | role_list = [] 44 | 45 | # 获取该子目录下的所有条目 46 | second_level_entries = os.listdir(full_path) 47 | for second_entry in second_level_entries: 48 | second_full_path = os.path.join(full_path, second_entry) 49 | if os.path.isdir(second_full_path): 50 | role_list.append(Role(category=entry, name=second_entry)) # 添加二级子目录 51 | 52 | if len(role_list) > 0: 53 | subdirectories.append(RoleCategory(category=entry, role_list=role_list)) # 添加一级子目录 54 | 55 | except FileNotFoundError: 56 | logger.error(f"错误:指定的目录 '{directory_path}' 不存在。") 57 | except PermissionError: 58 | logger.error(f"错误:没有权限访问目录 '{directory_path}'。") 59 | except Exception as e: 60 | logger.error(f"发生未知错误:{e}") 61 | return subdirectories 62 | 63 | @staticmethod 64 | def get_valid_role() -> Role: 65 | role = SystemService.get_sys_cache(SystemConstants.CACHE_TYPE, SystemConstants.CACHE_KEY_ROLE) 66 | if role is not None: 67 | return Role.from_json_string(role) 68 | role_list = SystemService.get_role_list() 69 | if len(role_list) > 0: 70 | return role_list[0].role_list[0] 71 | return None 72 | -------------------------------------------------------------------------------- 
/server/service/tts_correction/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/service/tts_correction/__init__.py -------------------------------------------------------------------------------- /server/tool/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/tool/__init__.py -------------------------------------------------------------------------------- /server/tool/asr/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | def check_fw_local_models(): 4 | ''' 5 | 启动时检查本地是否有 Faster Whisper 模型. 6 | ''' 7 | model_size_list = [ 8 | "tiny", "tiny.en", 9 | "base", "base.en", 10 | "small", "small.en", 11 | "medium", "medium.en", 12 | "large", "large-v1", 13 | "large-v2", "large-v3"] 14 | for i, size in enumerate(model_size_list): 15 | if os.path.exists(f'tools/asr/models/faster-whisper-{size}'): 16 | model_size_list[i] = size + '-local' 17 | return model_size_list 18 | 19 | asr_dict = { 20 | "达摩 ASR (中文)": { 21 | 'lang': ['zh','yue'], 22 | 'size': ['large'], 23 | 'path': 'funasr_asr.py', 24 | 'precision': ['float32'] 25 | }, 26 | "Faster Whisper (多语种)": { 27 | 'lang': ['auto', 'zh', 'en', 'ja', 'ko', 'yue'], 28 | 'size': check_fw_local_models(), 29 | 'path': 'fasterwhisper_asr.py', 30 | 'precision': ['float32', 'float16', 'int8'] 31 | }, 32 | } 33 | 34 | -------------------------------------------------------------------------------- /server/tool/asr/funasr_asr.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import os 3 | import sys 4 | import traceback 5 | from funasr import AutoModel 6 | 7 | 
sys.path.append(os.getcwd()) 8 | 9 | from server.common.log_config import logger 10 | import server.common.config_params as params 11 | 12 | 13 | class LanguageModel: 14 | def __init__(self, language='zh'): 15 | if not language: 16 | raise ValueError(f'Unsupported language: {language}') 17 | language = language.lower() 18 | if language.lower() not in ['all_zh','zh', 'yue']: 19 | raise ValueError(f'Unsupported language: {language}') 20 | self.model = init(language) 21 | 22 | def generate(self, path): 23 | try: 24 | return self.model.generate(input=path)[0]["text"] 25 | except: 26 | logger.error(traceback.format_exc()) 27 | 28 | 29 | def init(language): 30 | # model_dir = 'server/tool/asr/models/' 31 | # 获取当前脚本所在的绝对路径 32 | current_dir = os.getcwd() 33 | # 计算上上级目录的绝对路径 34 | api_dir = os.path.join(current_dir, params.gsv2_dir) 35 | model_dir = api_dir+'/tools/asr/models/' 36 | path_vad = model_dir + 'speech_fsmn_vad_zh-cn-16k-common-pytorch' 37 | path_punc = model_dir + 'punc_ct-transformer_zh-cn-common-vocab272727-pytorch' 38 | path_vad = path_vad if os.path.exists(path_vad) else "iic/speech_fsmn_vad_zh-cn-16k-common-pytorch" 39 | path_punc = path_punc if os.path.exists(path_punc) else "iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch" 40 | vad_model_revision = punc_model_revision = "v2.0.4" 41 | 42 | if (language == "zh" or language == "all_zh"): 43 | path_asr = model_dir + 'speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch' 44 | path_asr = path_asr if os.path.exists( 45 | path_asr) else "iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" 46 | model_revision = "v2.0.4" 47 | else: 48 | path_asr = model_dir + 'speech_UniASR_asr_2pass-cantonese-CHS-16k-common-vocab1468-tensorflow1-online' 49 | path_asr = path_asr if os.path.exists( 50 | path_asr) else "iic/speech_UniASR_asr_2pass-cantonese-CHS-16k-common-vocab1468-tensorflow1-online" 51 | model_revision = "master" 52 | path_vad = path_punc = vad_model_revision = 
punc_model_revision = None # ##友情提示:粤语带VAD识别可能会有少量shape 53 | # 不对报错的,但是不带VAD可以.不带vad只能分阶段单独加标点。不过标点模型对粤语效果真的不行… 54 | 55 | return AutoModel( 56 | model=path_asr, 57 | model_revision=model_revision, 58 | vad_model=path_vad, 59 | vad_model_revision=vad_model_revision, 60 | punc_model=path_punc, 61 | punc_model_revision=punc_model_revision, 62 | ) 63 | -------------------------------------------------------------------------------- /server/tool/asr/inference_task_asr.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | 5 | # 获取当前脚本所在的目录 6 | sys.path.append(os.getcwd()) 7 | 8 | from server.tool.asr.funasr_asr import LanguageModel 9 | from server.tool.asr.fasterwhisper_asr import FasterWhisperLanguageModel 10 | from server.bean.result_evaluation.obj_inference_task_result_audio import ObjInferenceTaskResultAudio 11 | from server.service.inference_task.inference_task_service import InferenceTaskService 12 | from server.service.result_evaluation.result_evaluation_service import ResultEvaluationService 13 | import argparse 14 | import server.common.log_config as log_config 15 | from server.util.util import str_to_int 16 | from server.dao.data_base_manager import db_config 17 | from server.bean.system.role import Role 18 | 19 | 20 | def asr_task(task_id: int): 21 | zh_model = None 22 | whisper_model = None 23 | 24 | task = InferenceTaskService.find_whole_inference_task_by_id(task_id) 25 | if task is None: 26 | log_config.logger.error(f'task_id:{task_id} not found') 27 | return 28 | 29 | task_result_audio_list = ResultEvaluationService.find_task_result_audio_list_by_task_id(task) 30 | if task_result_audio_list is None or len(task_result_audio_list) == 0: 31 | log_config.logger.error(f'task_id:{task_id} result audio list is empty') 32 | return 33 | 34 | # Step 2: 用参考音频依次比较音频目录下的每个音频,获取相似度分数及对应路径 35 | all_count = len(task_result_audio_list) 36 | has_processed_count = 0 37 | detail_list = [] 38 | for 
result_audio in task_result_audio_list: 39 | if result_audio.status != 1 or not result_audio.obj_text or not result_audio.obj_text.text_language: 40 | continue 41 | if result_audio.obj_text.text_language.lower() in ['all_zh','zh', 'yue']: 42 | if zh_model is None: 43 | zh_model = LanguageModel(result_audio.obj_text.text_language) 44 | asr_text = zh_model.generate(result_audio.path) 45 | else: 46 | if whisper_model is None: 47 | whisper_model = FasterWhisperLanguageModel() 48 | asr_text = whisper_model.generate(result_audio.path) 49 | detail = ObjInferenceTaskResultAudio( 50 | id=result_audio.id, 51 | asr_text=asr_text 52 | ) 53 | detail_list.append(detail) 54 | has_processed_count += 1 55 | log_config.logger.info(f'进度:{has_processed_count}/{all_count}') 56 | 57 | ResultEvaluationService.batch_update_result_audio_asr_text(detail_list) 58 | 59 | 60 | def parse_arguments(): 61 | parser = argparse.ArgumentParser(description="Audio processing script arguments") 62 | 63 | # Reference audio path 64 | parser.add_argument("-r", "--role_name", type=str, required=True, 65 | help="Path to the role name.") 66 | 67 | # Reference audio path 68 | parser.add_argument("-c", "--role_category", type=str, required=True, 69 | help="Path to the role category.") 70 | 71 | # Reference audio path 72 | parser.add_argument("-t", "--task_id", type=str, required=True, 73 | help="Path to the task id.") 74 | 75 | return parser.parse_args() 76 | 77 | 78 | if __name__ == '__main__': 79 | cmd = parse_arguments() 80 | db_config.update_db_path(Role(category=cmd.role_category, name=cmd.role_name)) 81 | asr_task( 82 | task_id=str_to_int(cmd.task_id) 83 | ) 84 | 85 | # compare_audio_and_generate_report( 86 | # reference_audio_path="D:/tt/渡鸦/refer_audio_all/也对,你的身份和我们不同吗?.wav", 87 | # comparison_dir_path='D:/tt/渡鸦/refer_audio_all', 88 | # output_file_path='D:/tt/渡鸦/test.txt', 89 | # ) 90 | -------------------------------------------------------------------------------- /server/tool/asr/models/.gitignore: 
-------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /server/tool/speaker_verification/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Downupanddownup/RefAudioSelectorV2-BaseOn-GptSoVits/bae12022911de4f1546977b154489e246d0acdeb/server/tool/speaker_verification/__init__.py -------------------------------------------------------------------------------- /server/tool/speaker_verification/audio_compare.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import traceback 4 | from modelscope.pipelines import pipeline 5 | sys.path.append(os.getcwd()) 6 | from server.common.log_config import logger 7 | 8 | speaker_verification_models = { 9 | 'speech_campplus_sv_zh-cn_16k-common': { 10 | 'task': 'speaker-verification', 11 | 'model': 'server/tool/speaker_verification/models/speech_campplus_sv_zh-cn_16k-common', 12 | 'model_revision': 'v1.0.0' 13 | }, 14 | 'speech_eres2net_sv_zh-cn_16k-common': { 15 | 'task': 'speaker-verification', 16 | 'model': 'server/tool/speaker_verification/models/speech_eres2net_sv_zh-cn_16k-common', 17 | 'model_revision': 'v1.0.5' 18 | } 19 | } 20 | 21 | 22 | def init_model(model_type='speech_campplus_sv_zh-cn_16k-common'): 23 | models = speaker_verification_models 24 | return pipeline( 25 | task=models[model_type]['task'], 26 | model=models[model_type]['model'], 27 | model_revision=models[model_type]['model_revision'] 28 | ) 29 | 30 | 31 | sv_pipeline = init_model() 32 | 33 | 34 | def compare(audio_path_1, audio_path_2): 35 | try: 36 | return sv_pipeline([audio_path_1, audio_path_2])['score'] 37 | except: 38 | logger.error(traceback.format_exc()) 39 | -------------------------------------------------------------------------------- 
import sys
import os

# Make project-root imports resolvable when this file runs as a standalone script.
sys.path.append(os.getcwd())

from server.tool.speaker_verification.audio_compare import compare
from server.bean.result_evaluation.obj_inference_task_result_audio import ObjInferenceTaskResultAudio
from server.service.inference_task.inference_task_service import InferenceTaskService
from server.service.result_evaluation.result_evaluation_service import ResultEvaluationService
import argparse
import server.common.log_config as log_config
from server.common.time_util import timeit_decorator
from server.util.util import str_to_int
from server.dao.data_base_manager import db_config
from server.bean.system.role import Role


@timeit_decorator
def compare_audio_and_generate_report(task_id: int):
    """Score every finished result audio of an inference task against its
    reference audio and persist the similarity scores.

    Skips audios whose status is not 1 (not successfully generated) or that
    have no associated reference audio. Finally marks the task's
    audio-similarity step as executed.
    """
    task = InferenceTaskService.find_whole_inference_task_by_id(task_id)
    if task is None:
        log_config.logger.error(f'task_id:{task_id} not found')
        return

    task_result_audio_list = ResultEvaluationService.find_task_result_audio_list_by_task_id(task)
    if task_result_audio_list is None or len(task_result_audio_list) == 0:
        log_config.logger.error(f'task_id:{task_id} result audio list is empty')
        return

    # Compare each generated audio with its reference audio and collect scores.
    all_count = len(task_result_audio_list)
    has_processed_count = 0
    detail_list = []
    for result_audio in task_result_audio_list:
        # Only score successfully generated audios that have a reference audio.
        if result_audio.status != 1 or not result_audio.obj_audio:
            continue
        score = compare(result_audio.path, result_audio.obj_audio.audio_path)
        detail_list.append(ObjInferenceTaskResultAudio(
            id=result_audio.id,
            audio_similar_score=score
        ))
        has_processed_count += 1
        log_config.logger.info(f'进度:{has_processed_count}/{all_count}')

    ResultEvaluationService.batch_update_result_audio_similar_score(detail_list)

    InferenceTaskService.update_task_execute_audio_similarity(task_id, 1)


def parse_arguments():
    """Parse the role/task command-line arguments for this script."""
    parser = argparse.ArgumentParser(description="Audio processing script arguments")

    parser.add_argument("-r", "--role_name", type=str, required=True,
                        help="Name of the role whose database should be opened.")

    parser.add_argument("-c", "--role_category", type=str, required=True,
                        help="Category of the role.")

    parser.add_argument("-t", "--task_id", type=str, required=True,
                        help="Id of the inference task to evaluate.")

    return parser.parse_args()


if __name__ == '__main__':
    cmd = parse_arguments()
    db_config.update_db_path(Role(category=cmd.role_category, name=cmd.role_name))
    compare_audio_and_generate_report(
        task_id=str_to_int(cmd.task_id)
    )


# --- /server/tool/speaker_verification/voice_similarity.py (module continues below) ---
import sys
import os
import sys
import os

# Make project-root imports resolvable when this file runs as a standalone script.
sys.path.append(os.getcwd())

from server.tool.speaker_verification.audio_compare import compare

import argparse
import server.common.log_config as log_config
from server.bean.reference_audio.obj_reference_audio import ObjReferenceAudioFilter
from server.bean.reference_audio.obj_reference_audio_compare_detail import ObjReferenceAudioCompareDetail
from server.common.time_util import timeit_decorator

from server.service.reference_audio.reference_audio_compare_sevice import ReferenceAudioCompareService
from server.service.reference_audio.reference_audio_service import ReferenceAudioService
from server.util.util import str_to_int
from server.dao.data_base_manager import db_config
from server.bean.system.role import Role


@timeit_decorator
def compare_audio_and_generate_report(task_id: int):
    """Compare one reference audio against every audio in the task's categories
    and persist a per-audio similarity score.

    Marks the compare task as started/finished, or failed when the reference
    audio or the candidate list cannot be resolved.
    """
    task = ReferenceAudioCompareService.get_task_by_id(task_id)
    if task is None:
        log_config.logger.error(f'task_id:{task_id} not found')
        return

    audio = ReferenceAudioService.get_audio_by_id(task.audio_id)
    if audio is None:
        log_config.logger.error(f'audio_id:{task.audio_id} not found')
        ReferenceAudioCompareService.update_task_to_fail(task_id)
        return

    compare_audio_list = ReferenceAudioService.find_list(ObjReferenceAudioFilter({
        'categories': task.category_names
    }))

    if len(compare_audio_list) == 0:
        log_config.logger.error(f'category:{task.category_names} not found')
        ReferenceAudioCompareService.update_task_to_fail(task_id)
        return

    ReferenceAudioCompareService.update_task_to_start(task_id)

    reference_audio_path = audio.audio_path

    # Compare the reference audio with each candidate and collect scores.
    all_count = len(compare_audio_list)
    has_processed_count = 0
    detail_list = []
    # Loop variable renamed from ``audio`` so it no longer shadows the
    # reference audio fetched above.
    for candidate in compare_audio_list:
        score = compare(reference_audio_path, candidate.audio_path)
        detail_list.append(ObjReferenceAudioCompareDetail(
            task_id=task_id,
            compare_audio_id=candidate.id,
            score=score
        ))
        has_processed_count += 1
        log_config.logger.info(f'进度:{has_processed_count}/{all_count}')

    ReferenceAudioCompareService.batch_insert_task_detail(detail_list)

    ReferenceAudioCompareService.update_task_to_finish(task_id)


def parse_arguments():
    """Parse the role/task command-line arguments for this script."""
    parser = argparse.ArgumentParser(description="Audio processing script arguments")

    parser.add_argument("-r", "--role_name", type=str, required=True,
                        help="Name of the role whose database should be opened.")

    parser.add_argument("-c", "--role_category", type=str, required=True,
                        help="Category of the role.")

    parser.add_argument("-t", "--task_id", type=str, required=True,
                        help="Id of the reference-audio compare task to run.")

    return parser.parse_args()


if __name__ == '__main__':
    cmd = parse_arguments()
    db_config.update_db_path(Role(category=cmd.role_category, name=cmd.role_name))
    compare_audio_and_generate_report(
        task_id=str_to_int(cmd.task_id)
    )
import os
import sys

# Make project-root imports resolvable when this file runs as a standalone script.
sys.path.append(os.getcwd())
from server.bean.result_evaluation.obj_inference_task_result_audio import ObjInferenceTaskResultAudio
from server.common import log_config
from server.dao.data_base_manager import db_config
from server.service.inference_task.inference_task_service import InferenceTaskService
from server.service.result_evaluation.result_evaluation_service import ResultEvaluationService
from server.tool.text_comparison.text_comparison import calculate_result
from server.util.util import str_to_int
import argparse
from server.bean.system.role import Role


def process(task_id: int):
    """Score every finished result audio's ASR transcript against its target
    text and persist the adjusted similarity scores.

    Skips audios whose status is not 1 or that lack either the target text or
    an ASR transcript. Finally marks the task's text-similarity step as executed.
    """
    task = InferenceTaskService.find_whole_inference_task_by_id(task_id)
    if task is None:
        log_config.logger.error(f'task_id:{task_id} not found')
        return

    task_result_audio_list = ResultEvaluationService.find_task_result_audio_list_by_task_id(task)
    if task_result_audio_list is None or len(task_result_audio_list) == 0:
        log_config.logger.error(f'task_id:{task_id} result audio list is empty')
        return

    # Compare each ASR transcript with the intended text and collect scores.
    all_count = len(task_result_audio_list)
    has_processed_count = 0
    detail_list = []
    for result_audio in task_result_audio_list:
        # Only score successfully generated audios with both texts available.
        if result_audio.status != 1 or not result_audio.obj_text or not result_audio.asr_text:
            continue
        # calculate_result returns (raw_similarity, adjusted_similarity);
        # only the adjusted score is stored.
        _, adjusted_similarity_score2 = calculate_result(result_audio.asr_text, result_audio.obj_text.text_content)
        detail_list.append(ObjInferenceTaskResultAudio(
            id=result_audio.id,
            asr_similar_score=adjusted_similarity_score2
        ))
        has_processed_count += 1
        log_config.logger.info(f'进度:{has_processed_count}/{all_count}')

    ResultEvaluationService.batch_update_result_asr_similar_score(detail_list)

    InferenceTaskService.update_task_execute_text_similarity(task_id, 1)


def parse_arguments():
    """Parse the role/task command-line arguments for this script."""
    parser = argparse.ArgumentParser(description="Audio processing script arguments")

    parser.add_argument("-r", "--role_name", type=str, required=True,
                        help="Name of the role whose database should be opened.")

    parser.add_argument("-c", "--role_category", type=str, required=True,
                        help="Category of the role.")

    parser.add_argument("-t", "--task_id", type=str, required=True,
                        help="Id of the inference task to evaluate.")

    return parser.parse_args()


if __name__ == '__main__':
    cmd = parse_arguments()
    db_config.update_db_path(Role(category=cmd.role_category, name=cmd.role_name))
    process(
        task_id=str_to_int(cmd.task_id)
    )
import os
import sys
import torch
import traceback
from transformers import AutoTokenizer, AutoModel
from scipy.spatial.distance import cosine

sys.path.append(os.getcwd())

from server.common.log_config import logger
import server.common.config_params as params

# Resolve the chinese-roberta BERT model bundled with the GPT-SoVITS install;
# the "bert_path" environment variable overrides the computed default.
current_dir = os.getcwd()
api_dir = os.path.join(current_dir, params.gsv2_dir)
bert_path = os.path.join(api_dir, 'GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large')

bert_path = os.environ.get(
    "bert_path", bert_path
)

# Use the GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

logger.info(f'使用计算设备: {device}')

tokenizer = AutoTokenizer.from_pretrained(bert_path)
model = AutoModel.from_pretrained(bert_path).to(device)


def calculate_similarity(text1, text2, max_length=512):
    """Cosine similarity of the two texts' CLS-token embeddings.

    :return: a float in roughly [0, 1], or ``None`` if inference failed.
    """
    try:
        # Tokenize with truncation so arbitrarily long inputs cannot overflow
        # the model's positional limit.
        inputs1 = tokenizer(text1, padding=True, truncation=True, max_length=max_length, return_tensors='pt').to(device)
        inputs2 = tokenizer(text2, padding=True, truncation=True, max_length=max_length, return_tensors='pt').to(device)

        # Take each sentence's CLS-token vector, flattened to 1-D.
        with torch.no_grad():
            encoded_text1 = model(**inputs1)[0][:, 0, :].flatten()
            encoded_text2 = model(**inputs2)[0][:, 0, :].flatten()

        similarity = 1 - cosine(encoded_text1.cpu().numpy().flatten(), encoded_text2.cpu().numpy().flatten())

        return similarity
    except Exception:
        # Catch Exception only (not a bare except) and make the failure
        # result explicit for callers.
        logger.error(traceback.format_exc())
        return None


def adjusted_similarity(similarity_score2, boundary):
    """Rescale scores in [boundary, 1] linearly onto [0, 1]; below boundary -> 0."""
    if similarity_score2 < boundary:
        return 0

    # Scale factor that maps the [boundary, 1] interval onto [0, 1].
    multiple = 1 / (1 - boundary)

    adjusted_score = (similarity_score2 - boundary) * multiple

    return adjusted_score


def calculate_result(t1, t2, boundary=0.9):
    """Return (raw_similarity, adjusted_similarity) for the two texts.

    If the similarity computation failed, returns ``(None, 0)`` instead of
    crashing — previously the None propagated into ``adjusted_similarity``
    and raised a TypeError.
    """
    similarity_score2 = calculate_similarity(t1, t2)

    if similarity_score2 is None:
        return None, 0

    adjusted_similarity_score2 = adjusted_similarity(similarity_score2, boundary)

    return similarity_score2, adjusted_similarity_score2


def print_result(t1, t2, boundary):
    """Print the raw and adjusted similarity of t1 vs t2 (debug helper)."""
    print(f't2: {t2}')
    similarity_score2 = calculate_similarity(t1, t2)
    print(f"两句话的相似度为: {similarity_score2:.4f}")

    adjusted_similarity_score2 = adjusted_similarity(similarity_score2, boundary)
    print(f"调整后的相似度为: {adjusted_similarity_score2:.4f}")


def test(boundary):
    """Manual smoke test: compare one sentence against a list of samples."""
    text1 = "这是第一个句子"
    # Renamed from ``list`` so it no longer shadows the builtin.
    samples = """
    这是第一个句子
    这是第二个句子。
    那么,这是第三个表达。
    当前呈现的是第四个句子。
    接下来,我们有第五句话。
    在此,展示第六条陈述。
    继续下去,这是第七个短句。
    不容忽视的是第八个表述。
    顺延着序列,这是第九句。
    此处列举的是第十个说法。
    进入新的篇章,这是第十一个句子。
    下一段内容即为第十二个句子。
    显而易见,这是第十三个叙述。
    渐进地,我们来到第十四句话。
    向下滚动,您会看到第十五个表达。
    此刻,呈现在眼前的是第十六个句子。
    它们中的一个——第十七个句子在此。
    如同链条般连接,这是第十八个断言。
    按照顺序排列,接下来是第十九个话语。
    逐一列举,这是第二十个陈述句。
    结构相似,本例给出第二十一个实例句。
    这是最初的陈述句。
    首先表达的是这一个句子。
    第一句内容即为此处所示。
    这是起始的叙述段落。
    开篇所展示的第一句话就是这个。
    明媚的阳光洒满大地
    窗外飘落粉色樱花瓣
    笔尖轻触纸面思绪万千
    深夜的月光如水般静谧
    穿越丛林的小径蜿蜒曲折
    浅酌清茶品味人生百态
    破晓时分雄鸡一唱天下白
    草原上奔驰的骏马无拘无束
    秋叶纷飞描绘季节更替画卷
    寒冬雪夜炉火旁围坐共话家常
    kszdRjYXw
    pfsMgTlVHnB
    uQaGxIbWz
    ZtqNhPmKcOe
    jfyrXsStVUo
    wDiEgLkZbn
    yhNvAfUmqC
    TpKjxMrWgs
    eBzHUaFJtYd
    oQnXcVSiPkL
    00000
    """
    for item in (s.strip() for s in samples.strip().split('\n')):
        print_result(text1, item, boundary)


if __name__ == '__main__':
    test(0.9)
# (tail of server_api.bat, preserved from the original dump:)
# ..\GPT-SoVITS-v3lora-20250228\runtime\python.exe server_api.py
# pause

import os
import sys
import time
import traceback
import webbrowser

sys.path.append(os.getcwd())

from fastapi import FastAPI, Request
from fastapi.exceptions import RequestValidationError
from fastapi.middleware.cors import CORSMiddleware
from starlette.responses import JSONResponse
from starlette.staticfiles import StaticFiles

from server.common.custom_exception import CustomException
from server.common.log_config import logger
from server.common.ras_api_monitor import RasApiMonitor
from server.common.response_result import ResponseResult
from server.controller.reference_audio.reference_audio_controller import router as audio_router
from server.controller.inference_task.inference_task_controller import router as task_router
from server.controller.long_text_inference.long_text_inference_controller import router as long_text_router
from server.controller.result_evaluation.result_evaluation_controller import router as result_evaluation_router
from server.controller.finished_product.finished_product_controller import router as finished_product_router
from server.controller.system.system_controller import router as system_router
from server.controller.sound_fusion.sound_fusion_controller import router as sound_fusion_router
from server.controller.tts_correction.tts_correction_controller import router as tts_correction_router
from server.controller.text.text_controller import router as text_controller
from server.dao.data_base_manager import db_config
from server.common import config_params
from server.service.system.system_service import SystemService

app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],       # allow all origins
    allow_credentials=True,
    allow_methods=["*"],       # allow all HTTP methods
    allow_headers=["*"],       # allow all headers
)


# Handler for the project's domain exception: report the message with a
# business error code but an HTTP 200, as the frontend expects.
@app.exception_handler(CustomException)
async def custom_exception_handler(request: Request, exc: CustomException):
    result = ResponseResult(code=1, msg=exc.message)
    return JSONResponse(content=result.to_dict(), status_code=200)


# Fallback handler for any unhandled exception.
@app.exception_handler(Exception)
async def validation_exception_handler(request, exc: Exception):
    # Log the full traceback, not just str(exc), so failures are debuggable.
    logger.error(''.join(traceback.format_exception(type(exc), exc, exc.__traceback__)))
    result = ResponseResult(code=1, msg=str(exc))
    return JSONResponse(content=result.to_dict(), status_code=500)


# Register the feature routers.
app.include_router(audio_router)
app.include_router(task_router)
app.include_router(long_text_router)
app.include_router(result_evaluation_router)
app.include_router(finished_product_router)
app.include_router(system_router)
app.include_router(sound_fusion_router)
app.include_router(tts_correction_router)
app.include_router(text_controller)

# Serve the web frontend (main.html and assets) from the project root.
app.mount("/static", StaticFiles(directory="."), name="static")

if __name__ == "__main__":
    import uvicorn

    url = f"http://localhost:{config_params.service_port}/static/main.html?apiPort={config_params.api_port}"
    print(f"Open url: {url}")
    # NOTE(review): the browser is opened before uvicorn starts listening; the
    # page may load before the API is up — confirm this race is acceptable.
    webbrowser.open(url)
    db_config.init_master_db_path()
    role = SystemService.get_valid_role()
    if role:
        db_config.update_db_path(role)
    uvicorn.run(app, host="127.0.0.1", port=config_params.service_port)
// Global, immutable-by-convention frontend configuration for the RAS web UI.
// NOTE(review): the tooltip template strings below are reproduced as extracted;
// the original HTML markup (e.g. <br> tags) appears to have been stripped by
// the dump — confirm against the deployed web/config.js.
const SysConfig = (function () {

    const openTips = false // whether tooltips are enabled

    // Languages supported by the TTS backend (name shown to the user, code sent to the API).
    const languageList = [
        { name: '中文(all_zh)', code: 'all_zh' },
        { name: '粤语(all_yue)', code: 'all_yue' },
        { name: '英文(en)', code: 'en' },
        { name: '日文(all_ja)', code: 'all_ja' },
        { name: '韩文(all_ko)', code: 'all_ko' },
        { name: '中英混合(zh)', code: 'zh' },
        { name: '粤英混合(yue)', code: 'yue' },
        { name: '日英混合(ja)', code: 'ja' },
        { name: '韩英混合(ko)', code: 'ko' },
        { name: '多语种混合(auto)', code: 'auto' },
        { name: '多语种混合(粤语)(auto_yue)', code: 'auto_yue' }
    ]

    // Shared explanation appended to the sampling-parameter tooltips.
    const kpt = `
以下内容说明来自官方文档:

关于top_p,top_k和temperature
这三个值都是用来控制采样的。在推理的时候要挑出一个最好的token,但机器并不知道哪个是最好的。于是先按照top_k挑出前几个token,top_p在top_k的基础上筛选token。最后temperature控制随机性输出。
比如总共有100个token,top_k设置5,top_p设置0.6,temperature设置为0.5。那么就会从100个token中先挑出5个概率最大的token,这五个token的概率分别是(0.3,0.3,0.2,0.2,0.1),那么再挑出累加概率不超过0.6的token(0.3和0.3),
再从这两个token中随机挑出一个token输出,其中前一个token被挑选到的几率更大。以此类推
`

    const temperature = `
以下内容说明来自deepseek:
temperature 控制生成随机性:0-1 更确定,1 为默认,1+ 更随机。
${kpt}
`

    const topK = `
以下内容说明来自deepseek:
topK 控制候选词数量:按概率排序,仅从最高 K 个词中采样,K 越小输出越集中,K 越大输出越多样。
${kpt}
`

    const topP = `
以下内容说明来自deepseek:
topP(核采样)控制候选词范围:按概率排序,仅从累积概率超过 P 的最高概率词中采样(P 取值范围 0-1),P 越小输出越集中,P 越大输出越多样。
${kpt}
`

    // Tooltip text keyed by form-field id.
    const tippyDesc = {
        'gptSovitsVersion': 'GptSoVits模型版本',
        'gptModelName': 'Gpt模型名称',
        'vitsModelName': 'SoVits模型名称',
        'topK': topK,
        'topP': topP,
        'temperature': temperature,
        'textDelimiter': '文本分隔符,GptSoVits模型不适合一次性推理长文本,设置此参数可将推送文本在api服务端进行二次切分',
        'speed': '设置音频语速,1为默认',
        'sampleSteps': '采样步数,仅v3有效',
        'ifSr': '启动超分会将音频采样频率从24000超分为48000',
        'inpRefsList': '融合音频,在【参考音频】tab下,点击【融合音频】进入管理界面,可选项:通过选择多个音频(建议同性),平均融合他们的音色。如不选择此项,音色由参考音频控制。如是微调模型,建议参考音频全部在微调训练集音色内,底模不用管。',
    }

    // Default inference parameters.
    const defaultGptSovitsVersion = 'v3'
    const defaultTopK = 15
    const defaultTopP = 1
    const defaultTemperature = 1
    const defaultTextDelimiter = `,.;?!、,。?!;:…"`
    const defaultSpeed = 1.0
    const defaultSampleSteps = 32 // sampling steps (v3 only)
    const defaultIfSr = 0         // 0 = no super-resolution, 1 = super-resolution

    // Default dialog geometry.
    const defaultDialogWidth = '95%'
    const defaultDialogHeight = '95%'

    return {
        tippyDesc: tippyDesc,
        languageList: languageList,
        defaultGptSovitsVersion: defaultGptSovitsVersion,
        defaultTopK: defaultTopK,
        defaultTopP: defaultTopP,
        defaultTemperature: defaultTemperature,
        defaultTextDelimiter: defaultTextDelimiter,
        defaultSpeed: defaultSpeed,
        defaultSampleSteps: defaultSampleSteps,
        defaultIfSr: defaultIfSr,
        defaultDialogWidth: defaultDialogWidth,
        defaultDialogHeight: defaultDialogHeight,
        openTips: openTips
    }
})()

// Base URL of the static-file server (same port the page was served from).
const BaseUrl = `http://localhost:${window.location.port}/`

// Base URL of the RAS backend API; the port arrives via the ?apiPort= query parameter.
const RasApiUrl = (() => {
    const params = new URLSearchParams(window.location.href.split('?')[1]);
    let apiPort = params.get('apiPort');

    console.log('API Port:', apiPort);

    // Previously a missing parameter produced "http://localhost:null/";
    // fall back to the page's own port instead.
    if (apiPort == null || apiPort === '') {
        apiPort = window.location.port;
    }

    return `http://localhost:${apiPort}/`;
})()
-------------------------------------------------------------------------------- 1 | .tippy-box[data-theme~=light-border]{background-color:#fff;background-clip:padding-box;border:1px solid rgba(0,8,16,.15);color:#333;box-shadow:0 4px 14px -2px rgba(0,8,16,.08)}.tippy-box[data-theme~=light-border]>.tippy-backdrop{background-color:#fff}.tippy-box[data-theme~=light-border]>.tippy-arrow:after,.tippy-box[data-theme~=light-border]>.tippy-svg-arrow:after{content:"";position:absolute;z-index:-1}.tippy-box[data-theme~=light-border]>.tippy-arrow:after{border-color:transparent;border-style:solid}.tippy-box[data-theme~=light-border][data-placement^=top]>.tippy-arrow:before{border-top-color:#fff}.tippy-box[data-theme~=light-border][data-placement^=top]>.tippy-arrow:after{border-top-color:rgba(0,8,16,.2);border-width:7px 7px 0;top:17px;left:1px}.tippy-box[data-theme~=light-border][data-placement^=top]>.tippy-svg-arrow>svg{top:16px}.tippy-box[data-theme~=light-border][data-placement^=top]>.tippy-svg-arrow:after{top:17px}.tippy-box[data-theme~=light-border][data-placement^=bottom]>.tippy-arrow:before{border-bottom-color:#fff;bottom:16px}.tippy-box[data-theme~=light-border][data-placement^=bottom]>.tippy-arrow:after{border-bottom-color:rgba(0,8,16,.2);border-width:0 7px 7px;bottom:17px;left:1px}.tippy-box[data-theme~=light-border][data-placement^=bottom]>.tippy-svg-arrow>svg{bottom:16px}.tippy-box[data-theme~=light-border][data-placement^=bottom]>.tippy-svg-arrow:after{bottom:17px}.tippy-box[data-theme~=light-border][data-placement^=left]>.tippy-arrow:before{border-left-color:#fff}.tippy-box[data-theme~=light-border][data-placement^=left]>.tippy-arrow:after{border-left-color:rgba(0,8,16,.2);border-width:7px 0 7px 
7px;left:17px;top:1px}.tippy-box[data-theme~=light-border][data-placement^=left]>.tippy-svg-arrow>svg{left:11px}.tippy-box[data-theme~=light-border][data-placement^=left]>.tippy-svg-arrow:after{left:12px}.tippy-box[data-theme~=light-border][data-placement^=right]>.tippy-arrow:before{border-right-color:#fff;right:16px}.tippy-box[data-theme~=light-border][data-placement^=right]>.tippy-arrow:after{border-width:7px 7px 7px 0;right:17px;top:1px;border-right-color:rgba(0,8,16,.2)}.tippy-box[data-theme~=light-border][data-placement^=right]>.tippy-svg-arrow>svg{right:11px}.tippy-box[data-theme~=light-border][data-placement^=right]>.tippy-svg-arrow:after{right:12px}.tippy-box[data-theme~=light-border]>.tippy-svg-arrow{fill:#fff}.tippy-box[data-theme~=light-border]>.tippy-svg-arrow:after{background-image:url(data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMTYiIGhlaWdodD0iNiIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cGF0aCBkPSJNMCA2czEuNzk2LS4wMTMgNC42Ny0zLjYxNUM1Ljg1MS45IDYuOTMuMDA2IDggMGMxLjA3LS4wMDYgMi4xNDguODg3IDMuMzQzIDIuMzg1QzE0LjIzMyA2LjAwNSAxNiA2IDE2IDZIMHoiIGZpbGw9InJnYmEoMCwgOCwgMTYsIDAuMikiLz48L3N2Zz4=);background-size:16px 6px;width:16px;height:6px} -------------------------------------------------------------------------------- /web/js/tippy/light.css: -------------------------------------------------------------------------------- 1 | .tippy-box[data-theme~=light]{color:#26323d;box-shadow:0 0 20px 4px rgba(154,161,177,.15),0 4px 80px -8px rgba(36,40,47,.25),0 4px 4px -2px 
rgba(91,94,105,.15);background-color:#fff}.tippy-box[data-theme~=light][data-placement^=top]>.tippy-arrow:before{border-top-color:#fff}.tippy-box[data-theme~=light][data-placement^=bottom]>.tippy-arrow:before{border-bottom-color:#fff}.tippy-box[data-theme~=light][data-placement^=left]>.tippy-arrow:before{border-left-color:#fff}.tippy-box[data-theme~=light][data-placement^=right]>.tippy-arrow:before{border-right-color:#fff}.tippy-box[data-theme~=light]>.tippy-backdrop{background-color:#fff}.tippy-box[data-theme~=light]>.tippy-svg-arrow{fill:#fff} -------------------------------------------------------------------------------- /web/js/tippy/material.css: -------------------------------------------------------------------------------- 1 | .tippy-box[data-theme~=material]{background-color:#505355;font-weight:600}.tippy-box[data-theme~=material][data-placement^=top]>.tippy-arrow:before{border-top-color:#505355}.tippy-box[data-theme~=material][data-placement^=bottom]>.tippy-arrow:before{border-bottom-color:#505355}.tippy-box[data-theme~=material][data-placement^=left]>.tippy-arrow:before{border-left-color:#505355}.tippy-box[data-theme~=material][data-placement^=right]>.tippy-arrow:before{border-right-color:#505355}.tippy-box[data-theme~=material]>.tippy-backdrop{background-color:#505355}.tippy-box[data-theme~=material]>.tippy-svg-arrow{fill:#505355} -------------------------------------------------------------------------------- /web/js/tippy/scale-extreme.css: -------------------------------------------------------------------------------- 1 | 
.tippy-box[data-animation=scale-extreme][data-placement^=top]{transform-origin:bottom}.tippy-box[data-animation=scale-extreme][data-placement^=bottom]{transform-origin:top}.tippy-box[data-animation=scale-extreme][data-placement^=left]{transform-origin:right}.tippy-box[data-animation=scale-extreme][data-placement^=right]{transform-origin:left}.tippy-box[data-animation=scale-extreme][data-state=hidden]{transform:scale(0);opacity:.25} -------------------------------------------------------------------------------- /web/js/tippy/scale-subtle.css: -------------------------------------------------------------------------------- 1 | .tippy-box[data-animation=scale-subtle][data-placement^=top]{transform-origin:bottom}.tippy-box[data-animation=scale-subtle][data-placement^=bottom]{transform-origin:top}.tippy-box[data-animation=scale-subtle][data-placement^=left]{transform-origin:right}.tippy-box[data-animation=scale-subtle][data-placement^=right]{transform-origin:left}.tippy-box[data-animation=scale-subtle][data-state=hidden]{transform:scale(.8);opacity:0} -------------------------------------------------------------------------------- /web/js/tippy/scale.css: -------------------------------------------------------------------------------- 1 | .tippy-box[data-animation=scale][data-placement^=top]{transform-origin:bottom}.tippy-box[data-animation=scale][data-placement^=bottom]{transform-origin:top}.tippy-box[data-animation=scale][data-placement^=left]{transform-origin:right}.tippy-box[data-animation=scale][data-placement^=right]{transform-origin:left}.tippy-box[data-animation=scale][data-state=hidden]{transform:scale(.5);opacity:0} -------------------------------------------------------------------------------- /web/js/tippy/tippy.css: -------------------------------------------------------------------------------- 1 | 
.tippy-tooltip[data-animation=fade][data-state=hidden]{opacity:0}.tippy-iOS{cursor:pointer!important;-webkit-tap-highlight-color:transparent}.tippy-popper{pointer-events:none;max-width:calc(100vw - 10px);transition-timing-function:cubic-bezier(.165,.84,.44,1);transition-property:transform}.tippy-tooltip{position:relative;color:#fff;border-radius:4px;font-size:14px;line-height:1.4;background-color:#333;transition-property:visibility,opacity,transform;outline:0}.tippy-tooltip[data-placement^=top]>.tippy-arrow{border-width:8px 8px 0;border-top-color:#333;margin:0 3px;transform-origin:50% 0;bottom:-7px}.tippy-tooltip[data-placement^=bottom]>.tippy-arrow{border-width:0 8px 8px;border-bottom-color:#333;margin:0 3px;transform-origin:50% 7px;top:-7px}.tippy-tooltip[data-placement^=left]>.tippy-arrow{border-width:8px 0 8px 8px;border-left-color:#333;margin:3px 0;transform-origin:0 50%;right:-7px}.tippy-tooltip[data-placement^=right]>.tippy-arrow{border-width:8px 8px 8px 0;border-right-color:#333;margin:3px 0;transform-origin:7px 50%;left:-7px}.tippy-tooltip[data-interactive][data-state=visible]{pointer-events:auto}.tippy-tooltip[data-inertia][data-state=visible]{transition-timing-function:cubic-bezier(.54,1.5,.38,1.11)}.tippy-arrow{position:absolute;border-color:transparent;border-style:solid}.tippy-content{padding:5px 9px} -------------------------------------------------------------------------------- /web/js/tippy/translucent.css: -------------------------------------------------------------------------------- 1 | .tippy-box[data-theme~=translucent]{background-color:rgba(0,0,0,.7)}.tippy-box[data-theme~=translucent]>.tippy-arrow{width:14px;height:14px}.tippy-box[data-theme~=translucent][data-placement^=top]>.tippy-arrow:before{border-width:7px 7px 0;border-top-color:rgba(0,0,0,.7)}.tippy-box[data-theme~=translucent][data-placement^=bottom]>.tippy-arrow:before{border-width:0 7px 
7px;border-bottom-color:rgba(0,0,0,.7)}.tippy-box[data-theme~=translucent][data-placement^=left]>.tippy-arrow:before{border-width:7px 0 7px 7px;border-left-color:rgba(0,0,0,.7)}.tippy-box[data-theme~=translucent][data-placement^=right]>.tippy-arrow:before{border-width:7px 7px 7px 0;border-right-color:rgba(0,0,0,.7)}.tippy-box[data-theme~=translucent]>.tippy-backdrop{background-color:rgba(0,0,0,.7)}.tippy-box[data-theme~=translucent]>.tippy-svg-arrow{fill:rgba(0,0,0,.7)} -------------------------------------------------------------------------------- /web/js/wavesurfer/hover.min.js: -------------------------------------------------------------------------------- 1 | !function(e,t){"object"==typeof exports&&"undefined"!=typeof module?module.exports=t():"function"==typeof define&&define.amd?define(t):((e="undefined"!=typeof globalThis?globalThis:e||self).WaveSurfer=e.WaveSurfer||{},e.WaveSurfer.Hover=t())}(this,(function(){"use strict";class e{constructor(){this.listeners={}}on(e,t,s){if(this.listeners[e]||(this.listeners[e]=new Set),this.listeners[e].add(t),null==s?void 0:s.once){const s=()=>{this.un(e,s),this.un(e,t)};return this.on(e,s),s}return()=>this.un(e,t)}un(e,t){var s;null===(s=this.listeners[e])||void 0===s||s.delete(t)}once(e,t){return this.on(e,t,{once:!0})}unAll(){this.listeners={}}emit(e,...t){this.listeners[e]&&this.listeners[e].forEach((e=>e(...t)))}}class t extends e{constructor(e){super(),this.subscriptions=[],this.options=e}onInit(){}_init(e){this.wavesurfer=e,this.onInit()}destroy(){this.emit("destroy"),this.subscriptions.forEach((e=>e()))}}function s(e,t){const i=t.xmlns?document.createElementNS(t.xmlns,e):document.createElement(e);for(const[e,n]of Object.entries(t))if("children"===e)for(const[e,n]of Object.entries(t))"string"==typeof n?i.appendChild(document.createTextNode(n)):i.appendChild(s(e,n));else"style"===e?Object.assign(i.style,n):"textContent"===e?i.textContent=n:i.setAttribute(e,n.toString());return i}function i(e,t,i){const 
n=s(e,t||{});return null==i||i.appendChild(n),n}const n={lineWidth:1,labelSize:11,formatTimeCallback:e=>`${Math.floor(e/60)}:${`0${Math.floor(e)%60}`.slice(-2)}`};class o extends t{constructor(e){super(e||{}),this.unsubscribe=()=>{},this.onPointerMove=e=>{if(!this.wavesurfer)return;const t=this.wavesurfer.getWrapper().getBoundingClientRect(),{width:s}=t,i=e.clientX-t.left,n=Math.min(1,Math.max(0,i/s)),o=Math.min(s-this.options.lineWidth-1,i);this.wrapper.style.transform=`translateX(${o}px)`,this.wrapper.style.opacity="1";const r=this.wavesurfer.getDuration()||0;this.label.textContent=this.options.formatTimeCallback(r*n);const a=this.label.offsetWidth;this.label.style.transform=o+a>s?`translateX(-${a+this.options.lineWidth}px)`:"",this.emit("hover",n)},this.onPointerLeave=()=>{this.wrapper.style.opacity="0"},this.options=Object.assign({},n,e),this.wrapper=i("div",{part:"hover"}),this.label=i("span",{part:"hover-label"},this.wrapper)}static create(e){return new o(e)}addUnits(e){return`${e}${"number"==typeof e?"px":""}`}onInit(){if(!this.wavesurfer)throw Error("WaveSurfer is not initialized");const e=this.wavesurfer.options,t=this.options.lineColor||e.cursorColor||e.progressColor;Object.assign(this.wrapper.style,{position:"absolute",zIndex:10,left:0,top:0,height:"100%",pointerEvents:"none",borderLeft:`${this.addUnits(this.options.lineWidth)} solid ${t}`,opacity:"0",transition:"opacity .1s ease-in"}),Object.assign(this.label.style,{display:"block",backgroundColor:this.options.labelBackground,color:this.options.labelColor,fontSize:`${this.addUnits(this.options.labelSize)}`,transition:"transform .1s ease-in",padding:"2px 3px"});const 
s=this.wavesurfer.getWrapper();s.appendChild(this.wrapper),s.addEventListener("pointermove",this.onPointerMove),s.addEventListener("pointerleave",this.onPointerLeave),s.addEventListener("wheel",this.onPointerMove),this.unsubscribe=()=>{s.removeEventListener("pointermove",this.onPointerMove),s.removeEventListener("pointerleave",this.onPointerLeave),s.removeEventListener("wheel",this.onPointerLeave)}}destroy(){super.destroy(),this.unsubscribe(),this.wrapper.remove()}}return o})); 2 | -------------------------------------------------------------------------------- /web/js/wavesurfer/timeline.min.js: -------------------------------------------------------------------------------- 1 | !function(t,e){"object"==typeof exports&&"undefined"!=typeof module?module.exports=e():"function"==typeof define&&define.amd?define(e):((t="undefined"!=typeof globalThis?globalThis:t||self).WaveSurfer=t.WaveSurfer||{},t.WaveSurfer.Timeline=e())}(this,(function(){"use strict";class t{constructor(){this.listeners={}}on(t,e,i){if(this.listeners[t]||(this.listeners[t]=new Set),this.listeners[t].add(e),null==i?void 0:i.once){const i=()=>{this.un(t,i),this.un(t,e)};return this.on(t,i),i}return()=>this.un(t,e)}un(t,e){var i;null===(i=this.listeners[t])||void 0===i||i.delete(e)}once(t,e){return this.on(t,e,{once:!0})}unAll(){this.listeners={}}emit(t,...e){this.listeners[t]&&this.listeners[t].forEach((t=>t(...e)))}}class e extends t{constructor(t){super(),this.subscriptions=[],this.options=t}onInit(){}_init(t){this.wavesurfer=t,this.onInit()}destroy(){this.emit("destroy"),this.subscriptions.forEach((t=>t()))}}function i(t,e){const n=e.xmlns?document.createElementNS(e.xmlns,t):document.createElement(t);for(const[t,s]of Object.entries(e))if("children"===t)for(const[t,s]of Object.entries(e))"string"==typeof s?n.appendChild(document.createTextNode(s)):n.appendChild(i(t,s));else"style"===t?Object.assign(n.style,s):"textContent"===t?n.textContent=s:n.setAttribute(t,s.toString());return n}function 
n(t,e,n){return i(t,e||{})}const s={height:20,formatTimeCallback:t=>{if(t/60>1){return`${Math.floor(t/60)}:${`${(t=Math.round(t%60))<10?"0":""}${t}`}`}return`${Math.round(1e3*t)/1e3}`}};class r extends e{constructor(t){super(t||{}),this.options=Object.assign({},s,t),this.timelineWrapper=this.initTimelineWrapper()}static create(t){return new r(t)}onInit(){var t;if(!this.wavesurfer)throw Error("WaveSurfer is not initialized");let e=this.wavesurfer.getWrapper();if(this.options.container instanceof HTMLElement)e=this.options.container;else if("string"==typeof this.options.container){const t=document.querySelector(this.options.container);if(!t)throw Error(`No Timeline container found matching ${this.options.container}`);e=t}this.options.insertPosition?(e.firstElementChild||e).insertAdjacentElement(this.options.insertPosition,this.timelineWrapper):e.appendChild(this.timelineWrapper),this.subscriptions.push(this.wavesurfer.on("redraw",(()=>this.initTimeline()))),((null===(t=this.wavesurfer)||void 0===t?void 0:t.getDuration())||this.options.duration)&&this.initTimeline()}destroy(){this.timelineWrapper.remove(),super.destroy()}initTimelineWrapper(){return n("div",{part:"timeline-wrapper",style:{pointerEvents:"none"}})}defaultTimeInterval(t){return t>=25?1:5*t>=25?5:15*t>=25?15:60*Math.ceil(.5/t)}defaultPrimaryLabelInterval(t){return t>=25?10:5*t>=25?6:4}defaultSecondaryLabelInterval(t){return t>=25?5:2}virtualAppend(t,e,i){let n=!1;const s=(s,r)=>{if(!this.wavesurfer)return;const o=i.clientWidth,l=t>s&&t+o{s(i,n)})))}initTimeline(){var t,e,i,s,r,o,l,a;const h=null!==(i=null!==(e=null===(t=this.wavesurfer)||void 0===t?void 0:t.getDuration())&&void 0!==e?e:this.options.duration)&&void 0!==i?i:0,p=((null===(s=this.wavesurfer)||void 0===s?void 0:s.getWrapper().scrollWidth)||this.timelineWrapper.scrollWidth)/h,u=null!==(r=this.options.timeInterval)&&void 0!==r?r:this.defaultTimeInterval(p),c=null!==(o=this.options.primaryLabelInterval)&&void 
0!==o?o:this.defaultPrimaryLabelInterval(p),d=this.options.primaryLabelSpacing,f=null!==(l=this.options.secondaryLabelInterval)&&void 0!==l?l:this.defaultSecondaryLabelInterval(p),v=this.options.secondaryLabelSpacing,m="beforebegin"===this.options.insertPosition,y=n("div",{style:Object.assign({height:`${this.options.height}px`,overflow:"hidden",fontSize:this.options.height/2+"px",whiteSpace:"nowrap"},m?{position:"absolute",top:"0",left:"0",right:"0",zIndex:"2"}:{position:"relative"})});y.setAttribute("part","timeline"),"string"==typeof this.options.style?y.setAttribute("style",y.getAttribute("style")+this.options.style):"object"==typeof this.options.style&&Object.assign(y.style,this.options.style);const b=n("div",{style:{width:"0",height:"50%",display:"flex",flexDirection:"column",justifyContent:m?"flex-start":"flex-end",top:m?"0":"auto",bottom:m?"auto":"0",overflow:"visible",borderLeft:"1px solid currentColor",opacity:`${null!==(a=this.options.secondaryLabelOpacity)&&void 0!==a?a:.25}`,position:"absolute",zIndex:"1"}});for(let t=0,e=0;t{this.un(t,s),this.un(t,e)};return this.on(t,s),s}return()=>this.un(t,e)}un(t,e){var s;null===(s=this.listeners[t])||void 0===s||s.delete(e)}once(t,e){return this.on(t,e,{once:!0})}unAll(){this.listeners={}}emit(t,...e){this.listeners[t]&&this.listeners[t].forEach((t=>t(...e)))}}class e extends t{constructor(t){super(),this.subscriptions=[],this.options=t}onInit(){}_init(t){this.wavesurfer=t,this.onInit()}destroy(){this.emit("destroy"),this.subscriptions.forEach((t=>t()))}}const s={scale:.5,deltaThreshold:5};class i extends e{constructor(t){super(t||{}),this.wrapper=void 0,this.container=null,this.accumulatedDelta=0,this.onWheel=t=>{if(this.wavesurfer&&this.container&&!(Math.abs(t.deltaX)>=Math.abs(t.deltaY))&&(t.preventDefault(),this.accumulatedDelta+=-t.deltaY,0===this.options.deltaThreshold||Math.abs(this.accumulatedDelta)>=this.options.deltaThreshold)){const 
e=this.wavesurfer.getDuration(),s=this.wavesurfer.options.minPxPerSec,i=t.clientX,r=this.container.clientWidth,n=(this.wavesurfer.getScroll()+i)/s,o=this.calculateNewZoom(s,this.accumulatedDelta),a=r/o*(i/r);o*e{const s=Math.max(0,t+e*this.options.scale);return void 0===this.options.maxZoom?s:Math.min(s,this.options.maxZoom)},this.options=Object.assign({},s,t)}static create(t){return new i(t)}onInit(){var t;this.wrapper=null===(t=this.wavesurfer)||void 0===t?void 0:t.getWrapper(),this.wrapper&&(this.container=this.wrapper.parentElement,this.wrapper.addEventListener("wheel",this.onWheel))}destroy(){this.wrapper&&this.wrapper.removeEventListener("wheel",this.onWheel),super.destroy()}}return i})); 2 | -------------------------------------------------------------------------------- /web/pages/business/finished_product/finished_product_speak.html: -------------------------------------------------------------------------------- 1 | 2 | 71 | 72 | -------------------------------------------------------------------------------- /web/pages/business/long_text_inference/long_text_inference.html: -------------------------------------------------------------------------------- 1 | 2 | 39 | 40 | -------------------------------------------------------------------------------- /web/pages/business/ras_setting.html: -------------------------------------------------------------------------------- 1 | 2 | 70 | 71 | -------------------------------------------------------------------------------- /web/pages/business/reference_audio/reference_audio_split.html: -------------------------------------------------------------------------------- 1 | 2 | 128 | 129 | 142 | -------------------------------------------------------------------------------- /web/pages/business/sound_fusion/sound_fusion_select.html: -------------------------------------------------------------------------------- 1 | 2 | 30 | 31 | -------------------------------------------------------------------------------- 
/web/pages/business/template.html: -------------------------------------------------------------------------------- 1 | 2 | 30 | 31 | -------------------------------------------------------------------------------- /web/pages/business/tts_correction/tts_correction_create.html: -------------------------------------------------------------------------------- 1 | 2 | 69 | 70 | -------------------------------------------------------------------------------- /web/pages/business/tts_correction/tts_correction_inference.html: -------------------------------------------------------------------------------- 1 | 2 | 30 | 31 | -------------------------------------------------------------------------------- /web/pages/common/common_jquery.js: -------------------------------------------------------------------------------- 1 | const CommonSpace = (function () { 2 | 3 | function customJquery($) { 4 | // 初始化 _requestStates 5 | const _requestStates = {}; 6 | 7 | // 新增一个名为 customAjax 的方法 8 | $.customAjax = function (settings) { 9 | // 获取请求的唯一标识符 10 | const requestKey = settings.url + settings.type + JSON.stringify(settings.data); 11 | 12 | // 检查是否有请求正在进行 13 | if (_requestStates[requestKey]) { 14 | console.log('请求已在进行中,不再重复发送。'); 15 | return; 16 | } 17 | 18 | // 加载层 19 | const loadIndex = layui.layer.load(0); 20 | 21 | // 设置请求状态为进行中 22 | _requestStates[requestKey] = true; 23 | 24 | const complete = settings.complete; 25 | 26 | settings.complete = function (jqXHR, statusText) { 27 | 28 | delete _requestStates[requestKey]; 29 | layui.layer.close(loadIndex) 30 | 31 | if (complete) { 32 | complete(jqXHR, statusText) 33 | } 34 | 35 | } 36 | 37 | // 调用原始的 $.ajax 方法 38 | return $.ajax(settings); 39 | } 40 | } 41 | 42 | 43 | function loadHtml(url, callback) { 44 | // 使用 $.ajax 加载 HTML 文件 45 | $.ajax({ 46 | url: url, // 替换为你的HTML文件路径 47 | type: 'GET', 48 | dataType: 'html', 49 | success: function (data) { 50 | // 成功加载后,将内容追加到文档末尾 51 | $(document.body).append(data); 52 | callback() 53 | }, 54 
| error: function (jqXHR, textStatus, errorThrown) { 55 | console.error('加载HTML文件失败:', textStatus, errorThrown); 56 | } 57 | }); 58 | } 59 | 60 | function loadHtmls(urls, callback) { 61 | const unLoads = new Set(urls) 62 | const allUrls = new Set(urls) 63 | allUrls.forEach(url => { 64 | loadHtml(url, function () { 65 | unLoads.delete(url) 66 | if (unLoads.size === 0) { 67 | callback() 68 | } 69 | }) 70 | }) 71 | } 72 | 73 | return { 74 | customJquery: customJquery, 75 | loadHtmls: loadHtmls 76 | } 77 | 78 | } 79 | 80 | ) 81 | () 82 | -------------------------------------------------------------------------------- /web/pages/json/reference_audio.json: -------------------------------------------------------------------------------- 1 | { 2 | "code": 0, 3 | "count": 2, 4 | "msg": "success", 5 | "data": [ 6 | { 7 | "id": 1, 8 | "audioName": "音频名称", 9 | "audioPath": "音频路径", 10 | "content": "音频内容", 11 | "language": "音频语种", 12 | "category": "音频分类", 13 | "audioLength": 10, 14 | "createTime": "2023-03-08T06:05:08.000Z" 15 | }, 16 | { 17 | "id": 2, 18 | "audioName": "音频名称", 19 | "audioPath": "音频路径", 20 | "content": "音频内容", 21 | "language": "音频语种", 22 | "category": "音频分类", 23 | "audioLength": 10, 24 | "createTime": "2023-03-08T06:05:08.000Z" 25 | } 26 | ] 27 | } --------------------------------------------------------------------------------