├── .gitattributes ├── EasierGUI.py ├── GUI.py ├── LICENSE ├── Packages.tar.gz ├── README.md ├── easy-infer.py ├── easy-infer2.py ├── filefinder ├── myinfer.py ├── ngrokabled.zip ├── somegirl.mp3 ├── someguy.mp3 ├── wav2lip-HD.tar.gz └── wav2lip-cache.tar.gz /.gitattributes: -------------------------------------------------------------------------------- 1 | *.tar.gz filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /EasierGUI.py: -------------------------------------------------------------------------------- 1 | import subprocess, torch, os, traceback, sys, warnings, shutil, numpy as np 2 | from mega import Mega 3 | os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1" 4 | import threading 5 | from time import sleep 6 | from subprocess import Popen 7 | import faiss 8 | from random import shuffle 9 | import json, datetime 10 | 11 | now_dir = os.getcwd() 12 | sys.path.append(now_dir) 13 | tmp = os.path.join(now_dir, "TEMP") 14 | shutil.rmtree(tmp, ignore_errors=True) 15 | shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True) 16 | shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack" % (now_dir), ignore_errors=True) 17 | os.makedirs(tmp, exist_ok=True) 18 | os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True) 19 | os.makedirs(os.path.join(now_dir, "weights"), exist_ok=True) 20 | os.environ["TEMP"] = tmp 21 | warnings.filterwarnings("ignore") 22 | torch.manual_seed(114514) 23 | from i18n import I18nAuto 24 | import ffmpeg 25 | from MDXNet import MDXNetDereverb 26 | 27 | i18n = I18nAuto() 28 | i18n.print() 29 | # 判断是否有能用来训练和加速推理的N卡 30 | ngpu = torch.cuda.device_count() 31 | gpu_infos = [] 32 | mem = [] 33 | if (not torch.cuda.is_available()) or ngpu == 0: 34 | if_gpu_ok = False 35 | else: 36 | if_gpu_ok = False 37 | for i in range(ngpu): 38 | gpu_name = torch.cuda.get_device_name(i) 39 | if ( 40 | "10" in gpu_name 41 | or "16" in gpu_name 42 | or "20" in gpu_name 43 | or "30" in gpu_name 44 | or "40" in gpu_name 45 | or "A2" in gpu_name.upper() 46 | or "A3" in gpu_name.upper() 47 | or "A4" in gpu_name.upper() 48 | or "P4" in gpu_name.upper() 49 | or "A50" in gpu_name.upper() 50 | or "A60" in gpu_name.upper() 51 | or "70" in gpu_name 52 | or "80" in gpu_name 53 | or "90" in gpu_name 54 | or "M4" in gpu_name.upper() 55 | or "T4" in gpu_name.upper() 56 | or "TITAN" in gpu_name.upper() 57 | ): # A10#A100#V100#A40#P40#M40#K80#A4500 58 | if_gpu_ok = True # 至少有一张能用的N卡 59 | gpu_infos.append("%s\t%s" % (i, gpu_name)) 60 | mem.append( 61 | int( 62 | torch.cuda.get_device_properties(i).total_memory 63 | / 1024 64 | / 1024 65 | / 1024 66 | + 0.4 67 | ) 68 | ) 69 | if if_gpu_ok == True and len(gpu_infos) > 0: 70 | gpu_info = "\n".join(gpu_infos) 71 | default_batch_size = min(mem) // 2 72 | else: 73 | gpu_info = i18n("很遗憾您这没有能用的显卡来支持您训练") 74 | default_batch_size = 1 75 | gpus = "-".join([i[0] for i in gpu_infos]) 76 | from infer_pack.models import ( 77 | SynthesizerTrnMs256NSFsid, 78 | SynthesizerTrnMs256NSFsid_nono, 79 | SynthesizerTrnMs768NSFsid, 80 | SynthesizerTrnMs768NSFsid_nono, 81 | ) 82 | import soundfile as sf 83 | from fairseq import checkpoint_utils 84 | import gradio as gr 85 | import logging 86 | from vc_infer_pipeline import VC 87 | from config import Config 88 | from infer_uvr5 import _audio_pre_, _audio_pre_new 89 | from my_utils import load_audio 90 | from train.process_ckpt import show_info, change_info, merge, extract_small_model 91 | 92 | config = Config() 93 | # from trainset_preprocess_pipeline import PreProcess 94 | logging.getLogger("numba").setLevel(logging.WARNING) 95 | 96 | 97 | class ToolButton(gr.Button, gr.components.FormComponent): 98 | """Small button with single emoji as text, fits inside gradio forms""" 99 | 100 | def __init__(self, **kwargs): 101 | super().__init__(variant="tool", **kwargs) 102 | 103 | def get_block_name(self): 104 | return "button" 105 | 106 | 107 | hubert_model = None 108 | 109 | 110 | def load_hubert(): 111 | global hubert_model 112 | models, _, _ = checkpoint_utils.load_model_ensemble_and_task( 113 | ["hubert_base.pt"], 114 | suffix="", 115 | ) 116 | hubert_model = models[0] 117 | hubert_model = hubert_model.to(config.device) 118 | if config.is_half: 119 | hubert_model = hubert_model.half() 120 | else: 121 | hubert_model = hubert_model.float() 122 | hubert_model.eval() 123 | 124 | 125 | weight_root = "weights" 126 | weight_uvr5_root = "uvr5_weights" 127 | index_root = "logs" 128 | names = [] 129 | for name in os.listdir(weight_root): 130 | if name.endswith(".pth"): 131 | names.append(name) 132 | index_paths = [] 133 | for root, dirs, files in os.walk(index_root, topdown=False): 134 | for name in files: 135 | if name.endswith(".index") and "trained" not in name: 136 | index_paths.append("%s/%s" % (root, name)) 137 | uvr5_names = [] 138 | for name in os.listdir(weight_uvr5_root): 139 | if name.endswith(".pth") or "onnx" in name: 140 | uvr5_names.append(name.replace(".pth", "")) 141 | 142 | 143 | def vc_single( 144 | sid, 145 | input_audio_path, 146 | f0_up_key, 147 | f0_file, 148 | f0_method, 149 | file_index, 150 | #file_index2, 151 | # file_big_npy, 152 | index_rate, 153 | filter_radius, 154 | resample_sr, 155 | rms_mix_rate, 156 | protect, 157 | crepe_hop_length, 158 | ): # spk_item, input_audio0, vc_transform0,f0_file,f0method0 159 | global tgt_sr, net_g, vc, hubert_model, version 160 | if input_audio_path is None: 161 | return "You need to upload an audio", None 162 | f0_up_key = int(f0_up_key) 163 | try: 164 | audio = load_audio(input_audio_path, 16000) 165 | audio_max = np.abs(audio).max() / 0.95 166 | if audio_max > 1: 167 | audio /= audio_max 168 | times = [0, 0, 0] 169 | if hubert_model == None: 170 | load_hubert() 171 | if_f0 = cpt.get("f0", 1) 172 | file_index = ( 173 | ( 174 | file_index.strip(" ") 175 | .strip('"') 176 | .strip("\n") 177 | .strip('"') 178 | .strip(" ") 179 | .replace("trained", "added") 180 | ) 181 | ) # 防止小白写错,自动帮他替换掉 182 | # file_big_npy = ( 183 | # file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ") 184 | # ) 185 | audio_opt = vc.pipeline( 186 | hubert_model, 187 | net_g, 188 | sid, 189 | audio, 190 | input_audio_path, 191 | times, 192 | f0_up_key, 193 | f0_method, 194 | file_index, 195 | # file_big_npy, 196 | index_rate, 197 | if_f0, 198 | filter_radius, 199 | tgt_sr, 200 | resample_sr, 201 | rms_mix_rate, 202 | version, 203 | protect, 204 | crepe_hop_length, 205 | f0_file=f0_file, 206 | ) 207 | if resample_sr >= 16000 and tgt_sr != resample_sr: 208 | tgt_sr = resample_sr 209 | index_info = ( 210 | "Using index:%s." % file_index 211 | if os.path.exists(file_index) 212 | else "Index not used." 213 | ) 214 | return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % ( 215 | index_info, 216 | times[0], 217 | times[1], 218 | times[2], 219 | ), (tgt_sr, audio_opt) 220 | except: 221 | info = traceback.format_exc() 222 | print(info) 223 | return info, (None, None) 224 | 225 | 226 | def vc_multi( 227 | sid, 228 | dir_path, 229 | opt_root, 230 | paths, 231 | f0_up_key, 232 | f0_method, 233 | file_index, 234 | file_index2, 235 | # file_big_npy, 236 | index_rate, 237 | filter_radius, 238 | resample_sr, 239 | rms_mix_rate, 240 | protect, 241 | format1, 242 | crepe_hop_length, 243 | ): 244 | try: 245 | dir_path = ( 246 | dir_path.strip(" ").strip('"').strip("\n").strip('"').strip(" ") 247 | ) # 防止小白拷路径头尾带了空格和"和回车 248 | opt_root = opt_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ") 249 | os.makedirs(opt_root, exist_ok=True) 250 | try: 251 | if dir_path != "": 252 | paths = [os.path.join(dir_path, name) for name in os.listdir(dir_path)] 253 | else: 254 | paths = [path.name for path in paths] 255 | except: 256 | traceback.print_exc() 257 | paths = [path.name for path in paths] 258 | infos = [] 259 | for path in paths: 260 | info, opt = vc_single( 261 | sid, 262 | path, 263 | f0_up_key, 264 | None, 265 | f0_method, 266 | file_index, 267 | file_index2, 268 | # file_big_npy, 269 | index_rate, 270 | filter_radius, 271 | resample_sr, 272 | rms_mix_rate, 273 | protect, 274 | crepe_hop_length 275 | ) 276 | if "Success" in info: 277 | try: 278 | tgt_sr, audio_opt = opt 279 | if format1 in ["wav", "flac"]: 280 | sf.write( 281 | "%s/%s.%s" % (opt_root, os.path.basename(path), format1), 282 | audio_opt, 283 | tgt_sr, 284 | ) 285 | else: 286 | path = "%s/%s.wav" % (opt_root, os.path.basename(path)) 287 | sf.write( 288 | path, 289 | audio_opt, 290 | tgt_sr, 291 | ) 292 | if os.path.exists(path): 293 | os.system( 294 | "ffmpeg -i %s -vn %s -q:a 2 -y" 295 | % (path, path[:-4] + ".%s" % format1) 296 | ) 297 | except: 298 | info += traceback.format_exc() 299 | infos.append("%s->%s" % (os.path.basename(path), info)) 300 | yield "\n".join(infos) 301 | yield "\n".join(infos) 302 | except: 303 | yield traceback.format_exc() 304 | 305 | 306 | def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format0): 307 | infos = [] 308 | try: 309 | inp_root = inp_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ") 310 | save_root_vocal = ( 311 | save_root_vocal.strip(" ").strip('"').strip("\n").strip('"').strip(" ") 312 | ) 313 | save_root_ins = ( 314 | save_root_ins.strip(" ").strip('"').strip("\n").strip('"').strip(" ") 315 | ) 316 | if model_name == "onnx_dereverb_By_FoxJoy": 317 | pre_fun = MDXNetDereverb(15) 318 | else: 319 | func = _audio_pre_ if "DeEcho" not in model_name else _audio_pre_new 320 | pre_fun = func( 321 | agg=int(agg), 322 | model_path=os.path.join(weight_uvr5_root, model_name + ".pth"), 323 | device=config.device, 324 | is_half=config.is_half, 325 | ) 326 | if inp_root != "": 327 | paths = [os.path.join(inp_root, name) for name in os.listdir(inp_root)] 328 | else: 329 | paths = [path.name for path in paths] 330 | for path in paths: 331 | inp_path = os.path.join(inp_root, path) 332 | need_reformat = 1 333 | done = 0 334 | try: 335 | info = ffmpeg.probe(inp_path, cmd="ffprobe") 336 | if ( 337 | info["streams"][0]["channels"] == 2 338 | and info["streams"][0]["sample_rate"] == "44100" 339 | ): 340 | need_reformat = 0 341 | pre_fun._path_audio_( 342 | inp_path, save_root_ins, save_root_vocal, format0 343 | ) 344 | done = 1 345 | except: 346 | need_reformat = 1 347 | traceback.print_exc() 348 | if need_reformat == 1: 349 | tmp_path = "%s/%s.reformatted.wav" % (tmp, os.path.basename(inp_path)) 350 | os.system( 351 | "ffmpeg -i %s -vn -acodec pcm_s16le -ac 2 -ar 44100 %s -y" 352 | % (inp_path, tmp_path) 353 | ) 354 | inp_path = tmp_path 355 | try: 356 | if done == 0: 357 | pre_fun._path_audio_( 358 | inp_path, save_root_ins, save_root_vocal, format0 359 | ) 360 | infos.append("%s->Success" % (os.path.basename(inp_path))) 361 | yield "\n".join(infos) 362 | except: 363 | infos.append( 364 | "%s->%s" % (os.path.basename(inp_path), traceback.format_exc()) 365 | ) 366 | yield "\n".join(infos) 367 | except: 368 | infos.append(traceback.format_exc()) 369 | yield "\n".join(infos) 370 | finally: 371 | try: 372 | if model_name == "onnx_dereverb_By_FoxJoy": 373 | del pre_fun.pred.model 374 | del pre_fun.pred.model_ 375 | else: 376 | del pre_fun.model 377 | del pre_fun 378 | except: 379 | traceback.print_exc() 380 | print("clean_empty_cache") 381 | if torch.cuda.is_available(): 382 | torch.cuda.empty_cache() 383 | yield "\n".join(infos) 384 | 385 | 386 | # 一个选项卡全局只能有一个音色 387 | def get_vc(sid): 388 | global n_spk, tgt_sr, net_g, vc, cpt, version 389 | if sid == "" or sid == []: 390 | global hubert_model 391 | if hubert_model != None: # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的 392 | print("clean_empty_cache") 393 | del net_g, n_spk, vc, hubert_model, tgt_sr # ,cpt 394 | hubert_model = net_g = n_spk = vc = hubert_model = tgt_sr = None 395 | if torch.cuda.is_available(): 396 | torch.cuda.empty_cache() 397 | ###楼下不这么折腾清理不干净 398 | if_f0 = cpt.get("f0", 1) 399 | version = cpt.get("version", "v1") 400 | if version == "v1": 401 | if if_f0 == 1: 402 | net_g = SynthesizerTrnMs256NSFsid( 403 | *cpt["config"], is_half=config.is_half 404 | ) 405 | else: 406 | net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) 407 | elif version == "v2": 408 | if if_f0 == 1: 409 | net_g = SynthesizerTrnMs768NSFsid( 410 | *cpt["config"], is_half=config.is_half 411 | ) 412 | else: 413 | net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"]) 414 | del net_g, cpt 415 | if torch.cuda.is_available(): 416 | torch.cuda.empty_cache() 417 | cpt = None 418 | return {"visible": False, "__type__": "update"} 419 | person = "%s/%s" % (weight_root, sid) 420 | print("loading %s" % person) 421 | cpt = torch.load(person, map_location="cpu") 422 | tgt_sr = cpt["config"][-1] 423 | cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk 424 | if_f0 = cpt.get("f0", 1) 425 | version = cpt.get("version", "v1") 426 | if version == "v1": 427 | if if_f0 == 1: 428 | net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half) 429 | else: 430 | net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) 431 | elif version == "v2": 432 | if if_f0 == 1: 433 | net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half) 434 | else: 435 | net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"]) 436 | del net_g.enc_q 437 | print(net_g.load_state_dict(cpt["weight"], strict=False)) 438 | net_g.eval().to(config.device) 439 | if config.is_half: 440 | net_g = net_g.half() 441 | else: 442 | net_g = net_g.float() 443 | vc = VC(tgt_sr, config) 444 | n_spk = cpt["config"][-3] 445 | return {"visible": False, "maximum": n_spk, "__type__": "update"} 446 | 447 | 448 | def change_choices(): 449 | names = [] 450 | for name in os.listdir(weight_root): 451 | if name.endswith(".pth"): 452 | names.append(name) 453 | index_paths = [] 454 | for root, dirs, files in os.walk(index_root, topdown=False): 455 | for name in files: 456 | if name.endswith(".index") and "trained" not in name: 457 | index_paths.append("%s/%s" % (root, name)) 458 | return {"choices": sorted(names), "__type__": "update"}, { 459 | "choices": sorted(index_paths), 460 | "__type__": "update", 461 | } 462 | 463 | 464 | def clean(): 465 | return {"value": "", "__type__": "update"} 466 | 467 | 468 | sr_dict = { 469 | "32k": 32000, 470 | "40k": 40000, 471 | "48k": 48000, 472 | } 473 | 474 | 475 | def if_done(done, p): 476 | while 1: 477 | if p.poll() == None: 478 | sleep(0.5) 479 | else: 480 | break 481 | done[0] = True 482 | 483 | 484 | def if_done_multi(done, ps): 485 | while 1: 486 | # poll==None代表进程未结束 487 | # 只要有一个进程未结束都不停 488 | flag = 1 489 | for p in ps: 490 | if p.poll() == None: 491 | flag = 0 492 | sleep(0.5) 493 | break 494 | if flag == 1: 495 | break 496 | done[0] = True 497 | 498 | 499 | def preprocess_dataset(trainset_dir, exp_dir, sr, n_p): 500 | sr = sr_dict[sr] 501 | os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True) 502 | f = open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "w") 503 | f.close() 504 | cmd = ( 505 | config.python_cmd 506 | + " trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s " 507 | % (trainset_dir, sr, n_p, now_dir, exp_dir) 508 | + str(config.noparallel) 509 | ) 510 | print(cmd) 511 | p = Popen(cmd, shell=True) # , stdin=PIPE, stdout=PIPE,stderr=PIPE,cwd=now_dir 512 | ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 513 | done = [False] 514 | threading.Thread( 515 | target=if_done, 516 | args=( 517 | done, 518 | p, 519 | ), 520 | ).start() 521 | while 1: 522 | with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f: 523 | yield (f.read()) 524 | sleep(1) 525 | if done[0] == True: 526 | break 527 | with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f: 528 | log = f.read() 529 | print(log) 530 | yield log 531 | 532 | 533 | # but2.click(extract_f0,[gpus6,np7,f0method8,if_f0_3,trainset_dir4],[info2]) 534 | def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, echl): 535 | gpus = gpus.split("-") 536 | os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True) 537 | f = open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "w") 538 | f.close() 539 | if if_f0: 540 | cmd = config.python_cmd + " extract_f0_print.py %s/logs/%s %s %s %s" % ( 541 | now_dir, 542 | exp_dir, 543 | n_p, 544 | f0method, 545 | echl, 546 | ) 547 | print(cmd) 548 | p = Popen(cmd, shell=True, cwd=now_dir) # , stdin=PIPE, stdout=PIPE,stderr=PIPE 549 | ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 550 | done = [False] 551 | threading.Thread( 552 | target=if_done, 553 | args=( 554 | done, 555 | p, 556 | ), 557 | ).start() 558 | while 1: 559 | with open( 560 | "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r" 561 | ) as f: 562 | yield (f.read()) 563 | sleep(1) 564 | if done[0] == True: 565 | break 566 | with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f: 567 | log = f.read() 568 | print(log) 569 | yield log 570 | ####对不同part分别开多进程 571 | """ 572 | n_part=int(sys.argv[1]) 573 | i_part=int(sys.argv[2]) 574 | i_gpu=sys.argv[3] 575 | exp_dir=sys.argv[4] 576 | os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu) 577 | """ 578 | leng = len(gpus) 579 | ps = [] 580 | for idx, n_g in enumerate(gpus): 581 | cmd = ( 582 | config.python_cmd 583 | + " extract_feature_print.py %s %s %s %s %s/logs/%s %s" 584 | % ( 585 | config.device, 586 | leng, 587 | idx, 588 | n_g, 589 | now_dir, 590 | exp_dir, 591 | version19, 592 | ) 593 | ) 594 | print(cmd) 595 | p = Popen( 596 | cmd, shell=True, cwd=now_dir 597 | ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir 598 | ps.append(p) 599 | ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 600 | done = [False] 601 | threading.Thread( 602 | target=if_done_multi, 603 | args=( 604 | done, 605 | ps, 606 | ), 607 | ).start() 608 | while 1: 609 | with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f: 610 | yield (f.read()) 611 | sleep(1) 612 | if done[0] == True: 613 | break 614 | with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f: 615 | log = f.read() 616 | print(log) 617 | yield log 618 | 619 | 620 | def change_sr2(sr2, if_f0_3, version19): 621 | path_str = "" if version19 == "v1" else "_v2" 622 | f0_str = "f0" if if_f0_3 else "" 623 | if_pretrained_generator_exist = os.access("pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), os.F_OK) 624 | if_pretrained_discriminator_exist = os.access("pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK) 625 | if (if_pretrained_generator_exist == False): 626 | print("pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), "not exist, will not use pretrained model") 627 | if (if_pretrained_discriminator_exist == False): 628 | print("pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), "not exist, will not use pretrained model") 629 | return ( 630 | ("pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)) if if_pretrained_generator_exist else "", 631 | ("pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)) if if_pretrained_discriminator_exist else "", 632 | {"visible": True, "__type__": "update"} 633 | ) 634 | 635 | def change_version19(sr2, if_f0_3, version19): 636 | path_str = "" if version19 == "v1" else "_v2" 637 | f0_str = "f0" if if_f0_3 else "" 638 | if_pretrained_generator_exist = os.access("pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), os.F_OK) 639 | if_pretrained_discriminator_exist = os.access("pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK) 640 | if (if_pretrained_generator_exist == False): 641 | print("pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), "not exist, will not use pretrained model") 642 | if (if_pretrained_discriminator_exist == False): 643 | print("pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), "not exist, will not use pretrained model") 644 | return ( 645 | ("pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)) if if_pretrained_generator_exist else "", 646 | ("pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)) if if_pretrained_discriminator_exist else "", 647 | ) 648 | 649 | 650 | def change_f0(if_f0_3, sr2, version19): # f0method8,pretrained_G14,pretrained_D15 651 | path_str = "" if version19 == "v1" else "_v2" 652 | if_pretrained_generator_exist = os.access("pretrained%s/f0G%s.pth" % (path_str, sr2), os.F_OK) 653 | if_pretrained_discriminator_exist = os.access("pretrained%s/f0D%s.pth" % (path_str, sr2), os.F_OK) 654 | if (if_pretrained_generator_exist == False): 655 | print("pretrained%s/f0G%s.pth" % (path_str, sr2), "not exist, will not use pretrained model") 656 | if (if_pretrained_discriminator_exist == False): 657 | print("pretrained%s/f0D%s.pth" % (path_str, sr2), "not exist, will not use pretrained model") 658 | if if_f0_3: 659 | return ( 660 | {"visible": True, "__type__": "update"}, 661 | "pretrained%s/f0G%s.pth" % (path_str, sr2) if if_pretrained_generator_exist else "", 662 | "pretrained%s/f0D%s.pth" % (path_str, sr2) if if_pretrained_discriminator_exist else "", 663 | ) 664 | return ( 665 | {"visible": False, "__type__": "update"}, 666 | ("pretrained%s/G%s.pth" % (path_str, sr2)) if if_pretrained_generator_exist else "", 667 | ("pretrained%s/D%s.pth" % (path_str, sr2)) if if_pretrained_discriminator_exist else "", 668 | ) 669 | 670 | 671 | # but3.click(click_train,[exp_dir1,sr2,if_f0_3,save_epoch10,total_epoch11,batch_size12,if_save_latest13,pretrained_G14,pretrained_D15,gpus16]) 672 | def click_train( 673 | exp_dir1, 674 | sr2, 675 | if_f0_3, 676 | spk_id5, 677 | save_epoch10, 678 | total_epoch11, 679 | batch_size12, 680 | if_save_latest13, 681 | pretrained_G14, 682 | pretrained_D15, 683 | gpus16, 684 | if_cache_gpu17, 685 | if_save_every_weights18, 686 | version19, 687 | ): 688 | # 生成filelist 689 | exp_dir = "%s/logs/%s" % (now_dir, exp_dir1) 690 | os.makedirs(exp_dir, exist_ok=True) 691 | gt_wavs_dir = "%s/0_gt_wavs" % (exp_dir) 692 | feature_dir = ( 693 | "%s/3_feature256" % (exp_dir) 694 | if version19 == "v1" 695 | else "%s/3_feature768" % (exp_dir) 696 | ) 697 | if if_f0_3: 698 | f0_dir = "%s/2a_f0" % (exp_dir) 699 | f0nsf_dir = "%s/2b-f0nsf" % (exp_dir) 700 | names = ( 701 | set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) 702 | & set([name.split(".")[0] for name in os.listdir(feature_dir)]) 703 | & set([name.split(".")[0] for name in os.listdir(f0_dir)]) 704 | & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)]) 705 | ) 706 | else: 707 | names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set( 708 | [name.split(".")[0] for name in os.listdir(feature_dir)] 709 | ) 710 | opt = [] 711 | for name in names: 712 | if if_f0_3: 713 | opt.append( 714 | "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s" 715 | % ( 716 | gt_wavs_dir.replace("\\", "\\\\"), 717 | name, 718 | feature_dir.replace("\\", "\\\\"), 719 | name, 720 | f0_dir.replace("\\", "\\\\"), 721 | name, 722 | f0nsf_dir.replace("\\", "\\\\"), 723 | name, 724 | spk_id5, 725 | ) 726 | ) 727 | else: 728 | opt.append( 729 | "%s/%s.wav|%s/%s.npy|%s" 730 | % ( 731 | gt_wavs_dir.replace("\\", "\\\\"), 732 | name, 733 | feature_dir.replace("\\", "\\\\"), 734 | name, 735 | spk_id5, 736 | ) 737 | ) 738 | fea_dim = 256 if version19 == "v1" else 768 739 | if if_f0_3: 740 | for _ in range(2): 741 | opt.append( 742 | "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s" 743 | % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5) 744 | ) 745 | else: 746 | for _ in range(2): 747 | opt.append( 748 | "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s" 749 | % (now_dir, sr2, now_dir, fea_dim, spk_id5) 750 | ) 751 | shuffle(opt) 752 | with open("%s/filelist.txt" % exp_dir, "w") as f: 753 | f.write("\n".join(opt)) 754 | print("write filelist done") 755 | # 生成config#无需生成config 756 | # cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e mi-test -sr 40k -f0 1 -bs 4 -g 0 -te 10 -se 5 -pg pretrained/f0G40k.pth -pd pretrained/f0D40k.pth -l 1 -c 0" 757 | print("use gpus:", gpus16) 758 | if pretrained_G14 == "": 759 | print("no pretrained Generator") 760 | if pretrained_D15 == "": 761 | print("no pretrained Discriminator") 762 | if gpus16: 763 | cmd = ( 764 | config.python_cmd 765 | + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s" 766 | % ( 767 | exp_dir1, 768 | sr2, 769 | 1 if if_f0_3 else 0, 770 | batch_size12, 771 | gpus16, 772 | total_epoch11, 773 | save_epoch10, 774 | ("-pg %s" % pretrained_G14) if pretrained_G14 != "" else "", 775 | ("-pd %s" % pretrained_D15) if pretrained_D15 != "" else "", 776 | 1 if if_save_latest13 == i18n("是") else 0, 777 | 1 if if_cache_gpu17 == i18n("是") else 0, 778 | 1 if if_save_every_weights18 == i18n("是") else 0, 779 | version19, 780 | ) 781 | ) 782 | else: 783 | cmd = ( 784 | config.python_cmd 785 | + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s" 786 | % ( 787 | exp_dir1, 788 | sr2, 789 | 1 if if_f0_3 else 0, 790 | batch_size12, 791 | total_epoch11, 792 | save_epoch10, 793 | ("-pg %s" % pretrained_G14) if pretrained_G14 != "" else "\b", 794 | ("-pd %s" % pretrained_D15) if pretrained_D15 != "" else "\b", 795 | 1 if if_save_latest13 == i18n("是") else 0, 796 | 1 if if_cache_gpu17 == i18n("是") else 0, 797 | 1 if if_save_every_weights18 == i18n("是") else 0, 798 | version19, 799 | ) 800 | ) 801 | print(cmd) 802 | p = Popen(cmd, shell=True, cwd=now_dir) 803 | p.wait() 804 | return "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log" 805 | 806 | 807 | # but4.click(train_index, [exp_dir1], info3) 808 | def train_index(exp_dir1, version19): 809 | exp_dir = "%s/logs/%s" % (now_dir, exp_dir1) 810 | os.makedirs(exp_dir, exist_ok=True) 811 | feature_dir = ( 812 | "%s/3_feature256" % (exp_dir) 813 | if version19 == "v1" 814 | else "%s/3_feature768" % (exp_dir) 815 | ) 816 | if os.path.exists(feature_dir) == False: 817 | return "请先进行特征提取!" 818 | listdir_res = list(os.listdir(feature_dir)) 819 | if len(listdir_res) == 0: 820 | return "请先进行特征提取!" 821 | npys = [] 822 | for name in sorted(listdir_res): 823 | phone = np.load("%s/%s" % (feature_dir, name)) 824 | npys.append(phone) 825 | big_npy = np.concatenate(npys, 0) 826 | big_npy_idx = np.arange(big_npy.shape[0]) 827 | np.random.shuffle(big_npy_idx) 828 | big_npy = big_npy[big_npy_idx] 829 | np.save("%s/total_fea.npy" % exp_dir, big_npy) 830 | # n_ivf = big_npy.shape[0] // 39 831 | n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39) 832 | infos = [] 833 | infos.append("%s,%s" % (big_npy.shape, n_ivf)) 834 | yield "\n".join(infos) 835 | index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf) 836 | # index = faiss.index_factory(256if version19=="v1"else 768, "IVF%s,PQ128x4fs,RFlat"%n_ivf) 837 | infos.append("training") 838 | yield "\n".join(infos) 839 | index_ivf = faiss.extract_index_ivf(index) # 840 | index_ivf.nprobe = 1 841 | index.train(big_npy) 842 | faiss.write_index( 843 | index, 844 | "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index" 845 | % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19), 846 | ) 847 | # faiss.write_index(index, '%s/trained_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19)) 848 | infos.append("adding") 849 | yield "\n".join(infos) 850 | batch_size_add = 8192 851 | for i in range(0, big_npy.shape[0], batch_size_add): 852 | index.add(big_npy[i : i + batch_size_add]) 853 | faiss.write_index( 854 | index, 855 | "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index" 856 | % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19), 857 | ) 858 | infos.append( 859 | "成功构建索引,added_IVF%s_Flat_nprobe_%s_%s_%s.index" 860 | % (n_ivf, index_ivf.nprobe, exp_dir1, version19) 861 | ) 862 | # faiss.write_index(index, '%s/added_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19)) 863 | # infos.append("成功构建索引,added_IVF%s_Flat_FastScan_%s.index"%(n_ivf,version19)) 864 | yield "\n".join(infos) 865 | 866 | 867 | # but5.click(train1key, [exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0method8, save_epoch10, total_epoch11, batch_size12, if_save_latest13, pretrained_G14, pretrained_D15, gpus16, if_cache_gpu17], info3) 868 | def train1key( 869 | exp_dir1, 870 | sr2, 871 | if_f0_3, 872 | trainset_dir4, 873 | spk_id5, 874 | np7, 875 | f0method8, 876 | save_epoch10, 877 | total_epoch11, 878 | batch_size12, 879 | if_save_latest13, 880 | pretrained_G14, 881 | pretrained_D15, 882 | gpus16, 883 | if_cache_gpu17, 884 | if_save_every_weights18, 885 | version19, 886 | echl 887 | ): 888 | infos = [] 889 | 890 | def get_info_str(strr): 891 | infos.append(strr) 892 | return "\n".join(infos) 893 | 894 | model_log_dir = "%s/logs/%s" % (now_dir, exp_dir1) 895 | preprocess_log_path = "%s/preprocess.log" % model_log_dir 896 | extract_f0_feature_log_path = "%s/extract_f0_feature.log" % model_log_dir 897 | gt_wavs_dir = "%s/0_gt_wavs" % model_log_dir 898 | feature_dir = ( 899 | "%s/3_feature256" % model_log_dir 900 | if version19 == "v1" 901 | else "%s/3_feature768" % model_log_dir 902 | ) 903 | 904 | os.makedirs(model_log_dir, exist_ok=True) 905 | #########step1:处理数据 906 | open(preprocess_log_path, "w").close() 907 | cmd = ( 908 | config.python_cmd 909 | + " trainset_preprocess_pipeline_print.py %s %s %s %s " 910 | % (trainset_dir4, sr_dict[sr2], np7, model_log_dir) 911 | + str(config.noparallel) 912 | ) 913 | yield get_info_str(i18n("step1:正在处理数据")) 914 | yield get_info_str(cmd) 915 | p = Popen(cmd, shell=True) 916 | p.wait() 917 | with open(preprocess_log_path, "r") as f: 918 | print(f.read()) 919 | #########step2a:提取音高 920 | open(extract_f0_feature_log_path, "w") 921 | if if_f0_3: 922 | yield get_info_str("step2a:正在提取音高") 923 | cmd = config.python_cmd + " extract_f0_print.py %s %s %s %s" % ( 924 | model_log_dir, 925 | np7, 926 | f0method8, 927 | echl 928 | ) 929 | yield get_info_str(cmd) 930 | p = Popen(cmd, shell=True, cwd=now_dir) 931 | p.wait() 932 | with open(extract_f0_feature_log_path, "r") as f: 933 | print(f.read()) 934 | else: 935 | yield get_info_str(i18n("step2a:无需提取音高")) 936 | #######step2b:提取特征 937 | yield get_info_str(i18n("step2b:正在提取特征")) 938 | gpus = gpus16.split("-") 939 | leng = len(gpus) 940 | ps = [] 941 | for idx, n_g in enumerate(gpus): 942 | cmd = config.python_cmd + " extract_feature_print.py %s %s %s %s %s %s" % ( 943 | config.device, 944 | leng, 945 | idx, 946 | n_g, 947 | model_log_dir, 948 | version19, 949 | ) 950 | yield get_info_str(cmd) 951 | p = Popen( 952 | cmd, shell=True, cwd=now_dir 953 | ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir 954 | ps.append(p) 955 | for p in ps: 956 | p.wait() 957 | with open(extract_f0_feature_log_path, "r") as f: 958 | print(f.read()) 959 | #######step3a:训练模型 960 | yield get_info_str(i18n("step3a:正在训练模型")) 961 | # 生成filelist 962 | if if_f0_3: 963 | f0_dir = "%s/2a_f0" % model_log_dir 964 | f0nsf_dir = "%s/2b-f0nsf" % model_log_dir 965 | names = ( 966 | set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) 967 | & set([name.split(".")[0] for name in os.listdir(feature_dir)]) 968 | & set([name.split(".")[0] for name in os.listdir(f0_dir)]) 969 | & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)]) 970 | ) 971 | else: 972 | names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set( 973 | [name.split(".")[0] for name in os.listdir(feature_dir)] 974 | ) 975 | opt = [] 976 | for name in names: 977 | if if_f0_3: 978 | opt.append( 979 | "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s" 980 | % ( 981 | gt_wavs_dir.replace("\\", "\\\\"), 982 | name, 983 | feature_dir.replace("\\", "\\\\"), 984 | name, 985 | f0_dir.replace("\\", "\\\\"), 986 | name, 987 | f0nsf_dir.replace("\\", "\\\\"), 988 | name, 989 | spk_id5, 990 | ) 991 | ) 992 | else: 993 | opt.append( 994 | "%s/%s.wav|%s/%s.npy|%s" 995 | % ( 996 | gt_wavs_dir.replace("\\", "\\\\"), 997 | name, 998 | feature_dir.replace("\\", "\\\\"), 999 | name, 1000 | spk_id5, 1001 | ) 1002 | ) 1003 | fea_dim = 256 if version19 == "v1" else 768 1004 | if if_f0_3: 1005 | for _ in range(2): 1006 | opt.append( 1007 | "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s" 1008 | % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5) 1009 | ) 1010 | else: 1011 | for _ in range(2): 1012 | opt.append( 1013 | "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s" 1014 | % (now_dir, sr2, now_dir, fea_dim, spk_id5) 1015 | ) 1016 | shuffle(opt) 1017 | with open("%s/filelist.txt" % model_log_dir, "w") as f: 1018 | f.write("\n".join(opt)) 1019 | yield get_info_str("write filelist done") 1020 | if gpus16: 1021 | cmd = ( 1022 | config.python_cmd 1023 | +" train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s" 1024 | % ( 1025 | exp_dir1, 1026 | sr2, 1027 | 1 if if_f0_3 else 0, 1028 | batch_size12, 1029 | gpus16, 1030 | total_epoch11, 1031 | save_epoch10, 1032 | ("-pg %s" % pretrained_G14) if pretrained_G14 != "" else "", 1033 | ("-pd %s" % pretrained_D15) if pretrained_D15 != "" else "", 1034 | 1 if if_save_latest13 == i18n("是") else 0, 1035 | 1 if if_cache_gpu17 == i18n("是") else 0, 1036 | 1 if if_save_every_weights18 == i18n("是") else 0, 1037 | version19, 1038 | ) 1039 | ) 1040 | else: 1041 | cmd = ( 1042 | config.python_cmd 1043 | + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s" 1044 | % ( 1045 | exp_dir1, 1046 | sr2, 1047 | 1 if if_f0_3 else 0, 1048 | batch_size12, 1049 | total_epoch11, 1050 | save_epoch10, 1051 | ("-pg %s" % pretrained_G14) if pretrained_G14 != "" else "", 1052 | ("-pd %s" % pretrained_D15) if pretrained_D15 != "" else "", 1053 | 1 if if_save_latest13 == i18n("是") else 0, 1054 | 1 if if_cache_gpu17 == i18n("是") else 0, 1055 | 1 if if_save_every_weights18 == i18n("是") else 0, 1056 | version19, 1057 | ) 1058 | ) 1059 | yield get_info_str(cmd) 1060 | p = Popen(cmd, shell=True, cwd=now_dir) 1061 | p.wait() 1062 | yield get_info_str(i18n("训练结束, 您可查看控制台训练日志或实验文件夹下的train.log")) 1063 | #######step3b:训练索引 1064 | npys = [] 1065 | listdir_res = list(os.listdir(feature_dir)) 1066 | for name in sorted(listdir_res): 1067 | phone = np.load("%s/%s" % (feature_dir, name)) 1068 | npys.append(phone) 1069 | big_npy = np.concatenate(npys, 0) 1070 | 1071 | big_npy_idx = np.arange(big_npy.shape[0]) 1072 | np.random.shuffle(big_npy_idx) 1073 | big_npy = big_npy[big_npy_idx] 1074 | np.save("%s/total_fea.npy" % model_log_dir, big_npy) 1075 | 1076 | # n_ivf = big_npy.shape[0] // 39 1077 | n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39) 1078 | yield get_info_str("%s,%s" % (big_npy.shape, n_ivf)) 1079 | index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf) 1080 | yield get_info_str("training index") 1081 | index_ivf = faiss.extract_index_ivf(index) # 1082 | index_ivf.nprobe = 1 1083 | index.train(big_npy) 1084 | faiss.write_index( 1085 | index, 1086 | "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index" 1087 | % (model_log_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19), 1088 | ) 1089 | yield get_info_str("adding index") 1090 | batch_size_add = 8192 1091 | for i in range(0, big_npy.shape[0], batch_size_add): 1092 | index.add(big_npy[i : i + batch_size_add]) 1093 | faiss.write_index( 1094 | index, 1095 | "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index" 1096 | % (model_log_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19), 1097 | ) 1098 | yield get_info_str( 1099 | "成功构建索引, added_IVF%s_Flat_nprobe_%s_%s_%s.index" 1100 | % (n_ivf, index_ivf.nprobe, exp_dir1, version19) 1101 | ) 1102 | yield get_info_str(i18n("全流程结束!")) 1103 | 1104 | 1105 | # ckpt_path2.change(change_info_,[ckpt_path2],[sr__,if_f0__]) 1106 | def change_info_(ckpt_path): 1107 | if ( 1108 | os.path.exists(ckpt_path.replace(os.path.basename(ckpt_path), "train.log")) 1109 | == False 1110 | ): 1111 | return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"} 1112 | try: 1113 | with open( 1114 | ckpt_path.replace(os.path.basename(ckpt_path), "train.log"), "r" 1115 | ) as f: 1116 | info = eval(f.read().strip("\n").split("\n")[0].split("\t")[-1]) 1117 | sr, f0 = info["sample_rate"], info["if_f0"] 1118 | version = "v2" if ("version" in info and info["version"] == "v2") else "v1" 1119 | return sr, str(f0), version 1120 | except: 1121 | traceback.print_exc() 1122 | return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"} 1123 | 1124 | 1125 | from infer_pack.models_onnx import SynthesizerTrnMsNSFsidM 1126 | 1127 | 1128 | def export_onnx(ModelPath, ExportedPath, MoeVS=True): 1129 | cpt = torch.load(ModelPath, map_location="cpu") 1130 | cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk 1131 | hidden_channels = 256 if cpt.get("version","v1")=="v1"else 768#cpt["config"][-2] # hidden_channels,为768Vec做准备 1132 | 1133 | test_phone = torch.rand(1, 200, hidden_channels) # hidden unit 1134 | test_phone_lengths = torch.tensor([200]).long() # hidden unit 长度(貌似没啥用) 1135 | test_pitch = torch.randint(size=(1, 200), low=5, high=255) # 基频(单位赫兹) 1136 | test_pitchf = torch.rand(1, 200) # nsf基频 1137 | test_ds = torch.LongTensor([0]) # 说话人ID 1138 | test_rnd = torch.rand(1, 192, 200) # 噪声(加入随机因子) 1139 | 1140 | device = "cpu" # 导出时设备(不影响使用模型) 1141 | 1142 | 1143 | net_g = SynthesizerTrnMsNSFsidM( 1144 | *cpt["config"], is_half=False,version=cpt.get("version","v1") 1145 | ) # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16) 1146 | net_g.load_state_dict(cpt["weight"], strict=False) 1147 | input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"] 1148 | output_names = [ 1149 | "audio", 1150 | ] 1151 | # net_g.construct_spkmixmap(n_speaker) 多角色混合轨道导出 1152 | torch.onnx.export( 1153 | net_g, 1154 | ( 1155 | test_phone.to(device), 1156 | test_phone_lengths.to(device), 1157 | test_pitch.to(device), 1158 | test_pitchf.to(device), 1159 | test_ds.to(device), 1160 | test_rnd.to(device), 1161 | ), 1162 | ExportedPath, 1163 | dynamic_axes={ 1164 | "phone": [1], 1165 | "pitch": [1], 1166 | "pitchf": [1], 1167 | "rnd": [2], 1168 | }, 1169 | do_constant_folding=False, 1170 | opset_version=16, 1171 | verbose=False, 1172 | input_names=input_names, 1173 | output_names=output_names, 1174 | ) 1175 | return "Finished" 1176 | 1177 | 1178 | #region Mangio-RVC-Fork CLI App 1179 | import re as regex 1180 | import scipy.io.wavfile as wavfile 1181 | 1182 | cli_current_page = "HOME" 1183 | 1184 | def cli_split_command(com): 1185 | exp = r'(?:(?<=\s)|^)"(.*?)"(?=\s|$)|(\S+)' 1186 | split_array = regex.findall(exp, com) 1187 | split_array = [group[0] if group[0] else group[1] for group in split_array] 1188 | return split_array 1189 | 1190 | def execute_generator_function(genObject): 1191 | for _ in genObject: pass 1192 | 1193 | def cli_infer(com): 1194 | # get VC first 1195 | com = cli_split_command(com) 1196 | model_name = com[0] 1197 | source_audio_path = com[1] 1198 | output_file_name = com[2] 1199 | feature_index_path = com[3] 1200 | f0_file = None # Not Implemented Yet 1201 | 1202 | # Get parameters for inference 1203 | speaker_id = int(com[4]) 1204 | transposition = float(com[5]) 1205 | f0_method = com[6] 1206 | crepe_hop_length = int(com[7]) 1207 | harvest_median_filter = int(com[8]) 1208 | resample = int(com[9]) 1209 | mix = float(com[10]) 1210 | feature_ratio = float(com[11]) 1211 | protection_amnt = float(com[12]) 1212 | 1213 | print("Mangio-RVC-Fork Infer-CLI: Starting the inference...") 1214 | vc_data = get_vc(model_name) 1215 | print(vc_data) 1216 | print("Mangio-RVC-Fork Infer-CLI: Performing inference...") 1217 | conversion_data = vc_single( 1218 | speaker_id, 1219 | source_audio_path, 1220 | transposition, 1221 | f0_file, 1222 | f0_method, 1223 | feature_index_path, 1224 | #feature_index_path, 1225 | feature_ratio, 1226 | harvest_median_filter, 1227 | resample, 1228 | mix, 1229 | protection_amnt, 1230 | crepe_hop_length, 1231 | ) 1232 | if "Success." in conversion_data[0]: 1233 | print("Mangio-RVC-Fork Infer-CLI: Inference succeeded. Writing to %s/%s..." % ('audio-outputs', output_file_name)) 1234 | wavfile.write('%s/%s' % ('audio-outputs', output_file_name), conversion_data[1][0], conversion_data[1][1]) 1235 | print("Mangio-RVC-Fork Infer-CLI: Finished! Saved output to %s/%s" % ('audio-outputs', output_file_name)) 1236 | else: 1237 | print("Mangio-RVC-Fork Infer-CLI: Inference failed. Here's the traceback: ") 1238 | print(conversion_data[0]) 1239 | 1240 | def cli_pre_process(com): 1241 | com = cli_split_command(com) 1242 | model_name = com[0] 1243 | trainset_directory = com[1] 1244 | sample_rate = com[2] 1245 | num_processes = int(com[3]) 1246 | 1247 | print("Mangio-RVC-Fork Pre-process: Starting...") 1248 | generator = preprocess_dataset( 1249 | trainset_directory, 1250 | model_name, 1251 | sample_rate, 1252 | num_processes 1253 | ) 1254 | execute_generator_function(generator) 1255 | print("Mangio-RVC-Fork Pre-process: Finished") 1256 | 1257 | def cli_extract_feature(com): 1258 | com = cli_split_command(com) 1259 | model_name = com[0] 1260 | gpus = com[1] 1261 | num_processes = int(com[2]) 1262 | has_pitch_guidance = True if (int(com[3]) == 1) else False 1263 | f0_method = com[4] 1264 | crepe_hop_length = int(com[5]) 1265 | version = com[6] # v1 or v2 1266 | 1267 | print("Mangio-RVC-CLI: Extract Feature Has Pitch: " + str(has_pitch_guidance)) 1268 | print("Mangio-RVC-CLI: Extract Feature Version: " + str(version)) 1269 | print("Mangio-RVC-Fork Feature Extraction: Starting...") 1270 | generator = extract_f0_feature( 1271 | gpus, 1272 | num_processes, 1273 | f0_method, 1274 | has_pitch_guidance, 1275 | model_name, 1276 | version, 1277 | crepe_hop_length 1278 | ) 1279 | execute_generator_function(generator) 1280 | print("Mangio-RVC-Fork Feature Extraction: Finished") 1281 | 1282 | def cli_train(com): 1283 | com = cli_split_command(com) 1284 | model_name = com[0] 1285 | sample_rate = com[1] 1286 | has_pitch_guidance = True if (int(com[2]) == 1) else False 1287 | speaker_id = int(com[3]) 1288 | save_epoch_iteration = int(com[4]) 1289 | total_epoch = int(com[5]) # 10000 1290 | batch_size = int(com[6]) 1291 | gpu_card_slot_numbers = com[7] 1292 | if_save_latest = i18n("是") if (int(com[8]) == 1) else i18n("否") 1293 | if_cache_gpu = i18n("是") if (int(com[9]) == 1) else i18n("否") 1294 | if_save_every_weight = i18n("是") if (int(com[10]) == 1) else i18n("否") 1295 | version = com[11] 1296 | 1297 | pretrained_base = "pretrained/" if version == "v1" else "pretrained_v2/" 1298 | 1299 | g_pretrained_path = "%sf0G%s.pth" % (pretrained_base, sample_rate) 1300 | d_pretrained_path = "%sf0D%s.pth" % (pretrained_base, sample_rate) 1301 | 1302 | print("Mangio-RVC-Fork Train-CLI: Training...") 1303 | click_train( 1304 | model_name, 1305 | sample_rate, 1306 | has_pitch_guidance, 1307 | speaker_id, 1308 | save_epoch_iteration, 1309 | total_epoch, 1310 | batch_size, 1311 | if_save_latest, 1312 | g_pretrained_path, 1313 | d_pretrained_path, 1314 | gpu_card_slot_numbers, 1315 | if_cache_gpu, 1316 | if_save_every_weight, 1317 | version 1318 | ) 1319 | 1320 | def cli_train_feature(com): 1321 | com = cli_split_command(com) 1322 | model_name = com[0] 1323 | version = com[1] 1324 | print("Mangio-RVC-Fork Train Feature Index-CLI: Training... Please wait") 1325 | generator = train_index( 1326 | model_name, 1327 | version 1328 | ) 1329 | execute_generator_function(generator) 1330 | print("Mangio-RVC-Fork Train Feature Index-CLI: Done!") 1331 | 1332 | def cli_extract_model(com): 1333 | com = cli_split_command(com) 1334 | model_path = com[0] 1335 | save_name = com[1] 1336 | sample_rate = com[2] 1337 | has_pitch_guidance = com[3] 1338 | info = com[4] 1339 | version = com[5] 1340 | extract_small_model_process = extract_small_model( 1341 | model_path, 1342 | save_name, 1343 | sample_rate, 1344 | has_pitch_guidance, 1345 | info, 1346 | version 1347 | ) 1348 | if extract_small_model_process == "Success.": 1349 | print("Mangio-RVC-Fork Extract Small Model: Success!") 1350 | else: 1351 | print(str(extract_small_model_process)) 1352 | print("Mangio-RVC-Fork Extract Small Model: Failed!") 1353 | 1354 | def print_page_details(): 1355 | if cli_current_page == "HOME": 1356 | print(" go home : Takes you back to home with a navigation list.") 1357 | print(" go infer : Takes you to inference command execution.\n") 1358 | print(" go pre-process : Takes you to training step.1) pre-process command execution.") 1359 | print(" go extract-feature : Takes you to training step.2) extract-feature command execution.") 1360 | print(" go train : Takes you to training step.3) being or continue training command execution.") 1361 | print(" go train-feature : Takes you to the train feature index command execution.\n") 1362 | print(" go extract-model : Takes you to the extract small model command execution.") 1363 | elif cli_current_page == "INFER": 1364 | print(" arg 1) model name with .pth in ./weights: mi-test.pth") 1365 | print(" arg 2) source audio path: myFolder\\MySource.wav") 1366 | print(" arg 3) output file name to be placed in './audio-outputs': MyTest.wav") 1367 | print(" arg 4) feature index file path: logs/mi-test/added_IVF3042_Flat_nprobe_1.index") 1368 | print(" arg 5) speaker id: 0") 1369 | print(" arg 6) transposition: 0") 1370 | print(" arg 7) f0 method: harvest (pm, harvest, crepe, crepe-tiny, hybrid[x,x,x,x], mangio-crepe, mangio-crepe-tiny)") 1371 | print(" arg 8) crepe hop length: 160") 1372 | print(" arg 9) harvest median filter radius: 3 (0-7)") 1373 | print(" arg 10) post resample rate: 0") 1374 | print(" arg 11) mix volume envelope: 1") 1375 | print(" arg 12) feature index ratio: 0.78 (0-1)") 1376 | print(" arg 13) Voiceless Consonant Protection (Less Artifact): 0.33 (Smaller number = more protection. 0.50 means Dont Use.) \n") 1377 | print("Example: mi-test.pth saudio/Sidney.wav myTest.wav logs/mi-test/added_index.index 0 -2 harvest 160 3 0 1 0.95 0.33") 1378 | elif cli_current_page == "PRE-PROCESS": 1379 | print(" arg 1) Model folder name in ./logs: mi-test") 1380 | print(" arg 2) Trainset directory: mydataset (or) E:\\my-data-set") 1381 | print(" arg 3) Sample rate: 40k (32k, 40k, 48k)") 1382 | print(" arg 4) Number of CPU threads to use: 8 \n") 1383 | print("Example: mi-test mydataset 40k 24") 1384 | elif cli_current_page == "EXTRACT-FEATURE": 1385 | print(" arg 1) Model folder name in ./logs: mi-test") 1386 | print(" arg 2) Gpu card slot: 0 (0-1-2 if using 3 GPUs)") 1387 | print(" arg 3) Number of CPU threads to use: 8") 1388 | print(" arg 4) Has Pitch Guidance?: 1 (0 for no, 1 for yes)") 1389 | print(" arg 5) f0 Method: harvest (pm, harvest, dio, crepe)") 1390 | print(" arg 6) Crepe hop length: 128") 1391 | print(" arg 7) Version for pre-trained models: v2 (use either v1 or v2)\n") 1392 | print("Example: mi-test 0 24 1 harvest 128 v2") 1393 | elif cli_current_page == "TRAIN": 1394 | print(" arg 1) Model folder name in ./logs: mi-test") 1395 | print(" arg 2) Sample rate: 40k (32k, 40k, 48k)") 1396 | print(" arg 3) Has Pitch Guidance?: 1 (0 for no, 1 for yes)") 1397 | print(" arg 4) speaker id: 0") 1398 | print(" arg 5) Save epoch iteration: 50") 1399 | print(" arg 6) Total epochs: 10000") 1400 | print(" arg 7) Batch size: 8") 1401 | print(" arg 8) Gpu card slot: 0 (0-1-2 if using 3 GPUs)") 1402 | print(" arg 9) Save only the latest checkpoint: 0 (0 for no, 1 for yes)") 1403 | print(" arg 10) Whether to cache training set to vram: 0 (0 for no, 1 for yes)") 1404 | print(" arg 11) Save extracted small model every generation?: 0 (0 for no, 1 for yes)") 1405 | print(" arg 12) Model architecture version: v2 (use either v1 or v2)\n") 1406 | print("Example: mi-test 40k 1 0 50 10000 8 0 0 0 0 v2") 1407 | elif cli_current_page == "TRAIN-FEATURE": 1408 | print(" arg 1) Model folder name in ./logs: mi-test") 1409 | print(" arg 2) Model architecture version: v2 (use either v1 or v2)\n") 1410 | print("Example: mi-test v2") 1411 | elif cli_current_page == "EXTRACT-MODEL": 1412 | print(" arg 1) Model Path: logs/mi-test/G_168000.pth") 1413 | print(" arg 2) Model save name: MyModel") 1414 | print(" arg 3) Sample rate: 40k (32k, 40k, 48k)") 1415 | print(" arg 4) Has Pitch Guidance?: 1 (0 for no, 1 for yes)") 1416 | print(' arg 5) Model information: "My Model"') 1417 | print(" arg 6) Model architecture version: v2 (use either v1 or v2)\n") 1418 | print('Example: logs/mi-test/G_168000.pth MyModel 40k 1 "Created by Cole Mangio" v2') 1419 | print("") 1420 | 1421 | def change_page(page): 1422 | global cli_current_page 1423 | cli_current_page = page 1424 | return 0 1425 | 1426 | def execute_command(com): 1427 | if com == "go home": 1428 | return change_page("HOME") 1429 | elif com == "go infer": 1430 | return change_page("INFER") 1431 | elif com == "go pre-process": 1432 | return change_page("PRE-PROCESS") 1433 | elif com == "go extract-feature": 1434 | return change_page("EXTRACT-FEATURE") 1435 | elif com == "go train": 1436 | return change_page("TRAIN") 1437 | elif com == "go train-feature": 1438 | return change_page("TRAIN-FEATURE") 1439 | elif com == "go extract-model": 1440 | return change_page("EXTRACT-MODEL") 1441 | else: 1442 | if com[:3] == "go ": 1443 | print("page '%s' does not exist!" % com[3:]) 1444 | return 0 1445 | 1446 | if cli_current_page == "INFER": 1447 | cli_infer(com) 1448 | elif cli_current_page == "PRE-PROCESS": 1449 | cli_pre_process(com) 1450 | elif cli_current_page == "EXTRACT-FEATURE": 1451 | cli_extract_feature(com) 1452 | elif cli_current_page == "TRAIN": 1453 | cli_train(com) 1454 | elif cli_current_page == "TRAIN-FEATURE": 1455 | cli_train_feature(com) 1456 | elif cli_current_page == "EXTRACT-MODEL": 1457 | cli_extract_model(com) 1458 | 1459 | def cli_navigation_loop(): 1460 | while True: 1461 | print("You are currently in '%s':" % cli_current_page) 1462 | print_page_details() 1463 | command = input("%s: " % cli_current_page) 1464 | try: 1465 | execute_command(command) 1466 | except: 1467 | print(traceback.format_exc()) 1468 | 1469 | if(config.is_cli): 1470 | print("\n\nMangio-RVC-Fork v2 CLI App!\n") 1471 | print("Welcome to the CLI version of RVC. Please read the documentation on https://github.com/Mangio621/Mangio-RVC-Fork (README.MD) to understand how to use this app.\n") 1472 | cli_navigation_loop() 1473 | 1474 | #endregion 1475 | 1476 | #region RVC WebUI App 1477 | 1478 | def get_presets(): 1479 | data = None 1480 | with open('../inference-presets.json', 'r') as file: 1481 | data = json.load(file) 1482 | preset_names = [] 1483 | for preset in data['presets']: 1484 | preset_names.append(preset['name']) 1485 | 1486 | return preset_names 1487 | 1488 | def change_choices2(): 1489 | audio_files=[] 1490 | for filename in os.listdir("./audios"): 1491 | if filename.endswith(('.wav','.mp3')): 1492 | audio_files.append(os.path.join('./audios',filename)) 1493 | return {"choices": sorted(audio_files), "__type__": "update"} 1494 | 1495 | audio_files=[] 1496 | for filename in os.listdir("./audios"): 1497 | if filename.endswith(('.wav','.mp3')): 1498 | audio_files.append(filename) 1499 | 1500 | def get_index(): 1501 | if check_for_name() != '': 1502 | if config.iscolab: 1503 | chosen_model=sorted(names)[0].split(".")[0] 1504 | logs_path="/content/Retrieval-based-Voice-Conversion-WebUI/logs/"+chosen_model 1505 | for file in os.listdir(logs_path): 1506 | if file.endswith(".index"): 1507 | return os.path.join(logs_path, file) 1508 | return '' 1509 | else: 1510 | return '' 1511 | 1512 | def get_indexes(): 1513 | indexes_list=[] 1514 | if config.iscolab: 1515 | for dirpath, dirnames, filenames in os.walk("/content/Retrieval-based-Voice-Conversion-WebUI/logs/"): 1516 | for filename in filenames: 1517 | if filename.endswith(".index"): 1518 | indexes_list.append(os.path.join(dirpath,filename)) 1519 | return indexes_list 1520 | else: 1521 | return '' 1522 | 1523 | def get_name(): 1524 | if len(audio_files) > 0: 1525 | return sorted(audio_files)[0] 1526 | else: 1527 | return '' 1528 | 1529 | def save_to_wav(record_button): 1530 | if record_button is None: 1531 | pass 1532 | else: 1533 | path_to_file=record_button 1534 | new_name = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")+'.wav' 1535 | new_path='./audios/'+new_name 1536 | shutil.move(path_to_file,new_path) 1537 | return new_path 1538 | 1539 | def save_to_wav2(dropbox): 1540 | file_path=dropbox.name 1541 | shutil.move(file_path,'./audios') 1542 | return os.path.join('./audios',os.path.basename(file_path)) 1543 | 1544 | def match_index(speaker): 1545 | folder=speaker.split(".")[0] 1546 | parent_dir="/content/Retrieval-based-Voice-Conversion-WebUI/logs/"+folder 1547 | for filename in os.listdir(parent_dir): 1548 | if filename.endswith(".index"): 1549 | index_path=os.path.join(parent_dir,filename) 1550 | return index_path 1551 | 1552 | def check_for_name(): 1553 | if len(names) > 0: 1554 | return sorted(names)[0] 1555 | else: 1556 | return '' 1557 | 1558 | def download_from_url(url, model): 1559 | url = url.strip() 1560 | if url == '': 1561 | return "URL cannot be left empty." 1562 | zip_dirs = ["zips", "unzips"] 1563 | for directory in zip_dirs: 1564 | if os.path.exists(directory): 1565 | shutil.rmtree(directory) 1566 | os.makedirs("zips", exist_ok=True) 1567 | os.makedirs("unzips", exist_ok=True) 1568 | zipfile = model + '.zip' 1569 | zipfile_path = './zips/' + zipfile 1570 | MODELEPOCH = '' 1571 | if "drive.google.com" in url: 1572 | subprocess.run(["gdown", url, "--fuzzy", "-O", zipfile_path]) 1573 | elif "mega.nz" in url: 1574 | m = Mega() 1575 | m.download_url(url, './zips') 1576 | else: 1577 | subprocess.run(["wget", url, "-O", f"./zips/{zipfile}"]) 1578 | for filename in os.listdir("./zips"): 1579 | if filename.endswith(".zip"): 1580 | zipfile_path = os.path.join("./zips/",filename) 1581 | shutil.unpack_archive(zipfile_path, "./unzips", 'zip') 1582 | else: 1583 | return "No zipfile found." 1584 | for root, dirs, files in os.walk('./unzips'): 1585 | for file in files: 1586 | if "G_" in file: 1587 | MODELEPOCH = file.split("G_")[1].split(".")[0] 1588 | if MODELEPOCH == '': 1589 | MODELEPOCH = '404' 1590 | for file in files: 1591 | file_path = os.path.join(root, file) 1592 | if file.endswith(".npy") or file.endswith(".index"): 1593 | subprocess.run(["mkdir", "-p", f"./logs/{model}"]) 1594 | subprocess.run(["mv", file_path, f"./logs/{model}/"]) 1595 | elif "G_" not in file and "D_" not in file and file.endswith(".pth"): 1596 | subprocess.run(["mv", file_path, f"./weights/{model}.pth"]) 1597 | shutil.rmtree("zips") 1598 | shutil.rmtree("unzips") 1599 | return "Success." 1600 | 1601 | with gr.Blocks(theme=gr.themes.Base()) as app: 1602 | with gr.Tabs(): 1603 | with gr.TabItem("Inference"): 1604 | gr.HTML("

Easy GUI v2 (rejekts) - adapted to Mangio-RVC-Fork 💻

") 1605 | # Inference Preset Row 1606 | # with gr.Row(): 1607 | # mangio_preset = gr.Dropdown(label="Inference Preset", choices=sorted(get_presets())) 1608 | # mangio_preset_name_save = gr.Textbox( 1609 | # label="Your preset name" 1610 | # ) 1611 | # mangio_preset_save_btn = gr.Button('Save Preset', variant="primary") 1612 | 1613 | # Other RVC stuff 1614 | with gr.Row(): 1615 | sid0 = gr.Dropdown(label="1.Choose your Model.", choices=sorted(names), value=check_for_name()) 1616 | refresh_button = gr.Button("Refresh", variant="primary") 1617 | if check_for_name() != '': 1618 | get_vc(sorted(names)[0]) 1619 | vc_transform0 = gr.Number(label="Optional: You can change the pitch here or leave it at 0.", value=0) 1620 | #clean_button = gr.Button(i18n("卸载音色省显存"), variant="primary") 1621 | spk_item = gr.Slider( 1622 | minimum=0, 1623 | maximum=2333, 1624 | step=1, 1625 | label=i18n("请选择说话人id"), 1626 | value=0, 1627 | visible=False, 1628 | interactive=True, 1629 | ) 1630 | #clean_button.click(fn=clean, inputs=[], outputs=[sid0]) 1631 | sid0.change( 1632 | fn=get_vc, 1633 | inputs=[sid0], 1634 | outputs=[spk_item], 1635 | ) 1636 | but0 = gr.Button("Convert", variant="primary") 1637 | with gr.Row(): 1638 | with gr.Column(): 1639 | with gr.Row(): 1640 | dropbox = gr.File(label="Drop your audio here & hit the Reload button.") 1641 | with gr.Row(): 1642 | record_button=gr.Audio(source="microphone", label="OR Record audio.", type="filepath") 1643 | with gr.Row(): 1644 | input_audio0 = gr.Dropdown( 1645 | label="2.Choose your audio.", 1646 | value="./audios/someguy.mp3", 1647 | ) 1648 | dropbox.upload(fn=save_to_wav2, inputs=[dropbox], outputs=[input_audio0]) 1649 | dropbox.upload(fn=change_choices2, inputs=[], outputs=[input_audio0]) 1650 | refresh_button2 = gr.Button("Refresh", variant="primary", size='sm') 1651 | refresh_button2.click(fn=change_choices2, inputs=[], outputs=[input_audio0]) 1652 | record_button.change(fn=save_to_wav, inputs=[record_button], outputs=[input_audio0]) 1653 | record_button.change(fn=change_choices2, inputs=[], outputs=[input_audio0]) 1654 | with gr.Column(): 1655 | with gr.Accordion("Index Settings", open=True): 1656 | file_index1 = gr.Dropdown( 1657 | label="3. Path to your added.index file (if it didn't automatically find it.)", 1658 | choices=get_indexes(), 1659 | value=get_index(), 1660 | interactive=True, 1661 | ) 1662 | refresh_button.click( 1663 | fn=change_choices, inputs=[], outputs=[sid0, file_index1] 1664 | ) 1665 | # file_big_npy1 = gr.Textbox( 1666 | # label=i18n("特征文件路径"), 1667 | # value="E:\\codes\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy", 1668 | # interactive=True, 1669 | # ) 1670 | index_rate1 = gr.Slider( 1671 | minimum=0, 1672 | maximum=1, 1673 | label=i18n("检索特征占比"), 1674 | value=0.66, 1675 | interactive=True, 1676 | ) 1677 | vc_output2 = gr.Audio(label="Output Audio (Click on the Three Dots in the Right Corner to Download)") 1678 | f0method0 = gr.Radio( 1679 | label="Optional: Change the Pitch Extraction Algorithm. Use PM for fast results or Harvest for better low range (slower results) or Crepe for the best of both worlds.", 1680 | choices=["pm", "harvest", "dio", "crepe", "crepe-tiny", "mangio-crepe", "mangio-crepe-tiny"], # Fork Feature. Add Crepe-Tiny 1681 | value="pm", 1682 | interactive=True, 1683 | ) 1684 | with gr.Accordion("More", open=False): 1685 | crepe_hop_length = gr.Slider( 1686 | minimum=1, 1687 | maximum=512, 1688 | step=1, 1689 | label=i18n("crepe_hop_length"), 1690 | value=160, 1691 | interactive=True 1692 | ) 1693 | filter_radius0 = gr.Slider( 1694 | minimum=0, 1695 | maximum=7, 1696 | label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"), 1697 | value=3, 1698 | step=1, 1699 | interactive=True, 1700 | ) 1701 | resample_sr0 = gr.Slider( 1702 | minimum=0, 1703 | maximum=48000, 1704 | label=i18n("后处理重采样至最终采样率,0为不进行重采样"), 1705 | value=0, 1706 | step=1, 1707 | interactive=True, 1708 | ) 1709 | rms_mix_rate0 = gr.Slider( 1710 | minimum=0, 1711 | maximum=1, 1712 | label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"), 1713 | value=1, 1714 | interactive=True, 1715 | ) 1716 | protect0 = gr.Slider( 1717 | minimum=0, 1718 | maximum=0.5, 1719 | label=i18n("保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"), 1720 | value=0.33, 1721 | step=0.01, 1722 | interactive=True, 1723 | ) 1724 | with gr.Row(): 1725 | vc_output1 = gr.Textbox("") 1726 | f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"), visible=False) 1727 | 1728 | but0.click( 1729 | vc_single, 1730 | [ 1731 | spk_item, 1732 | input_audio0, 1733 | vc_transform0, 1734 | f0_file, 1735 | f0method0, 1736 | file_index1, 1737 | # file_index2, 1738 | # file_big_npy1, 1739 | index_rate1, 1740 | filter_radius0, 1741 | resample_sr0, 1742 | rms_mix_rate0, 1743 | protect0, 1744 | crepe_hop_length 1745 | ], 1746 | [vc_output1, vc_output2], 1747 | ) 1748 | with gr.Accordion("Batch Conversion",open=False): 1749 | with gr.Row(): 1750 | with gr.Column(): 1751 | vc_transform1 = gr.Number( 1752 | label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0 1753 | ) 1754 | opt_input = gr.Textbox(label=i18n("指定输出文件夹"), value="opt") 1755 | f0method1 = gr.Radio( 1756 | label=i18n( 1757 | "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU" 1758 | ), 1759 | choices=["pm", "harvest", "crepe"], 1760 | value="pm", 1761 | interactive=True, 1762 | ) 1763 | filter_radius1 = gr.Slider( 1764 | minimum=0, 1765 | maximum=7, 1766 | label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"), 1767 | value=3, 1768 | step=1, 1769 | interactive=True, 1770 | ) 1771 | with gr.Column(): 1772 | file_index3 = gr.Textbox( 1773 | label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"), 1774 | value="", 1775 | interactive=True, 1776 | ) 1777 | file_index4 = gr.Dropdown( 1778 | label=i18n("自动检测index路径,下拉式选择(dropdown)"), 1779 | choices=sorted(index_paths), 1780 | interactive=True, 1781 | ) 1782 | refresh_button.click( 1783 | fn=lambda: change_choices()[1], 1784 | inputs=[], 1785 | outputs=file_index4, 1786 | ) 1787 | # file_big_npy2 = gr.Textbox( 1788 | # label=i18n("特征文件路径"), 1789 | # value="E:\\codes\\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy", 1790 | # interactive=True, 1791 | # ) 1792 | index_rate2 = gr.Slider( 1793 | minimum=0, 1794 | maximum=1, 1795 | label=i18n("检索特征占比"), 1796 | value=1, 1797 | interactive=True, 1798 | ) 1799 | with gr.Column(): 1800 | resample_sr1 = gr.Slider( 1801 | minimum=0, 1802 | maximum=48000, 1803 | label=i18n("后处理重采样至最终采样率,0为不进行重采样"), 1804 | value=0, 1805 | step=1, 1806 | interactive=True, 1807 | ) 1808 | rms_mix_rate1 = gr.Slider( 1809 | minimum=0, 1810 | maximum=1, 1811 | label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"), 1812 | value=1, 1813 | interactive=True, 1814 | ) 1815 | protect1 = gr.Slider( 1816 | minimum=0, 1817 | maximum=0.5, 1818 | label=i18n( 1819 | "保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果" 1820 | ), 1821 | value=0.33, 1822 | step=0.01, 1823 | interactive=True, 1824 | ) 1825 | with gr.Column(): 1826 | dir_input = gr.Textbox( 1827 | label=i18n("输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)"), 1828 | value="E:\codes\py39\\test-20230416b\\todo-songs", 1829 | ) 1830 | inputs = gr.File( 1831 | file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹") 1832 | ) 1833 | with gr.Row(): 1834 | format1 = gr.Radio( 1835 | label=i18n("导出文件格式"), 1836 | choices=["wav", "flac", "mp3", "m4a"], 1837 | value="flac", 1838 | interactive=True, 1839 | ) 1840 | but1 = gr.Button(i18n("转换"), variant="primary") 1841 | vc_output3 = gr.Textbox(label=i18n("输出信息")) 1842 | but1.click( 1843 | vc_multi, 1844 | [ 1845 | spk_item, 1846 | dir_input, 1847 | opt_input, 1848 | inputs, 1849 | vc_transform1, 1850 | f0method1, 1851 | file_index3, 1852 | file_index4, 1853 | # file_big_npy2, 1854 | index_rate2, 1855 | filter_radius1, 1856 | resample_sr1, 1857 | rms_mix_rate1, 1858 | protect1, 1859 | format1, 1860 | crepe_hop_length, 1861 | ], 1862 | [vc_output3], 1863 | ) 1864 | with gr.TabItem("Download Model"): 1865 | with gr.Row(): 1866 | url=gr.Textbox(label="Enter the URL to the Model:") 1867 | with gr.Row(): 1868 | model = gr.Textbox(label="Name your model:") 1869 | download_button=gr.Button(label="Download") 1870 | with gr.Row(): 1871 | status_bar=gr.Textbox(label="") 1872 | download_button.click(fn=download_from_url, inputs=[url, model], outputs=[status_bar]) 1873 | with gr.Row(): 1874 | gr.Markdown( 1875 | """ 1876 | Original RVC:https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI 1877 | Mangio's RVC Fork:https://github.com/Mangio621/Mangio-RVC-Fork 1878 | ❤️ If you like the EasyGUI, help me keep it.❤️ 1879 | https://paypal.me/lesantillan 1880 | """ 1881 | ) 1882 | ''' 1883 | with gr.TabItem("Train", visible=False): 1884 | with gr.Row(): 1885 | exp_dir1 = gr.Textbox(label="Voice Name:", value="My-Voice") 1886 | sr2 = gr.Radio( 1887 | label=i18n("目标采样率"), 1888 | choices=["40k", "48k"], 1889 | value="40k", 1890 | interactive=True, 1891 | visible=False 1892 | ) 1893 | if_f0_3 = gr.Radio( 1894 | label=i18n("模型是否带音高指导(唱歌一定要, 语音可以不要)"), 1895 | choices=[True, False], 1896 | value=True, 1897 | interactive=True, 1898 | visible=False 1899 | ) 1900 | version19 = gr.Radio( 1901 | label=i18n("版本(目前仅40k支持了v2)"), 1902 | choices=["v1", "v2"], 1903 | value="v2", 1904 | interactive=True, 1905 | visible=False, 1906 | ) 1907 | np7 = gr.Slider( 1908 | minimum=0, 1909 | maximum=config.n_cpu, 1910 | step=1, 1911 | label=i18n("提取音高和处理数据使用的CPU进程数"), 1912 | value=config.n_cpu, 1913 | interactive=True, 1914 | ) 1915 | with gr.Group(): # 暂时单人的, 后面支持最多4人的#数据处理 1916 | with gr.Row(): 1917 | trainset_dir4 = gr.Textbox( 1918 | label=i18n("输入训练文件夹路径"), value="/content/dataset" 1919 | ) 1920 | spk_id5 = gr.Slider( 1921 | minimum=0, 1922 | maximum=4, 1923 | step=1, 1924 | label=i18n("请指定说话人id"), 1925 | value=0, 1926 | interactive=True, 1927 | visible=False 1928 | ) 1929 | but1 = gr.Button(i18n("处理数据"), variant="primary") 1930 | info1 = gr.Textbox(label=i18n("输出信息"), value="") 1931 | but1.click( 1932 | preprocess_dataset, [trainset_dir4, exp_dir1, sr2, np7], [info1] 1933 | ) 1934 | with gr.Group(): 1935 | #gr.Markdown(value=i18n("step2b: 使用CPU提取音高(如果模型带音高), 使用GPU提取特征(选择卡号)")) 1936 | with gr.Row(): 1937 | with gr.Accordion('GPU Settings', open=False): 1938 | gpus6 = gr.Textbox( 1939 | label=i18n("以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"), 1940 | value=gpus, 1941 | interactive=True, 1942 | ) 1943 | gpu_info9 = gr.Textbox(label=i18n("显卡信息"), value=gpu_info) 1944 | with gr.Column(): 1945 | f0method8 = gr.Radio( 1946 | label=i18n( 1947 | "选择音高提取算法:输入歌声可用pm提速,高质量语音但CPU差可用dio提速,harvest质量更好但慢" 1948 | ), 1949 | choices=["pm", "harvest", "dio", "crepe", "mangio-crepe"], # Fork feature: Crepe on f0 extraction for training. 1950 | value="harvest", 1951 | interactive=True, 1952 | ) 1953 | extraction_crepe_hop_length = gr.Slider( 1954 | minimum=1, 1955 | maximum=512, 1956 | step=1, 1957 | label=i18n("crepe_hop_length"), 1958 | value=128, 1959 | interactive=True 1960 | ) 1961 | but2 = gr.Button(i18n("特征提取"), variant="primary") 1962 | info2 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8) 1963 | but2.click( 1964 | extract_f0_feature, 1965 | [gpus6, np7, f0method8, if_f0_3, exp_dir1, version19, extraction_crepe_hop_length], 1966 | [info2], 1967 | ) 1968 | with gr.Group(): 1969 | #gr.Markdown(value=i18n("step3: 填写训练设置, 开始训练模型和索引")) 1970 | with gr.Row(): 1971 | save_epoch10 = gr.Slider( 1972 | minimum=0, 1973 | maximum=50, 1974 | step=1, 1975 | label="Create a backup every # of epochs:", 1976 | value=10, 1977 | interactive=True, 1978 | ) 1979 | total_epoch11 = gr.Slider( 1980 | minimum=0, 1981 | maximum=10000, 1982 | step=1, 1983 | label=i18n("总训练轮数total_epoch"), 1984 | value=100, 1985 | interactive=True, 1986 | ) 1987 | batch_size12 = gr.Slider( 1988 | minimum=1, 1989 | maximum=40, 1990 | step=1, 1991 | label=i18n("每张显卡的batch_size"), 1992 | value=default_batch_size, 1993 | interactive=True, 1994 | ) 1995 | if_save_latest13 = gr.Radio( 1996 | label=i18n("是否仅保存最新的ckpt文件以节省硬盘空间"), 1997 | choices=[i18n("是"), i18n("否")], 1998 | value=i18n("是"), 1999 | interactive=True, 2000 | ) 2001 | if_cache_gpu17 = gr.Radio( 2002 | label=i18n( 2003 | "是否缓存所有训练集至显存. 10min以下小数据可缓存以加速训练, 大数据缓存会炸显存也加不了多少速" 2004 | ), 2005 | choices=[i18n("是"), i18n("否")], 2006 | value=i18n("否"), 2007 | interactive=True, 2008 | ) 2009 | if_save_every_weights18 = gr.Radio( 2010 | label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), 2011 | choices=[i18n("是"), i18n("否")], 2012 | value=i18n("否"), 2013 | interactive=True, 2014 | ) 2015 | with gr.Row(): 2016 | with gr.Accordion("Advanced", open=False): 2017 | pretrained_G14 = gr.Textbox( 2018 | label=i18n("加载预训练底模G路径"), 2019 | value="pretrained/f0G40k.pth", 2020 | interactive=True, 2021 | ) 2022 | pretrained_D15 = gr.Textbox( 2023 | label=i18n("加载预训练底模D路径"), 2024 | value="pretrained/f0D40k.pth", 2025 | interactive=True, 2026 | ) 2027 | sr2.change( 2028 | change_sr2, 2029 | [sr2, if_f0_3, version19], 2030 | [pretrained_G14, pretrained_D15, version19], 2031 | ) 2032 | version19.change( 2033 | change_version19, 2034 | [sr2, if_f0_3, version19], 2035 | [pretrained_G14, pretrained_D15], 2036 | ) 2037 | if_f0_3.change( 2038 | change_f0, 2039 | [if_f0_3, sr2, version19], 2040 | [f0method8, pretrained_G14, pretrained_D15], 2041 | ) 2042 | gpus16 = gr.Textbox( 2043 | label=i18n("以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"), 2044 | value=gpus, 2045 | interactive=True, 2046 | ) 2047 | but3 = gr.Button(i18n("训练模型"), variant="primary") 2048 | but4 = gr.Button(i18n("训练特征索引"), variant="primary") 2049 | but5 = gr.Button(i18n("一键训练"), variant="primary", visible=False) 2050 | info3 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=10) 2051 | but3.click( 2052 | click_train, 2053 | [ 2054 | exp_dir1, 2055 | sr2, 2056 | if_f0_3, 2057 | spk_id5, 2058 | save_epoch10, 2059 | total_epoch11, 2060 | batch_size12, 2061 | if_save_latest13, 2062 | pretrained_G14, 2063 | pretrained_D15, 2064 | gpus16, 2065 | if_cache_gpu17, 2066 | if_save_every_weights18, 2067 | version19, 2068 | ], 2069 | info3, 2070 | ) 2071 | but4.click(train_index, [exp_dir1, version19], info3) 2072 | but5.click( 2073 | train1key, 2074 | [ 2075 | exp_dir1, 2076 | sr2, 2077 | if_f0_3, 2078 | trainset_dir4, 2079 | spk_id5, 2080 | np7, 2081 | f0method8, 2082 | save_epoch10, 2083 | total_epoch11, 2084 | batch_size12, 2085 | if_save_latest13, 2086 | pretrained_G14, 2087 | pretrained_D15, 2088 | gpus16, 2089 | if_cache_gpu17, 2090 | if_save_every_weights18, 2091 | version19, 2092 | extraction_crepe_hop_length 2093 | ], 2094 | info3, 2095 | ) 2096 | 2097 | 2098 | try: 2099 | if tab_faq == "常见问题解答": 2100 | with open("docs/faq.md", "r", encoding="utf8") as f: 2101 | info = f.read() 2102 | else: 2103 | with open("docs/faq_en.md", "r", encoding="utf8") as f: 2104 | info = f.read() 2105 | gr.Markdown(value=info) 2106 | except: 2107 | gr.Markdown(traceback.format_exc()) 2108 | ''' 2109 | 2110 | #region Mangio Preset Handler Region 2111 | def save_preset( 2112 | preset_name, 2113 | sid0, 2114 | vc_transform, 2115 | input_audio, 2116 | f0method, 2117 | crepe_hop_length, 2118 | filter_radius, 2119 | file_index1, 2120 | file_index2, 2121 | index_rate, 2122 | resample_sr, 2123 | rms_mix_rate, 2124 | protect, 2125 | f0_file 2126 | ): 2127 | data = None 2128 | with open('../inference-presets.json', 'r') as file: 2129 | data = json.load(file) 2130 | preset_json = { 2131 | 'name': preset_name, 2132 | 'model': sid0, 2133 | 'transpose': vc_transform, 2134 | 'audio_file': input_audio, 2135 | 'f0_method': f0method, 2136 | 'crepe_hop_length': crepe_hop_length, 2137 | 'median_filtering': filter_radius, 2138 | 'feature_path': file_index1, 2139 | 'auto_feature_path': file_index2, 2140 | 'search_feature_ratio': index_rate, 2141 | 'resample': resample_sr, 2142 | 'volume_envelope': rms_mix_rate, 2143 | 'protect_voiceless': protect, 2144 | 'f0_file_path': f0_file 2145 | } 2146 | data['presets'].append(preset_json) 2147 | with open('../inference-presets.json', 'w') as file: 2148 | json.dump(data, file) 2149 | file.flush() 2150 | print("Saved Preset %s into inference-presets.json!" % preset_name) 2151 | 2152 | 2153 | def on_preset_changed(preset_name): 2154 | print("Changed Preset to %s!" % preset_name) 2155 | data = None 2156 | with open('../inference-presets.json', 'r') as file: 2157 | data = json.load(file) 2158 | 2159 | print("Searching for " + preset_name) 2160 | returning_preset = None 2161 | for preset in data['presets']: 2162 | if(preset['name'] == preset_name): 2163 | print("Found a preset") 2164 | returning_preset = preset 2165 | # return all new input values 2166 | return ( 2167 | # returning_preset['model'], 2168 | # returning_preset['transpose'], 2169 | # returning_preset['audio_file'], 2170 | # returning_preset['f0_method'], 2171 | # returning_preset['crepe_hop_length'], 2172 | # returning_preset['median_filtering'], 2173 | # returning_preset['feature_path'], 2174 | # returning_preset['auto_feature_path'], 2175 | # returning_preset['search_feature_ratio'], 2176 | # returning_preset['resample'], 2177 | # returning_preset['volume_envelope'], 2178 | # returning_preset['protect_voiceless'], 2179 | # returning_preset['f0_file_path'] 2180 | ) 2181 | 2182 | # Preset State Changes 2183 | 2184 | # This click calls save_preset that saves the preset into inference-presets.json with the preset name 2185 | # mangio_preset_save_btn.click( 2186 | # fn=save_preset, 2187 | # inputs=[ 2188 | # mangio_preset_name_save, 2189 | # sid0, 2190 | # vc_transform0, 2191 | # input_audio0, 2192 | # f0method0, 2193 | # crepe_hop_length, 2194 | # filter_radius0, 2195 | # file_index1, 2196 | # file_index2, 2197 | # index_rate1, 2198 | # resample_sr0, 2199 | # rms_mix_rate0, 2200 | # protect0, 2201 | # f0_file 2202 | # ], 2203 | # outputs=[] 2204 | # ) 2205 | 2206 | # mangio_preset.change( 2207 | # on_preset_changed, 2208 | # inputs=[ 2209 | # # Pass inputs here 2210 | # mangio_preset 2211 | # ], 2212 | # outputs=[ 2213 | # # Pass Outputs here. These refer to the gradio elements that we want to directly change 2214 | # # sid0, 2215 | # # vc_transform0, 2216 | # # input_audio0, 2217 | # # f0method0, 2218 | # # crepe_hop_length, 2219 | # # filter_radius0, 2220 | # # file_index1, 2221 | # # file_index2, 2222 | # # index_rate1, 2223 | # # resample_sr0, 2224 | # # rms_mix_rate0, 2225 | # # protect0, 2226 | # # f0_file 2227 | # ] 2228 | # ) 2229 | 2230 | 2231 | # with gr.TabItem(i18n("招募音高曲线前端编辑器")): 2232 | # gr.Markdown(value=i18n("加开发群联系我xxxxx")) 2233 | # with gr.TabItem(i18n("点击查看交流、问题反馈群号")): 2234 | # gr.Markdown(value=i18n("xxxxx")) 2235 | 2236 | 2237 | if config.iscolab or config.paperspace: # Share gradio link for colab and paperspace (FORK FEATURE) 2238 | app.queue(concurrency_count=511, max_size=1022).launch(share=True) 2239 | else: 2240 | app.queue(concurrency_count=511, max_size=1022).launch( 2241 | server_name="0.0.0.0", 2242 | inbrowser=not config.noautoopen, 2243 | server_port=config.listen_port, 2244 | quiet=True, 2245 | ) 2246 | -------------------------------------------------------------------------------- /GUI.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import datetime, subprocess 3 | from mega import Mega 4 | now_dir = os.getcwd() 5 | sys.path.append(now_dir) 6 | import logging 7 | import shutil 8 | import threading 9 | import traceback 10 | import warnings 11 | from random import shuffle 12 | from subprocess import Popen 13 | from time import sleep 14 | import json 15 | import pathlib 16 | 17 | import fairseq 18 | import faiss 19 | import gradio as gr 20 | import numpy as np 21 | import torch 22 | from dotenv import load_dotenv 23 | from sklearn.cluster import MiniBatchKMeans 24 | 25 | from configs.config import Config 26 | from i18n.i18n import I18nAuto 27 | from infer.lib.train.process_ckpt import ( 28 | change_info, 29 | extract_small_model, 30 | merge, 31 | show_info, 32 | ) 33 | from infer.modules.uvr5.modules import uvr 34 | from infer.modules.vc.modules import VC 35 | 36 | logging.getLogger("numba").setLevel(logging.WARNING) 37 | 38 | logger = logging.getLogger(__name__) 39 | 40 | tmp = os.path.join(now_dir, "TEMP") 41 | shutil.rmtree(tmp, ignore_errors=True) 42 | shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True) 43 | shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack" % (now_dir), ignore_errors=True) 44 | os.makedirs(tmp, exist_ok=True) 45 | os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True) 46 | os.makedirs(os.path.join(now_dir, "assets/weights"), exist_ok=True) 47 | os.environ["TEMP"] = tmp 48 | warnings.filterwarnings("ignore") 49 | torch.manual_seed(114514) 50 | 51 | 52 | load_dotenv() 53 | config = Config() 54 | vc = VC(config) 55 | 56 | 57 | if config.dml == True: 58 | 59 | def forward_dml(ctx, x, scale): 60 | ctx.scale = scale 61 | res = x.clone().detach() 62 | return res 63 | 64 | fairseq.modules.grad_multiply.GradMultiply.forward = forward_dml 65 | i18n = I18nAuto() 66 | logger.info(i18n) 67 | # 判断是否有能用来训练和加速推理的N卡 68 | ngpu = torch.cuda.device_count() 69 | gpu_infos = [] 70 | mem = [] 71 | if_gpu_ok = False 72 | 73 | if torch.cuda.is_available() or ngpu != 0: 74 | for i in range(ngpu): 75 | gpu_name = torch.cuda.get_device_name(i) 76 | if any( 77 | value in gpu_name.upper() 78 | for value in [ 79 | "10", 80 | "16", 81 | "20", 82 | "30", 83 | "40", 84 | "A2", 85 | "A3", 86 | "A4", 87 | "P4", 88 | "A50", 89 | "500", 90 | "A60", 91 | "70", 92 | "80", 93 | "90", 94 | "M4", 95 | "T4", 96 | "TITAN", 97 | ] 98 | ): 99 | # A10#A100#V100#A40#P40#M40#K80#A4500 100 | if_gpu_ok = True # 至少有一张能用的N卡 101 | gpu_infos.append("%s\t%s" % (i, gpu_name)) 102 | mem.append( 103 | int( 104 | torch.cuda.get_device_properties(i).total_memory 105 | / 1024 106 | / 1024 107 | / 1024 108 | + 0.4 109 | ) 110 | ) 111 | if if_gpu_ok and len(gpu_infos) > 0: 112 | gpu_info = "\n".join(gpu_infos) 113 | default_batch_size = min(mem) // 2 114 | else: 115 | gpu_info = i18n("很遗憾您这没有能用的显卡来支持您训练") 116 | default_batch_size = 1 117 | gpus = "-".join([i[0] for i in gpu_infos]) 118 | 119 | 120 | class ToolButton(gr.Button, gr.components.FormComponent): 121 | """Small button with single emoji as text, fits inside gradio forms""" 122 | 123 | def __init__(self, **kwargs): 124 | super().__init__(variant="tool", **kwargs) 125 | 126 | def get_block_name(self): 127 | return "button" 128 | 129 | 130 | weight_root = os.getenv("weight_root") 131 | weight_uvr5_root = os.getenv("weight_uvr5_root") 132 | index_root = os.getenv("index_root") 133 | 134 | names = [] 135 | for name in os.listdir(weight_root): 136 | if name.endswith(".pth"): 137 | names.append(name) 138 | index_paths = [] 139 | for root, dirs, files in os.walk(index_root, topdown=False): 140 | for name in files: 141 | if name.endswith(".index") and "trained" not in name: 142 | index_paths.append("%s/%s" % (root, name)) 143 | uvr5_names = [] 144 | for name in os.listdir(weight_uvr5_root): 145 | if name.endswith(".pth") or "onnx" in name: 146 | uvr5_names.append(name.replace(".pth", "")) 147 | 148 | 149 | def change_choices(): 150 | names = [] 151 | for name in os.listdir(weight_root): 152 | if name.endswith(".pth"): 153 | names.append(name) 154 | index_paths = [] 155 | for root, dirs, files in os.walk(index_root, topdown=False): 156 | for name in files: 157 | if name.endswith(".index") and "trained" not in name: 158 | index_paths.append("%s/%s" % (root, name)) 159 | audio_files=[] 160 | for filename in os.listdir("./audios"): 161 | if filename.endswith(('.wav','.mp3','.ogg')): 162 | audio_files.append(filename) 163 | return {"choices": sorted(names), "__type__": "update"}, { 164 | "choices": sorted(index_paths), 165 | "__type__": "update", 166 | }, {"choices": sorted(audio_files), "__type__": "update"} 167 | 168 | def clean(): 169 | return {"value": "", "__type__": "update"} 170 | 171 | 172 | def export_onnx(): 173 | from infer.modules.onnx.export import export_onnx as eo 174 | 175 | eo() 176 | 177 | 178 | sr_dict = { 179 | "32k": 32000, 180 | "40k": 40000, 181 | "48k": 48000, 182 | } 183 | 184 | 185 | def if_done(done, p): 186 | while 1: 187 | if p.poll() is None: 188 | sleep(0.5) 189 | else: 190 | break 191 | done[0] = True 192 | 193 | 194 | def if_done_multi(done, ps): 195 | while 1: 196 | # poll==None代表进程未结束 197 | # 只要有一个进程未结束都不停 198 | flag = 1 199 | for p in ps: 200 | if p.poll() is None: 201 | flag = 0 202 | sleep(0.5) 203 | break 204 | if flag == 1: 205 | break 206 | done[0] = True 207 | 208 | 209 | def preprocess_dataset(trainset_dir, exp_dir, sr, n_p): 210 | sr = sr_dict[sr] 211 | os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True) 212 | f = open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "w") 213 | f.close() 214 | per = 3.0 if config.is_half else 3.7 215 | cmd = '"%s" infer/modules/train/preprocess.py "%s" %s %s "%s/logs/%s" %s %.1f' % ( 216 | config.python_cmd, 217 | trainset_dir, 218 | sr, 219 | n_p, 220 | now_dir, 221 | exp_dir, 222 | config.noparallel, 223 | per, 224 | ) 225 | logger.info(cmd) 226 | p = Popen(cmd, shell=True) # , stdin=PIPE, stdout=PIPE,stderr=PIPE,cwd=now_dir 227 | ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 228 | done = [False] 229 | threading.Thread( 230 | target=if_done, 231 | args=( 232 | done, 233 | p, 234 | ), 235 | ).start() 236 | while 1: 237 | with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f: 238 | yield (f.read()) 239 | sleep(1) 240 | if done[0]: 241 | break 242 | with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f: 243 | log = f.read() 244 | logger.info(log) 245 | yield log 246 | 247 | 248 | # but2.click(extract_f0,[gpus6,np7,f0method8,if_f0_3,trainset_dir4],[info2]) 249 | def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvpe): 250 | gpus = gpus.split("-") 251 | os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True) 252 | f = open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "w") 253 | f.close() 254 | if if_f0: 255 | if f0method != "rmvpe_gpu": 256 | cmd = ( 257 | '"%s" infer/modules/train/extract/extract_f0_print.py "%s/logs/%s" %s %s' 258 | % ( 259 | config.python_cmd, 260 | now_dir, 261 | exp_dir, 262 | n_p, 263 | f0method, 264 | ) 265 | ) 266 | logger.info(cmd) 267 | p = Popen( 268 | cmd, shell=True, cwd=now_dir 269 | ) # , stdin=PIPE, stdout=PIPE,stderr=PIPE 270 | ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 271 | done = [False] 272 | threading.Thread( 273 | target=if_done, 274 | args=( 275 | done, 276 | p, 277 | ), 278 | ).start() 279 | else: 280 | if gpus_rmvpe != "-": 281 | gpus_rmvpe = gpus_rmvpe.split("-") 282 | leng = len(gpus_rmvpe) 283 | ps = [] 284 | for idx, n_g in enumerate(gpus_rmvpe): 285 | cmd = ( 286 | '"%s" infer/modules/train/extract/extract_f0_rmvpe.py %s %s %s "%s/logs/%s" %s ' 287 | % ( 288 | config.python_cmd, 289 | leng, 290 | idx, 291 | n_g, 292 | now_dir, 293 | exp_dir, 294 | config.is_half, 295 | ) 296 | ) 297 | logger.info(cmd) 298 | p = Popen( 299 | cmd, shell=True, cwd=now_dir 300 | ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir 301 | ps.append(p) 302 | ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 303 | done = [False] 304 | threading.Thread( 305 | target=if_done_multi, # 306 | args=( 307 | done, 308 | ps, 309 | ), 310 | ).start() 311 | else: 312 | cmd = ( 313 | config.python_cmd 314 | + ' infer/modules/train/extract/extract_f0_rmvpe_dml.py "%s/logs/%s" ' 315 | % ( 316 | now_dir, 317 | exp_dir, 318 | ) 319 | ) 320 | logger.info(cmd) 321 | p = Popen( 322 | cmd, shell=True, cwd=now_dir 323 | ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir 324 | p.wait() 325 | done = [True] 326 | while 1: 327 | with open( 328 | "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r" 329 | ) as f: 330 | yield (f.read()) 331 | sleep(1) 332 | if done[0]: 333 | break 334 | with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f: 335 | log = f.read() 336 | logger.info(log) 337 | yield log 338 | ####对不同part分别开多进程 339 | """ 340 | n_part=int(sys.argv[1]) 341 | i_part=int(sys.argv[2]) 342 | i_gpu=sys.argv[3] 343 | exp_dir=sys.argv[4] 344 | os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu) 345 | """ 346 | leng = len(gpus) 347 | ps = [] 348 | for idx, n_g in enumerate(gpus): 349 | cmd = ( 350 | '"%s" infer/modules/train/extract_feature_print.py %s %s %s %s "%s/logs/%s" %s' 351 | % ( 352 | config.python_cmd, 353 | config.device, 354 | leng, 355 | idx, 356 | n_g, 357 | now_dir, 358 | exp_dir, 359 | version19, 360 | ) 361 | ) 362 | logger.info(cmd) 363 | p = Popen( 364 | cmd, shell=True, cwd=now_dir 365 | ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir 366 | ps.append(p) 367 | ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 368 | done = [False] 369 | threading.Thread( 370 | target=if_done_multi, 371 | args=( 372 | done, 373 | ps, 374 | ), 375 | ).start() 376 | while 1: 377 | with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f: 378 | yield (f.read()) 379 | sleep(1) 380 | if done[0]: 381 | break 382 | with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f: 383 | log = f.read() 384 | logger.info(log) 385 | yield log 386 | 387 | 388 | def get_pretrained_models(path_str, f0_str, sr2): 389 | if_pretrained_generator_exist = os.access( 390 | "assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), os.F_OK 391 | ) 392 | if_pretrained_discriminator_exist = os.access( 393 | "assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK 394 | ) 395 | if not if_pretrained_generator_exist: 396 | logger.warn( 397 | "assets/pretrained%s/%sG%s.pth not exist, will not use pretrained model", 398 | path_str, 399 | f0_str, 400 | sr2, 401 | ) 402 | if not if_pretrained_discriminator_exist: 403 | logger.warn( 404 | "assets/pretrained%s/%sD%s.pth not exist, will not use pretrained model", 405 | path_str, 406 | f0_str, 407 | sr2, 408 | ) 409 | return ( 410 | "assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2) 411 | if if_pretrained_generator_exist 412 | else "", 413 | "assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2) 414 | if if_pretrained_discriminator_exist 415 | else "", 416 | ) 417 | 418 | 419 | def change_sr2(sr2, if_f0_3, version19): 420 | path_str = "" if version19 == "v1" else "_v2" 421 | f0_str = "f0" if if_f0_3 else "" 422 | return get_pretrained_models(path_str, f0_str, sr2) 423 | 424 | 425 | def change_version19(sr2, if_f0_3, version19): 426 | path_str = "" if version19 == "v1" else "_v2" 427 | if sr2 == "32k" and version19 == "v1": 428 | sr2 = "40k" 429 | to_return_sr2 = ( 430 | {"choices": ["40k", "48k"], "__type__": "update", "value": sr2} 431 | if version19 == "v1" 432 | else {"choices": ["40k", "48k", "32k"], "__type__": "update", "value": sr2} 433 | ) 434 | f0_str = "f0" if if_f0_3 else "" 435 | return ( 436 | *get_pretrained_models(path_str, f0_str, sr2), 437 | to_return_sr2, 438 | ) 439 | 440 | 441 | def change_f0(if_f0_3, sr2, version19): # f0method8,pretrained_G14,pretrained_D15 442 | path_str = "" if version19 == "v1" else "_v2" 443 | return ( 444 | {"visible": if_f0_3, "__type__": "update"}, 445 | *get_pretrained_models(path_str, "f0", sr2), 446 | ) 447 | 448 | 449 | # but3.click(click_train,[exp_dir1,sr2,if_f0_3,save_epoch10,total_epoch11,batch_size12,if_save_latest13,pretrained_G14,pretrained_D15,gpus16]) 450 | def click_train( 451 | exp_dir1, 452 | sr2, 453 | if_f0_3, 454 | spk_id5, 455 | save_epoch10, 456 | total_epoch11, 457 | batch_size12, 458 | if_save_latest13, 459 | pretrained_G14, 460 | pretrained_D15, 461 | gpus16, 462 | if_cache_gpu17, 463 | if_save_every_weights18, 464 | version19, 465 | ): 466 | # 生成filelist 467 | exp_dir = "%s/logs/%s" % (now_dir, exp_dir1) 468 | os.makedirs(exp_dir, exist_ok=True) 469 | gt_wavs_dir = "%s/0_gt_wavs" % (exp_dir) 470 | feature_dir = ( 471 | "%s/3_feature256" % (exp_dir) 472 | if version19 == "v1" 473 | else "%s/3_feature768" % (exp_dir) 474 | ) 475 | if if_f0_3: 476 | f0_dir = "%s/2a_f0" % (exp_dir) 477 | f0nsf_dir = "%s/2b-f0nsf" % (exp_dir) 478 | names = ( 479 | set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) 480 | & set([name.split(".")[0] for name in os.listdir(feature_dir)]) 481 | & set([name.split(".")[0] for name in os.listdir(f0_dir)]) 482 | & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)]) 483 | ) 484 | else: 485 | names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set( 486 | [name.split(".")[0] for name in os.listdir(feature_dir)] 487 | ) 488 | opt = [] 489 | for name in names: 490 | if if_f0_3: 491 | opt.append( 492 | "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s" 493 | % ( 494 | gt_wavs_dir.replace("\\", "\\\\"), 495 | name, 496 | feature_dir.replace("\\", "\\\\"), 497 | name, 498 | f0_dir.replace("\\", "\\\\"), 499 | name, 500 | f0nsf_dir.replace("\\", "\\\\"), 501 | name, 502 | spk_id5, 503 | ) 504 | ) 505 | else: 506 | opt.append( 507 | "%s/%s.wav|%s/%s.npy|%s" 508 | % ( 509 | gt_wavs_dir.replace("\\", "\\\\"), 510 | name, 511 | feature_dir.replace("\\", "\\\\"), 512 | name, 513 | spk_id5, 514 | ) 515 | ) 516 | fea_dim = 256 if version19 == "v1" else 768 517 | if if_f0_3: 518 | for _ in range(2): 519 | opt.append( 520 | "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s" 521 | % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5) 522 | ) 523 | else: 524 | for _ in range(2): 525 | opt.append( 526 | "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s" 527 | % (now_dir, sr2, now_dir, fea_dim, spk_id5) 528 | ) 529 | shuffle(opt) 530 | with open("%s/filelist.txt" % exp_dir, "w") as f: 531 | f.write("\n".join(opt)) 532 | logger.debug("Write filelist done") 533 | # 生成config#无需生成config 534 | # cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e mi-test -sr 40k -f0 1 -bs 4 -g 0 -te 10 -se 5 -pg pretrained/f0G40k.pth -pd pretrained/f0D40k.pth -l 1 -c 0" 535 | logger.info("Use gpus: %s", str(gpus16)) 536 | if pretrained_G14 == "": 537 | logger.info("No pretrained Generator") 538 | if pretrained_D15 == "": 539 | logger.info("No pretrained Discriminator") 540 | if version19 == "v1" or sr2 == "40k": 541 | config_path = "v1/%s.json" % sr2 542 | else: 543 | config_path = "v2/%s.json" % sr2 544 | config_save_path = os.path.join(exp_dir, "config.json") 545 | if not pathlib.Path(config_save_path).exists(): 546 | with open(config_save_path, "w", encoding="utf-8") as f: 547 | json.dump( 548 | config.json_config[config_path], 549 | f, 550 | ensure_ascii=False, 551 | indent=4, 552 | sort_keys=True, 553 | ) 554 | f.write("\n") 555 | if gpus16: 556 | cmd = ( 557 | '"%s" infer/modules/train/train.py -e "%s" -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s' 558 | % ( 559 | config.python_cmd, 560 | exp_dir1, 561 | sr2, 562 | 1 if if_f0_3 else 0, 563 | batch_size12, 564 | gpus16, 565 | total_epoch11, 566 | save_epoch10, 567 | "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "", 568 | "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "", 569 | 1 if if_save_latest13 == i18n("是") else 0, 570 | 1 if if_cache_gpu17 == i18n("是") else 0, 571 | 1 if if_save_every_weights18 == i18n("是") else 0, 572 | version19, 573 | ) 574 | ) 575 | else: 576 | cmd = ( 577 | '"%s" infer/modules/train/train.py -e "%s" -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s' 578 | % ( 579 | config.python_cmd, 580 | exp_dir1, 581 | sr2, 582 | 1 if if_f0_3 else 0, 583 | batch_size12, 584 | total_epoch11, 585 | save_epoch10, 586 | "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "", 587 | "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "", 588 | 1 if if_save_latest13 == i18n("是") else 0, 589 | 1 if if_cache_gpu17 == i18n("是") else 0, 590 | 1 if if_save_every_weights18 == i18n("是") else 0, 591 | version19, 592 | ) 593 | ) 594 | logger.info(cmd) 595 | p = Popen(cmd, shell=True, cwd=now_dir) 596 | p.wait() 597 | return "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log" 598 | 599 | 600 | # but4.click(train_index, [exp_dir1], info3) 601 | def train_index(exp_dir1, version19): 602 | # exp_dir = "%s/logs/%s" % (now_dir, exp_dir1) 603 | exp_dir = "logs/%s" % (exp_dir1) 604 | os.makedirs(exp_dir, exist_ok=True) 605 | feature_dir = ( 606 | "%s/3_feature256" % (exp_dir) 607 | if version19 == "v1" 608 | else "%s/3_feature768" % (exp_dir) 609 | ) 610 | if not os.path.exists(feature_dir): 611 | return "请先进行特征提取!" 612 | listdir_res = list(os.listdir(feature_dir)) 613 | if len(listdir_res) == 0: 614 | return "请先进行特征提取!" 615 | infos = [] 616 | npys = [] 617 | for name in sorted(listdir_res): 618 | phone = np.load("%s/%s" % (feature_dir, name)) 619 | npys.append(phone) 620 | big_npy = np.concatenate(npys, 0) 621 | big_npy_idx = np.arange(big_npy.shape[0]) 622 | np.random.shuffle(big_npy_idx) 623 | big_npy = big_npy[big_npy_idx] 624 | if big_npy.shape[0] > 2e5: 625 | infos.append("Trying doing kmeans %s shape to 10k centers." % big_npy.shape[0]) 626 | yield "\n".join(infos) 627 | try: 628 | big_npy = ( 629 | MiniBatchKMeans( 630 | n_clusters=10000, 631 | verbose=True, 632 | batch_size=256 * config.n_cpu, 633 | compute_labels=False, 634 | init="random", 635 | ) 636 | .fit(big_npy) 637 | .cluster_centers_ 638 | ) 639 | except: 640 | info = traceback.format_exc() 641 | logger.info(info) 642 | infos.append(info) 643 | yield "\n".join(infos) 644 | 645 | np.save("%s/total_fea.npy" % exp_dir, big_npy) 646 | n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39) 647 | infos.append("%s,%s" % (big_npy.shape, n_ivf)) 648 | yield "\n".join(infos) 649 | index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf) 650 | # index = faiss.index_factory(256if version19=="v1"else 768, "IVF%s,PQ128x4fs,RFlat"%n_ivf) 651 | infos.append("training") 652 | yield "\n".join(infos) 653 | index_ivf = faiss.extract_index_ivf(index) # 654 | index_ivf.nprobe = 1 655 | index.train(big_npy) 656 | faiss.write_index( 657 | index, 658 | "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index" 659 | % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19), 660 | ) 661 | 662 | infos.append("adding") 663 | yield "\n".join(infos) 664 | batch_size_add = 8192 665 | for i in range(0, big_npy.shape[0], batch_size_add): 666 | index.add(big_npy[i : i + batch_size_add]) 667 | faiss.write_index( 668 | index, 669 | "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index" 670 | % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19), 671 | ) 672 | infos.append( 673 | "成功构建索引,added_IVF%s_Flat_nprobe_%s_%s_%s.index" 674 | % (n_ivf, index_ivf.nprobe, exp_dir1, version19) 675 | ) 676 | # faiss.write_index(index, '%s/added_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19)) 677 | # infos.append("成功构建索引,added_IVF%s_Flat_FastScan_%s.index"%(n_ivf,version19)) 678 | yield "\n".join(infos) 679 | 680 | 681 | # but5.click(train1key, [exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0method8, save_epoch10, total_epoch11, batch_size12, if_save_latest13, pretrained_G14, pretrained_D15, gpus16, if_cache_gpu17], info3) 682 | def train1key( 683 | exp_dir1, 684 | sr2, 685 | if_f0_3, 686 | trainset_dir4, 687 | spk_id5, 688 | np7, 689 | f0method8, 690 | save_epoch10, 691 | total_epoch11, 692 | batch_size12, 693 | if_save_latest13, 694 | pretrained_G14, 695 | pretrained_D15, 696 | gpus16, 697 | if_cache_gpu17, 698 | if_save_every_weights18, 699 | version19, 700 | gpus_rmvpe, 701 | ): 702 | infos = [] 703 | 704 | def get_info_str(strr): 705 | infos.append(strr) 706 | return "\n".join(infos) 707 | 708 | ####### step1:处理数据 709 | yield get_info_str(i18n("step1:正在处理数据")) 710 | [get_info_str(_) for _ in preprocess_dataset(trainset_dir4, exp_dir1, sr2, np7)] 711 | 712 | ####### step2a:提取音高 713 | yield get_info_str(i18n("step2:正在提取音高&正在提取特征")) 714 | [ 715 | get_info_str(_) 716 | for _ in extract_f0_feature( 717 | gpus16, np7, f0method8, if_f0_3, exp_dir1, version19, gpus_rmvpe 718 | ) 719 | ] 720 | 721 | ####### step3a:训练模型 722 | yield get_info_str(i18n("step3a:正在训练模型")) 723 | click_train( 724 | exp_dir1, 725 | sr2, 726 | if_f0_3, 727 | spk_id5, 728 | save_epoch10, 729 | total_epoch11, 730 | batch_size12, 731 | if_save_latest13, 732 | pretrained_G14, 733 | pretrained_D15, 734 | gpus16, 735 | if_cache_gpu17, 736 | if_save_every_weights18, 737 | version19, 738 | ) 739 | yield get_info_str(i18n("训练结束, 您可查看控制台训练日志或实验文件夹下的train.log")) 740 | 741 | ####### step3b:训练索引 742 | [get_info_str(_) for _ in train_index(exp_dir1, version19)] 743 | yield get_info_str(i18n("全流程结束!")) 744 | 745 | 746 | # ckpt_path2.change(change_info_,[ckpt_path2],[sr__,if_f0__]) 747 | def change_info_(ckpt_path): 748 | if not os.path.exists(ckpt_path.replace(os.path.basename(ckpt_path), "train.log")): 749 | return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"} 750 | try: 751 | with open( 752 | ckpt_path.replace(os.path.basename(ckpt_path), "train.log"), "r" 753 | ) as f: 754 | info = eval(f.read().strip("\n").split("\n")[0].split("\t")[-1]) 755 | sr, f0 = info["sample_rate"], info["if_f0"] 756 | version = "v2" if ("version" in info and info["version"] == "v2") else "v1" 757 | return sr, str(f0), version 758 | except: 759 | traceback.print_exc() 760 | return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"} 761 | 762 | 763 | F0GPUVisible = config.dml == False 764 | 765 | 766 | def change_f0_method(f0method8): 767 | if f0method8 == "rmvpe_gpu": 768 | visible = F0GPUVisible 769 | else: 770 | visible = False 771 | return {"visible": visible, "__type__": "update"} 772 | 773 | def find_model(): 774 | if len(names) > 0: 775 | vc.get_vc(sorted(names)[0],None,None) 776 | return sorted(names)[0] 777 | else: 778 | gr.Info("Do not forget to choose a model.") 779 | return '' 780 | 781 | def find_audios(index=False): 782 | audio_files=[] 783 | if not os.path.exists('./audios'): os.mkdir("./audios") 784 | for filename in os.listdir("./audios"): 785 | if filename.endswith(('.wav','.mp3','.ogg')): 786 | audio_files.append("./audios/"+filename) 787 | if index: 788 | if len(audio_files) > 0: return sorted(audio_files)[0] 789 | else: return "" 790 | elif len(audio_files) > 0: return sorted(audio_files) 791 | else: return [] 792 | 793 | def get_index(): 794 | if find_model() != '': 795 | chosen_model=sorted(names)[0].split(".")[0] 796 | logs_path="./logs/"+chosen_model 797 | if os.path.exists(logs_path): 798 | for file in os.listdir(logs_path): 799 | if file.endswith(".index"): 800 | return os.path.join(logs_path, file) 801 | return '' 802 | else: 803 | return '' 804 | 805 | def get_indexes(): 806 | indexes_list=[] 807 | for dirpath, dirnames, filenames in os.walk("./logs/"): 808 | for filename in filenames: 809 | if filename.endswith(".index"): 810 | indexes_list.append(os.path.join(dirpath,filename)) 811 | if len(indexes_list) > 0: 812 | return indexes_list 813 | else: 814 | return '' 815 | 816 | def save_wav(file): 817 | try: 818 | file_path=file.name 819 | shutil.move(file_path,'./audios') 820 | return './audios/'+os.path.basename(file_path) 821 | except AttributeError: 822 | try: 823 | new_name = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")+'.wav' 824 | new_path='./audios/'+new_name 825 | shutil.move(file,new_path) 826 | return new_path 827 | except TypeError: 828 | return None 829 | 830 | def download_from_url(url, model): 831 | if url == '': 832 | return "URL cannot be left empty." 833 | if model =='': 834 | return "You need to name your model. For example: My-Model" 835 | url = url.strip() 836 | zip_dirs = ["zips", "unzips"] 837 | for directory in zip_dirs: 838 | if os.path.exists(directory): 839 | shutil.rmtree(directory) 840 | os.makedirs("zips", exist_ok=True) 841 | os.makedirs("unzips", exist_ok=True) 842 | zipfile = model + '.zip' 843 | zipfile_path = './zips/' + zipfile 844 | try: 845 | if "drive.google.com" in url: 846 | subprocess.run(["gdown", url, "--fuzzy", "-O", zipfile_path]) 847 | elif "mega.nz" in url: 848 | m = Mega() 849 | m.download_url(url, './zips') 850 | else: 851 | subprocess.run(["wget", url, "-O", zipfile_path]) 852 | for filename in os.listdir("./zips"): 853 | if filename.endswith(".zip"): 854 | zipfile_path = os.path.join("./zips/",filename) 855 | shutil.unpack_archive(zipfile_path, "./unzips", 'zip') 856 | else: 857 | return "No zipfile found." 858 | for root, dirs, files in os.walk('./unzips'): 859 | for file in files: 860 | file_path = os.path.join(root, file) 861 | if file.endswith(".index"): 862 | os.mkdir(f'./logs/{model}') 863 | shutil.copy2(file_path,f'./logs/{model}') 864 | elif "G_" not in file and "D_" not in file and file.endswith(".pth"): 865 | shutil.copy(file_path,f'./weights/{model}.pth') 866 | shutil.rmtree("zips") 867 | shutil.rmtree("unzips") 868 | return "Success." 869 | except: 870 | return "There's been an error." 871 | 872 | def upload_to_dataset(files, dir): 873 | if dir == '': 874 | dir = './dataset/'+datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") 875 | if not os.path.exists(dir): 876 | os.makedirs(dir) 877 | for file in files: 878 | path=file.name 879 | shutil.copy2(path,dir) 880 | gr.Info(i18n("处理数据")) 881 | return i18n("处理数据"), {"value":dir,"__type__":"update"} 882 | 883 | with gr.Blocks(title="EasyGUI v2.9",theme=gr.themes.Base()) as app: 884 | gr.HTML("

EasyGUI v2.9

") 885 | with gr.Tabs(): 886 | with gr.TabItem(i18n("模型推理")): 887 | with gr.Row(): 888 | sid0 = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names), value=find_model()) 889 | refresh_button = gr.Button(i18n("刷新音色列表和索引路径"), variant="primary") 890 | #clean_button = gr.Button(i18n("卸载音色省显存"), variant="primary") 891 | spk_item = gr.Slider( 892 | minimum=0, 893 | maximum=2333, 894 | step=1, 895 | label=i18n("请选择说话人id"), 896 | value=0, 897 | visible=False, 898 | interactive=True, 899 | ) 900 | #clean_button.click( 901 | # fn=clean, inputs=[], outputs=[sid0], api_name="infer_clean" 902 | #) 903 | vc_transform0 = gr.Number( 904 | label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0 905 | ) 906 | but0 = gr.Button(i18n("转换"), variant="primary") 907 | with gr.Row(): 908 | with gr.Column(): 909 | with gr.Row(): 910 | dropbox = gr.File(label="Drop your audio here & hit the Reload button.") 911 | with gr.Row(): 912 | record_button=gr.Audio(source="microphone", label="OR Record audio.", type="filepath") 913 | with gr.Row(): 914 | input_audio0 = gr.Dropdown( 915 | label=i18n("输入待处理音频文件路径(默认是正确格式示例)"), 916 | value=find_audios(True), 917 | choices=find_audios() 918 | ) 919 | record_button.change(fn=save_wav, inputs=[record_button], outputs=[input_audio0]) 920 | dropbox.upload(fn=save_wav, inputs=[dropbox], outputs=[input_audio0]) 921 | with gr.Column(): 922 | with gr.Accordion(label=i18n("自动检测index路径,下拉式选择(dropdown)"), open=False): 923 | file_index2 = gr.Dropdown( 924 | label=i18n("自动检测index路径,下拉式选择(dropdown)"), 925 | choices=get_indexes(), 926 | interactive=True, 927 | value=get_index() 928 | ) 929 | index_rate1 = gr.Slider( 930 | minimum=0, 931 | maximum=1, 932 | label=i18n("检索特征占比"), 933 | value=0.66, 934 | interactive=True, 935 | ) 936 | vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)")) 937 | with gr.Accordion(label=i18n("常规设置"), open=False): 938 | f0method0 = gr.Radio( 939 | label=i18n( 940 | "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU" 941 | ), 942 | choices=["pm", "harvest", "crepe", "rmvpe"] 943 | if config.dml == False 944 | else ["pm", "harvest", "rmvpe"], 945 | value="rmvpe", 946 | interactive=True, 947 | ) 948 | filter_radius0 = gr.Slider( 949 | minimum=0, 950 | maximum=7, 951 | label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"), 952 | value=3, 953 | step=1, 954 | interactive=True, 955 | ) 956 | resample_sr0 = gr.Slider( 957 | minimum=0, 958 | maximum=48000, 959 | label=i18n("后处理重采样至最终采样率,0为不进行重采样"), 960 | value=0, 961 | step=1, 962 | interactive=True, 963 | ) 964 | rms_mix_rate0 = gr.Slider( 965 | minimum=0, 966 | maximum=1, 967 | label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"), 968 | value=0.21, 969 | interactive=True, 970 | ) 971 | protect0 = gr.Slider( 972 | minimum=0, 973 | maximum=0.5, 974 | label=i18n( 975 | "保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果" 976 | ), 977 | value=0.33, 978 | step=0.01, 979 | interactive=True, 980 | ) 981 | file_index1 = gr.Textbox( 982 | label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"), 983 | value="", 984 | interactive=True, 985 | visible=False 986 | ) 987 | refresh_button.click( 988 | fn=change_choices, 989 | inputs=[], 990 | outputs=[sid0, file_index2, input_audio0], 991 | api_name="infer_refresh", 992 | ) 993 | # file_big_npy1 = gr.Textbox( 994 | # label=i18n("特征文件路径"), 995 | # value="E:\\codes\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy", 996 | # interactive=True, 997 | # ) 998 | with gr.Row(): 999 | f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"), visible=False) 1000 | with gr.Row(): 1001 | vc_output1 = gr.Textbox(label=i18n("输出信息")) 1002 | but0.click( 1003 | vc.vc_single, 1004 | [ 1005 | spk_item, 1006 | input_audio0, 1007 | vc_transform0, 1008 | f0_file, 1009 | f0method0, 1010 | file_index1, 1011 | file_index2, 1012 | # file_big_npy1, 1013 | index_rate1, 1014 | filter_radius0, 1015 | resample_sr0, 1016 | rms_mix_rate0, 1017 | protect0, 1018 | ], 1019 | [vc_output1, vc_output2], 1020 | api_name="infer_convert", 1021 | ) 1022 | with gr.Row(): 1023 | with gr.Accordion(open=False, label=i18n("批量转换, 输入待转换音频文件夹, 或上传多个音频文件, 在指定文件夹(默认opt)下输出转换的音频. ")): 1024 | with gr.Column(): 1025 | vc_transform1 = gr.Number( 1026 | label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0 1027 | ) 1028 | opt_input = gr.Textbox(label=i18n("指定输出文件夹"), value="opt") 1029 | f0method1 = gr.Radio( 1030 | label=i18n( 1031 | "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU" 1032 | ), 1033 | choices=["pm", "harvest", "crepe", "rmvpe"] 1034 | if config.dml == False 1035 | else ["pm", "harvest", "rmvpe"], 1036 | value="pm", 1037 | interactive=True, 1038 | ) 1039 | filter_radius1 = gr.Slider( 1040 | minimum=0, 1041 | maximum=7, 1042 | label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"), 1043 | value=3, 1044 | step=1, 1045 | interactive=True, 1046 | ) 1047 | with gr.Column(): 1048 | file_index3 = gr.Textbox( 1049 | label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"), 1050 | value="", 1051 | interactive=True, 1052 | visible=False 1053 | ) 1054 | file_index4 = gr.Dropdown( 1055 | label=i18n("自动检测index路径,下拉式选择(dropdown)"), 1056 | choices=sorted(index_paths), 1057 | interactive=True, 1058 | ) 1059 | refresh_button.click( 1060 | fn=lambda: change_choices()[1], 1061 | inputs=[], 1062 | outputs=file_index4, 1063 | api_name="infer_refresh_batch", 1064 | ) 1065 | # file_big_npy2 = gr.Textbox( 1066 | # label=i18n("特征文件路径"), 1067 | # value="E:\\codes\\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy", 1068 | # interactive=True, 1069 | # ) 1070 | index_rate2 = gr.Slider( 1071 | minimum=0, 1072 | maximum=1, 1073 | label=i18n("检索特征占比"), 1074 | value=1, 1075 | interactive=True, 1076 | ) 1077 | with gr.Column(): 1078 | resample_sr1 = gr.Slider( 1079 | minimum=0, 1080 | maximum=48000, 1081 | label=i18n("后处理重采样至最终采样率,0为不进行重采样"), 1082 | value=0, 1083 | step=1, 1084 | interactive=True, 1085 | ) 1086 | rms_mix_rate1 = gr.Slider( 1087 | minimum=0, 1088 | maximum=1, 1089 | label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"), 1090 | value=1, 1091 | interactive=True, 1092 | ) 1093 | protect1 = gr.Slider( 1094 | minimum=0, 1095 | maximum=0.5, 1096 | label=i18n( 1097 | "保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果" 1098 | ), 1099 | value=0.33, 1100 | step=0.01, 1101 | interactive=True, 1102 | ) 1103 | with gr.Column(): 1104 | dir_input = gr.Textbox( 1105 | label=i18n("输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)"), 1106 | value="E:\codes\py39\\test-20230416b\\todo-songs", 1107 | ) 1108 | inputs = gr.File( 1109 | file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹") 1110 | ) 1111 | with gr.Row(): 1112 | format1 = gr.Radio( 1113 | label=i18n("导出文件格式"), 1114 | choices=["wav", "flac", "mp3", "m4a"], 1115 | value="flac", 1116 | interactive=True, 1117 | ) 1118 | but1 = gr.Button(i18n("转换"), variant="primary") 1119 | vc_output3 = gr.Textbox(label=i18n("输出信息")) 1120 | but1.click( 1121 | vc.vc_multi, 1122 | [ 1123 | spk_item, 1124 | dir_input, 1125 | opt_input, 1126 | inputs, 1127 | vc_transform1, 1128 | f0method1, 1129 | file_index3, 1130 | file_index4, 1131 | # file_big_npy2, 1132 | index_rate2, 1133 | filter_radius1, 1134 | resample_sr1, 1135 | rms_mix_rate1, 1136 | protect1, 1137 | format1, 1138 | ], 1139 | [vc_output3], 1140 | api_name="infer_convert_batch", 1141 | ) 1142 | sid0.change( 1143 | fn=vc.get_vc, 1144 | inputs=[sid0, protect0, protect1], 1145 | outputs=[spk_item, protect0, protect1, file_index2, file_index4], 1146 | ) 1147 | with gr.TabItem("Download Model"): 1148 | with gr.Row(): 1149 | url=gr.Textbox(label="Enter the URL to the Model:") 1150 | with gr.Row(): 1151 | model = gr.Textbox(label="Name your model:") 1152 | download_button=gr.Button("Download") 1153 | with gr.Row(): 1154 | status_bar=gr.Textbox(label="") 1155 | download_button.click(fn=download_from_url, inputs=[url, model], outputs=[status_bar]) 1156 | with gr.Row(): 1157 | gr.Markdown( 1158 | """ 1159 | Original RVC:https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI 1160 | Mangio's RVC Fork:https://github.com/Mangio621/Mangio-RVC-Fork 1161 | ❤️ If you like the EasyGUI, help me keep it.❤️ 1162 | https://paypal.me/lesantillan 1163 | """ 1164 | ) 1165 | with gr.TabItem(i18n("训练")): 1166 | with gr.Row(): 1167 | with gr.Column(): 1168 | exp_dir1 = gr.Textbox(label=i18n("输入实验名"), value="My-Voice") 1169 | np7 = gr.Slider( 1170 | minimum=0, 1171 | maximum=config.n_cpu, 1172 | step=1, 1173 | label=i18n("提取音高和处理数据使用的CPU进程数"), 1174 | value=int(np.ceil(config.n_cpu / 1.5)), 1175 | interactive=True, 1176 | ) 1177 | sr2 = gr.Radio( 1178 | label=i18n("目标采样率"), 1179 | choices=["40k", "48k"], 1180 | value="40k", 1181 | interactive=True, 1182 | visible=False 1183 | ) 1184 | if_f0_3 = gr.Radio( 1185 | label=i18n("模型是否带音高指导(唱歌一定要, 语音可以不要)"), 1186 | choices=[True, False], 1187 | value=True, 1188 | interactive=True, 1189 | visible=False 1190 | ) 1191 | version19 = gr.Radio( 1192 | label=i18n("版本"), 1193 | choices=["v1", "v2"], 1194 | value="v2", 1195 | interactive=True, 1196 | visible=False, 1197 | ) 1198 | trainset_dir4 = gr.Textbox( 1199 | label=i18n("输入训练文件夹路径"), value='./dataset/'+datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") 1200 | ) 1201 | easy_uploader = gr.Files(label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹"),file_types=['audio']) 1202 | but1 = gr.Button(label=i18n("处理数据"), variant="primary") 1203 | info1 = gr.Textbox(label=i18n("输出信息"), value="") 1204 | easy_uploader.upload(fn=upload_to_dataset, inputs=[easy_uploader, trainset_dir4], outputs=[info1, trainset_dir4]) 1205 | gpus6 = gr.Textbox( 1206 | label=i18n("以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"), 1207 | value=gpus, 1208 | interactive=True, 1209 | visible=F0GPUVisible, 1210 | ) 1211 | gpu_info9 = gr.Textbox( 1212 | label=i18n("显卡信息"), value=gpu_info, visible=F0GPUVisible 1213 | ) 1214 | spk_id5 = gr.Slider( 1215 | minimum=0, 1216 | maximum=4, 1217 | step=1, 1218 | label=i18n("请指定说话人id"), 1219 | value=0, 1220 | interactive=True, 1221 | visible=False 1222 | ) 1223 | but1.click( 1224 | preprocess_dataset, 1225 | [trainset_dir4, exp_dir1, sr2, np7], 1226 | [info1], 1227 | api_name="train_preprocess", 1228 | ) 1229 | with gr.Column(): 1230 | f0method8 = gr.Radio( 1231 | label=i18n( 1232 | "选择音高提取算法:输入歌声可用pm提速,高质量语音但CPU差可用dio提速,harvest质量更好但慢,rmvpe效果最好且微吃CPU/GPU" 1233 | ), 1234 | choices=["pm", "harvest", "dio", "rmvpe", "rmvpe_gpu"], 1235 | value="rmvpe_gpu", 1236 | interactive=True, 1237 | ) 1238 | gpus_rmvpe = gr.Textbox( 1239 | label=i18n( 1240 | "rmvpe卡号配置:以-分隔输入使用的不同进程卡号,例如0-0-1使用在卡0上跑2个进程并在卡1上跑1个进程" 1241 | ), 1242 | value="%s-%s" % (gpus, gpus), 1243 | interactive=True, 1244 | visible=F0GPUVisible, 1245 | ) 1246 | but2 = gr.Button(i18n("特征提取"), variant="primary") 1247 | info2 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8) 1248 | f0method8.change( 1249 | fn=change_f0_method, 1250 | inputs=[f0method8], 1251 | outputs=[gpus_rmvpe], 1252 | ) 1253 | but2.click( 1254 | extract_f0_feature, 1255 | [ 1256 | gpus6, 1257 | np7, 1258 | f0method8, 1259 | if_f0_3, 1260 | exp_dir1, 1261 | version19, 1262 | gpus_rmvpe, 1263 | ], 1264 | [info2], 1265 | api_name="train_extract_f0_feature", 1266 | ) 1267 | with gr.Column(): 1268 | total_epoch11 = gr.Slider( 1269 | minimum=2, 1270 | maximum=1000, 1271 | step=1, 1272 | label=i18n("总训练轮数total_epoch"), 1273 | value=150, 1274 | interactive=True, 1275 | ) 1276 | but3 = gr.Button(i18n("训练模型"), variant="primary") 1277 | but4 = gr.Button(i18n("训练特征索引"), variant="primary") 1278 | info3 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=10) 1279 | with gr.Accordion(label=i18n("常规设置"), open=False): 1280 | save_epoch10 = gr.Slider( 1281 | minimum=1, 1282 | maximum=50, 1283 | step=1, 1284 | label=i18n("保存频率save_every_epoch"), 1285 | value=25, 1286 | interactive=True, 1287 | ) 1288 | batch_size12 = gr.Slider( 1289 | minimum=1, 1290 | maximum=40, 1291 | step=1, 1292 | label=i18n("每张显卡的batch_size"), 1293 | value=default_batch_size, 1294 | interactive=True, 1295 | ) 1296 | if_save_latest13 = gr.Radio( 1297 | label=i18n("是否仅保存最新的ckpt文件以节省硬盘空间"), 1298 | choices=[i18n("是"), i18n("否")], 1299 | value=i18n("是"), 1300 | interactive=True, 1301 | ) 1302 | if_cache_gpu17 = gr.Radio( 1303 | label=i18n( 1304 | "是否缓存所有训练集至显存. 10min以下小数据可缓存以加速训练, 大数据缓存会炸显存也加不了多少速" 1305 | ), 1306 | choices=[i18n("是"), i18n("否")], 1307 | value=i18n("否"), 1308 | interactive=True, 1309 | ) 1310 | if_save_every_weights18 = gr.Radio( 1311 | label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), 1312 | choices=[i18n("是"), i18n("否")], 1313 | value=i18n("是"), 1314 | interactive=True, 1315 | ) 1316 | with gr.Row(): 1317 | pretrained_G14 = gr.Textbox( 1318 | label=i18n("加载预训练底模G路径"), 1319 | value="assets/pretrained_v2/f0G40k.pth", 1320 | interactive=True, 1321 | visible=False 1322 | ) 1323 | pretrained_D15 = gr.Textbox( 1324 | label=i18n("加载预训练底模D路径"), 1325 | value="assets/pretrained_v2/f0D40k.pth", 1326 | interactive=True, 1327 | visible=False 1328 | ) 1329 | sr2.change( 1330 | change_sr2, 1331 | [sr2, if_f0_3, version19], 1332 | [pretrained_G14, pretrained_D15], 1333 | ) 1334 | version19.change( 1335 | change_version19, 1336 | [sr2, if_f0_3, version19], 1337 | [pretrained_G14, pretrained_D15, sr2], 1338 | ) 1339 | if_f0_3.change( 1340 | change_f0, 1341 | [if_f0_3, sr2, version19], 1342 | [f0method8, pretrained_G14, pretrained_D15], 1343 | ) 1344 | gpus16 = gr.Textbox( 1345 | label=i18n("以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"), 1346 | value=gpus, 1347 | interactive=True, 1348 | visible=False 1349 | ) 1350 | with gr.Row(): 1351 | but5 = gr.Button(i18n("一键训练"), variant="primary", visible=False) 1352 | but3.click( 1353 | click_train, 1354 | [ 1355 | exp_dir1, 1356 | sr2, 1357 | if_f0_3, 1358 | spk_id5, 1359 | save_epoch10, 1360 | total_epoch11, 1361 | batch_size12, 1362 | if_save_latest13, 1363 | pretrained_G14, 1364 | pretrained_D15, 1365 | gpus16, 1366 | if_cache_gpu17, 1367 | if_save_every_weights18, 1368 | version19, 1369 | ], 1370 | info3, 1371 | api_name="train_start", 1372 | ) 1373 | but4.click(train_index, [exp_dir1, version19], info3) 1374 | but5.click( 1375 | train1key, 1376 | [ 1377 | exp_dir1, 1378 | sr2, 1379 | if_f0_3, 1380 | trainset_dir4, 1381 | spk_id5, 1382 | np7, 1383 | f0method8, 1384 | save_epoch10, 1385 | total_epoch11, 1386 | batch_size12, 1387 | if_save_latest13, 1388 | pretrained_G14, 1389 | pretrained_D15, 1390 | gpus16, 1391 | if_cache_gpu17, 1392 | if_save_every_weights18, 1393 | version19, 1394 | gpus_rmvpe, 1395 | ], 1396 | info3, 1397 | api_name="train_start_all", 1398 | ) 1399 | 1400 | if config.iscolab: 1401 | app.queue(concurrency_count=511, max_size=1022).launch(share=True) 1402 | else: 1403 | app.queue(concurrency_count=511, max_size=1022).launch( 1404 | server_name="0.0.0.0", 1405 | inbrowser=not config.noautoopen, 1406 | server_port=config.listen_port, 1407 | quiet=True, 1408 | ) 1409 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 777gt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Packages.tar.gz: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8c064071bada6cfebc38aa9acf417a1700559f49f0d33ef0e2ca90c5245c0a7b 3 | size 172498313 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # EVC 2 | Easy Voice Cloning (Addons for RVC) 3 | 4 | These are miscellaneous files used for running the EasyGUI v1 & v2. 5 | v1 came from the orignal RVC GUI and v2 was adapted to Mangio's fork to be able to use RVC v2 and Crepe. 6 | -------------------------------------------------------------------------------- /easy-infer.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import cpu_count 2 | import threading, pdb, librosa 3 | from time import sleep 4 | from subprocess import Popen 5 | from time import sleep 6 | import torch, os, traceback, sys, warnings, shutil, numpy as np 7 | import faiss 8 | from random import shuffle 9 | import scipy.io.wavfile as wavfile 10 | from mega import Mega 11 | from pyngrok import ngrok 12 | now_dir = os.getcwd() 13 | sys.path.append(now_dir) 14 | tmp = os.path.join(now_dir, "TEMP") 15 | shutil.rmtree(tmp, ignore_errors=True) 16 | os.makedirs(tmp, exist_ok=True) 17 | os.makedirs("audios",exist_ok=True) 18 | os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True) 19 | os.makedirs(os.path.join(now_dir, "weights"), exist_ok=True) 20 | os.environ["TEMP"] = tmp 21 | warnings.filterwarnings("ignore") 22 | torch.manual_seed(114514) 23 | from i18n import I18nAuto 24 | import ffmpeg 25 | import datetime 26 | import subprocess 27 | 28 | i18n = I18nAuto() 29 | # 判断是否有能用来训练和加速推理的N卡 30 | ncpu = cpu_count() 31 | ngpu = torch.cuda.device_count() 32 | gpu_infos = [] 33 | mem = [] 34 | if (not torch.cuda.is_available()) or ngpu == 0: 35 | if_gpu_ok = False 36 | else: 37 | if_gpu_ok = False 38 | for i in range(ngpu): 39 | gpu_name = torch.cuda.get_device_name(i) 40 | if ( 41 | "10" in gpu_name 42 | or "16" in gpu_name 43 | or "20" in gpu_name 44 | or "30" in gpu_name 45 | or "40" in gpu_name 46 | or "A2" in gpu_name.upper() 47 | or "A3" in gpu_name.upper() 48 | or "A4" in gpu_name.upper() 49 | or "P4" in gpu_name.upper() 50 | or "A50" in gpu_name.upper() 51 | or "70" in gpu_name 52 | or "80" in gpu_name 53 | or "90" in gpu_name 54 | or "M4" in gpu_name.upper() 55 | or "T4" in gpu_name.upper() 56 | or "TITAN" in gpu_name.upper() 57 | ): # A10#A100#V100#A40#P40#M40#K80#A4500 58 | if_gpu_ok = True # 至少有一张能用的N卡 59 | gpu_infos.append("%s\t%s" % (i, gpu_name)) 60 | mem.append( 61 | int( 62 | torch.cuda.get_device_properties(i).total_memory 63 | / 1024 64 | / 1024 65 | / 1024 66 | + 0.4 67 | ) 68 | ) 69 | if if_gpu_ok == True and len(gpu_infos) > 0: 70 | gpu_info = "\n".join(gpu_infos) 71 | default_batch_size = min(mem) // 2 72 | else: 73 | gpu_info = "很遗憾您这没有能用的显卡来支持您训练" 74 | default_batch_size = 1 75 | gpus = "-".join([i[0] for i in gpu_infos]) 76 | from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono 77 | from scipy.io import wavfile 78 | from fairseq import checkpoint_utils 79 | import gradio as gr 80 | import logging 81 | from vc_infer_pipeline import VC 82 | from config import ( 83 | is_half, 84 | device, 85 | python_cmd, 86 | listen_port, 87 | iscolab, 88 | noparallel, 89 | noautoopen, 90 | ) 91 | from infer_uvr5 import _audio_pre_ 92 | from my_utils import load_audio 93 | from train.process_ckpt import show_info, change_info, merge, extract_small_model 94 | 95 | # from trainset_preprocess_pipeline import PreProcess 96 | logging.getLogger("numba").setLevel(logging.WARNING) 97 | 98 | 99 | class ToolButton(gr.Button, gr.components.FormComponent): 100 | """Small button with single emoji as text, fits inside gradio forms""" 101 | 102 | def __init__(self, **kwargs): 103 | super().__init__(variant="tool", **kwargs) 104 | 105 | def get_block_name(self): 106 | return "button" 107 | 108 | 109 | hubert_model = None 110 | 111 | 112 | def load_hubert(): 113 | global hubert_model 114 | models, _, _ = checkpoint_utils.load_model_ensemble_and_task( 115 | ["hubert_base.pt"], 116 | suffix="", 117 | ) 118 | hubert_model = models[0] 119 | hubert_model = hubert_model.to(device) 120 | if is_half: 121 | hubert_model = hubert_model.half() 122 | else: 123 | hubert_model = hubert_model.float() 124 | hubert_model.eval() 125 | 126 | 127 | weight_root = "weights" 128 | weight_uvr5_root = "uvr5_weights" 129 | names = [] 130 | for name in os.listdir(weight_root): 131 | if name.endswith(".pth"): 132 | names.append(name) 133 | 134 | uvr5_names = [] 135 | for name in os.listdir(weight_uvr5_root): 136 | if name.endswith(".pth"): 137 | uvr5_names.append(name.replace(".pth", "")) 138 | 139 | def find_parent(search_dir, file_name): 140 | for dirpath, dirnames, filenames in os.walk(search_dir): 141 | if file_name in filenames: 142 | return os.path.abspath(dirpath) 143 | return None 144 | 145 | def vc_single( 146 | sid, 147 | input_audio, 148 | f0_up_key, 149 | f0_file, 150 | f0_method, 151 | file_index, 152 | # file_big_npy, 153 | index_rate, 154 | ): # spk_item, input_audio0, vc_transform0,f0_file,f0method0 155 | global tgt_sr, net_g, vc, hubert_model 156 | if input_audio is None: 157 | return "You need to upload an audio", None 158 | f0_up_key = int(f0_up_key) 159 | try: 160 | parent_dir = find_parent(".",input_audio) 161 | audio = load_audio(parent_dir+'/'+input_audio, 16000) 162 | times = [0, 0, 0] 163 | if hubert_model == None: 164 | load_hubert() 165 | if_f0 = cpt.get("f0", 1) 166 | try: 167 | file_index = ( 168 | file_index.strip(" ") 169 | .strip('"') 170 | .strip("\n") 171 | .strip('"') 172 | .strip(" ") 173 | .replace("trained", "added") 174 | ) # 防止小白写错,自动帮他替换掉 175 | except: 176 | file_index='' 177 | print("Skipped index.") 178 | # file_big_npy = ( 179 | # file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ") 180 | # ) 181 | audio_opt = vc.pipeline( 182 | hubert_model, 183 | net_g, 184 | sid, 185 | audio, 186 | times, 187 | f0_up_key, 188 | f0_method, 189 | file_index, 190 | # file_big_npy, 191 | index_rate, 192 | if_f0, 193 | f0_file=f0_file, 194 | ) 195 | print( 196 | "npy: ", times[0], "s, f0: ", times[1], "s, infer: ", times[2], "s", sep="" 197 | ) 198 | return "Success", (tgt_sr, audio_opt) 199 | except: 200 | info = traceback.format_exc() 201 | print(info) 202 | return info, (None, None) 203 | 204 | 205 | def vc_multi( 206 | sid, 207 | dir_path, 208 | opt_root, 209 | paths, 210 | f0_up_key, 211 | f0_method, 212 | file_index, 213 | # file_big_npy, 214 | index_rate, 215 | ): 216 | try: 217 | dir_path = ( 218 | dir_path.strip(" ").strip('"').strip("\n").strip('"').strip(" ") 219 | ) # 防止小白拷路径头尾带了空格和"和回车 220 | opt_root = opt_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ") 221 | os.makedirs(opt_root, exist_ok=True) 222 | try: 223 | if dir_path != "": 224 | paths = [os.path.join(dir_path, name) for name in os.listdir(dir_path)] 225 | else: 226 | paths = [path.name for path in paths] 227 | except: 228 | traceback.print_exc() 229 | paths = [path.name for path in paths] 230 | infos = [] 231 | file_index = ( 232 | file_index.strip(" ") 233 | .strip('"') 234 | .strip("\n") 235 | .strip('"') 236 | .strip(" ") 237 | .replace("trained", "added") 238 | ) # 防止小白写错,自动帮他替换掉 239 | for path in paths: 240 | info, opt = vc_single( 241 | sid, 242 | path, 243 | f0_up_key, 244 | None, 245 | f0_method, 246 | file_index, 247 | # file_big_npy, 248 | index_rate, 249 | ) 250 | if info == "Success": 251 | try: 252 | tgt_sr, audio_opt = opt 253 | wavfile.write( 254 | "%s/%s" % (opt_root, os.path.basename(path)), tgt_sr, audio_opt 255 | ) 256 | except: 257 | info = traceback.format_exc() 258 | infos.append("%s->%s" % (os.path.basename(path), info)) 259 | yield "\n".join(infos) 260 | yield "\n".join(infos) 261 | except: 262 | yield traceback.format_exc() 263 | 264 | # 一个选项卡全局只能有一个音色 265 | def get_vc(sid): 266 | global n_spk, tgt_sr, net_g, vc, cpt 267 | if sid == []: 268 | global hubert_model 269 | if hubert_model != None: # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的 270 | print("clean_empty_cache") 271 | del net_g, n_spk, vc, hubert_model, tgt_sr # ,cpt 272 | hubert_model = net_g = n_spk = vc = hubert_model = tgt_sr = None 273 | if torch.cuda.is_available(): 274 | torch.cuda.empty_cache() 275 | ###楼下不这么折腾清理不干净 276 | if_f0 = cpt.get("f0", 1) 277 | if if_f0 == 1: 278 | net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=is_half) 279 | else: 280 | net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) 281 | del net_g, cpt 282 | if torch.cuda.is_available(): 283 | torch.cuda.empty_cache() 284 | cpt = None 285 | return {"visible": False, "__type__": "update"} 286 | person = "%s/%s" % (weight_root, sid) 287 | print("loading %s" % person) 288 | cpt = torch.load(person, map_location="cpu") 289 | tgt_sr = cpt["config"][-1] 290 | cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk 291 | if_f0 = cpt.get("f0", 1) 292 | if if_f0 == 1: 293 | net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=is_half) 294 | else: 295 | net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) 296 | del net_g.enc_q 297 | print(net_g.load_state_dict(cpt["weight"], strict=False)) # 不加这一行清不干净, 真奇葩 298 | net_g.eval().to(device) 299 | if is_half: 300 | net_g = net_g.half() 301 | else: 302 | net_g = net_g.float() 303 | vc = VC(tgt_sr, device, is_half) 304 | n_spk = cpt["config"][-3] 305 | return {"visible": False, "maximum": n_spk, "__type__": "update"} 306 | 307 | 308 | def change_choices(): 309 | names = [] 310 | for name in os.listdir(weight_root): 311 | if name.endswith(".pth"): 312 | names.append(name) 313 | return {"choices": sorted(names), "__type__": "update"} 314 | 315 | def change_choices2(): 316 | audio_files=[] 317 | for filename in os.listdir("./audios"): 318 | if filename.endswith(('.wav','.mp3')): 319 | audio_files.append(filename) 320 | return {"choices": sorted(audio_files), "__type__": "update"} 321 | 322 | def clean(): 323 | return {"value": "", "__type__": "update"} 324 | 325 | def change_sr2(sr2, if_f0_3): 326 | if if_f0_3 == "是": 327 | return "pretrained/f0G%s.pth" % sr2, "pretrained/f0D%s.pth" % sr2 328 | else: 329 | return "pretrained/G%s.pth" % sr2, "pretrained/D%s.pth" % sr2 330 | 331 | def get_index(): 332 | if check_for_name() != '': 333 | if iscolab: 334 | chosen_model=sorted(names)[0].split(".")[0] 335 | logs_path="/content/Retrieval-based-Voice-Conversion-WebUI/logs/"+chosen_model 336 | for file in os.listdir(logs_path): 337 | if file.endswith(".index"): 338 | return os.path.join(logs_path, file) 339 | return '' 340 | else: 341 | return '' 342 | 343 | def get_indexes(): 344 | indexes_list=[] 345 | if iscolab: 346 | for dirpath, dirnames, filenames in os.walk("/content/Retrieval-based-Voice-Conversion-WebUI/logs/"): 347 | for filename in filenames: 348 | if filename.endswith(".index"): 349 | indexes_list.append(os.path.join(dirpath,filename)) 350 | return indexes_list 351 | else: 352 | return '' 353 | 354 | audio_files=[] 355 | for filename in os.listdir("./audios"): 356 | if filename.endswith(('.wav','.mp3')): 357 | audio_files.append(filename) 358 | 359 | def get_name(): 360 | if len(audio_files) > 0: 361 | return sorted(audio_files)[0] 362 | else: 363 | return '' 364 | 365 | def save_to_wav(record_button): 366 | if record_button is None: 367 | pass 368 | else: 369 | path_to_file=record_button 370 | new_name = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")+'.wav' 371 | new_path='./audios/'+new_name 372 | shutil.move(path_to_file,new_path) 373 | return new_name 374 | 375 | def save_to_wav2(dropbox): 376 | file_path=dropbox.name 377 | shutil.move(file_path,'./audios') 378 | return os.path.basename(file_path) 379 | 380 | def match_index(speaker): 381 | folder=speaker.split(".")[0] 382 | parent_dir="/content/Retrieval-based-Voice-Conversion-WebUI/logs/"+folder 383 | for filename in os.listdir(parent_dir): 384 | if filename.endswith(".index"): 385 | index_path=os.path.join(parent_dir,filename) 386 | return index_path 387 | 388 | def download_from_url(url, model): 389 | url = url.strip() 390 | if url == '': 391 | return "URL cannot be left empty." 392 | zip_dirs = ["zips", "unzips"] 393 | for directory in zip_dirs: 394 | if os.path.exists(directory): 395 | shutil.rmtree(directory) 396 | os.makedirs("zips", exist_ok=True) 397 | os.makedirs("unzips", exist_ok=True) 398 | zipfile = model + '.zip' 399 | zipfile_path = './zips/' + zipfile 400 | MODELEPOCH = '' 401 | if "drive.google.com" in url: 402 | subprocess.run(["gdown", url, "--fuzzy", "-O", zipfile_path]) 403 | elif "mega.nz" in url: 404 | m = Mega() 405 | m.download_url(url, './zips') 406 | else: 407 | subprocess.run(["wget", url, "-O", f"./zips/{zipfile}"]) 408 | for filename in os.listdir("./zips"): 409 | if filename.endswith(".zip"): 410 | zipfile_path = os.path.join("./zips/",filename) 411 | shutil.unpack_archive(zipfile_path, "./unzips", 'zip') 412 | else: 413 | return "No zipfile found." 414 | for root, dirs, files in os.walk('./unzips'): 415 | for file in files: 416 | if "G_" in file: 417 | MODELEPOCH = file.split("G_")[1].split(".")[0] 418 | if MODELEPOCH == '': 419 | MODELEPOCH = '404' 420 | for file in files: 421 | file_path = os.path.join(root, file) 422 | if file.endswith(".npy") or file.endswith(".index"): 423 | subprocess.run(["mkdir", "-p", f"./logs/{model}"]) 424 | subprocess.run(["mv", file_path, f"./logs/{model}/"]) 425 | elif "G_" not in file and "D_" not in file and file.endswith(".pth"): 426 | subprocess.run(["mv", file_path, f"./weights/{model}.pth"]) 427 | shutil.rmtree("zips") 428 | shutil.rmtree("unzips") 429 | return "Success." 430 | 431 | def check_for_name(): 432 | if len(names) > 0: 433 | return sorted(names)[0] 434 | else: 435 | return '' 436 | print(check_for_name()) 437 | #with gr.Blocks() as app 438 | with gr.Blocks(theme=gr.themes.Base()) as app: 439 | with gr.Tab("Inference"): 440 | with gr.Row(): 441 | sid0 = gr.Dropdown(label="1.Choose your Model.", choices=sorted(names), value=check_for_name()) 442 | refresh_button = gr.Button("Refresh", variant="primary", size='sm') 443 | refresh_button.click(fn=change_choices, inputs=[], outputs=[sid0]) 444 | if check_for_name() != '': 445 | get_vc(sorted(names)[0]) 446 | else: 447 | print("Starting without preloaded Model.") 448 | vc_transform0 = gr.Number(label="Optional: You can change the pitch here or leave it at 0.", value=0) 449 | #clean_button = gr.Button("Unload Voice to Save Memory", variant="primary") 450 | spk_item = gr.Slider(minimum=0,maximum=2333,step=1,label="Please select speaker id",value=0,visible=False,interactive=True) 451 | #clean_button.click(fn=clean, inputs=[], outputs=[sid0]) 452 | sid0.change( 453 | fn=get_vc, 454 | inputs=[sid0], 455 | outputs=[], 456 | ) 457 | but0 = gr.Button("Convert", variant="primary") 458 | with gr.Row(): 459 | with gr.Column(): 460 | with gr.Row(): 461 | dropbox = gr.File(label="Drop your audio here & hit the Reload button.") 462 | with gr.Row(): 463 | record_button=gr.Audio(source="microphone", label="OR Record audio.", type="filepath") 464 | with gr.Row(): 465 | #input_audio0 = gr.Textbox(label="Enter the Path to the Audio File to be Processed (e.g. /content/youraudio.wav)",value="/content/youraudio.wav") 466 | input_audio0 = gr.Dropdown(choices=sorted(audio_files), label="2.Choose your audio.", value=get_name()) 467 | dropbox.upload(fn=save_to_wav2, inputs=[dropbox], outputs=[input_audio0]) 468 | dropbox.upload(fn=change_choices2, inputs=[], outputs=[input_audio0]) 469 | refresh_button2 = gr.Button("Refresh", variant="primary", size='sm') 470 | refresh_button2.click(fn=change_choices2, inputs=[], outputs=[input_audio0]) 471 | record_button.change(fn=save_to_wav, inputs=[record_button], outputs=[input_audio0]) 472 | record_button.change(fn=change_choices2, inputs=[], outputs=[input_audio0]) 473 | with gr.Column(): 474 | with gr.Accordion(label="Feature Settings", open=False): 475 | file_index1 = gr.Dropdown( 476 | label="3. Path to your added.index file (if it didn't automatically find it.)", 477 | value=get_index(), 478 | choices=get_indexes(), 479 | interactive=True, 480 | visible=True 481 | ) 482 | index_rate1 = gr.Slider( 483 | minimum=0, 484 | maximum=1, 485 | label="Strength:", 486 | value=0.69, 487 | interactive=True, 488 | ) 489 | sid0.change(fn=match_index, inputs=[sid0], outputs=[file_index1]) 490 | 491 | with gr.Row(): 492 | vc_output2 = gr.Audio(label="Output Audio (Click on the Three Dots in the Right Corner to Download)") 493 | with gr.Row(): 494 | f0method0 = gr.Radio( 495 | label="Optional: Change the Pitch Extraction Algorithm. Use PM for fast results or Harvest for better low range (but it's extremely slow)", 496 | choices=["pm", "harvest"], 497 | value="pm", 498 | interactive=True, 499 | ) 500 | with gr.Row(): 501 | vc_output1 = gr.Textbox(label="") 502 | with gr.Row(): 503 | instructions = gr.Markdown(""" 504 | This is simply a modified version of the RVC GUI found here: 505 | https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI 506 | """) 507 | f0_file = gr.File(label="F0 Curve File (Optional, One Pitch Per Line, Replaces Default F0 and Pitch Shift)", visible=False) 508 | but0.click( 509 | vc_single, 510 | [ 511 | spk_item, 512 | input_audio0, 513 | vc_transform0, 514 | f0_file, 515 | f0method0, 516 | file_index1, 517 | index_rate1, 518 | ], 519 | [vc_output1, vc_output2] 520 | ) 521 | with gr.Tab("Download Model"): 522 | with gr.Row(): 523 | url=gr.Textbox(label="Enter the URL to the Model:") 524 | with gr.Row(): 525 | model = gr.Textbox(label="Name your model:") 526 | download_button=gr.Button(label="Download") 527 | with gr.Row(): 528 | status_bar=gr.Textbox(label="") 529 | download_button.click(fn=download_from_url, inputs=[url, model], outputs=[status_bar]) 530 | if iscolab: 531 | try: 532 | public_url = ngrok.connect(7860) 533 | print('Click on THIS link: '+public_url) 534 | except: 535 | print('Failed to create ngrok URL') 536 | try: 537 | app.launch(share=True) 538 | except KeyboardInterrupt: 539 | ngrok.kill() 540 | else: 541 | app.queue(concurrency_count=511, max_size=1022).launch( 542 | server_name="0.0.0.0", 543 | inbrowser=not noautoopen, 544 | server_port=listen_port, 545 | quiet=True, 546 | ) 547 | -------------------------------------------------------------------------------- /easy-infer2.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import cpu_count 2 | import threading, pdb, librosa 3 | from time import sleep 4 | from subprocess import Popen 5 | from time import sleep 6 | import torch, os, traceback, sys, warnings, shutil, numpy as np 7 | import faiss 8 | from random import shuffle 9 | import scipy.io.wavfile as wavfile 10 | from mega import Mega 11 | from config import Config 12 | from pyngrok import ngrok 13 | now_dir = os.getcwd() 14 | sys.path.append(now_dir) 15 | tmp = os.path.join(now_dir, "TEMP") 16 | shutil.rmtree(tmp, ignore_errors=True) 17 | os.makedirs(tmp, exist_ok=True) 18 | os.makedirs("audios",exist_ok=True) 19 | os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True) 20 | os.makedirs(os.path.join(now_dir, "weights"), exist_ok=True) 21 | os.environ["TEMP"] = tmp 22 | warnings.filterwarnings("ignore") 23 | torch.manual_seed(114514) 24 | from i18n import I18nAuto 25 | import ffmpeg 26 | import datetime 27 | import subprocess 28 | 29 | i18n = I18nAuto() 30 | # 判断是否有能用来训练和加速推理的N卡 31 | ncpu = cpu_count() 32 | ngpu = torch.cuda.device_count() 33 | gpu_infos = [] 34 | mem = [] 35 | if (not torch.cuda.is_available()) or ngpu == 0: 36 | if_gpu_ok = False 37 | else: 38 | if_gpu_ok = False 39 | for i in range(ngpu): 40 | gpu_name = torch.cuda.get_device_name(i) 41 | if ( 42 | "10" in gpu_name 43 | or "16" in gpu_name 44 | or "20" in gpu_name 45 | or "30" in gpu_name 46 | or "40" in gpu_name 47 | or "A2" in gpu_name.upper() 48 | or "A3" in gpu_name.upper() 49 | or "A4" in gpu_name.upper() 50 | or "P4" in gpu_name.upper() 51 | or "A50" in gpu_name.upper() 52 | or "70" in gpu_name 53 | or "80" in gpu_name 54 | or "90" in gpu_name 55 | or "M4" in gpu_name.upper() 56 | or "T4" in gpu_name.upper() 57 | or "TITAN" in gpu_name.upper() 58 | ): # A10#A100#V100#A40#P40#M40#K80#A4500 59 | if_gpu_ok = True # 至少有一张能用的N卡 60 | gpu_infos.append("%s\t%s" % (i, gpu_name)) 61 | mem.append( 62 | int( 63 | torch.cuda.get_device_properties(i).total_memory 64 | / 1024 65 | / 1024 66 | / 1024 67 | + 0.4 68 | ) 69 | ) 70 | if if_gpu_ok == True and len(gpu_infos) > 0: 71 | gpu_info = "\n".join(gpu_infos) 72 | default_batch_size = min(mem) // 2 73 | else: 74 | gpu_info = "很遗憾您这没有能用的显卡来支持您训练" 75 | default_batch_size = 1 76 | gpus = "-".join([i[0] for i in gpu_infos]) 77 | from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono 78 | from scipy.io import wavfile 79 | from fairseq import checkpoint_utils 80 | import gradio as gr 81 | import logging 82 | from vc_infer_pipeline import VC 83 | config = Config() 84 | from infer_uvr5 import _audio_pre_ 85 | from my_utils import load_audio 86 | from train.process_ckpt import show_info, change_info, merge, extract_small_model 87 | 88 | # from trainset_preprocess_pipeline import PreProcess 89 | logging.getLogger("numba").setLevel(logging.WARNING) 90 | 91 | 92 | class ToolButton(gr.Button, gr.components.IOComponent): 93 | """Small button with single emoji as text, fits inside gradio forms""" 94 | 95 | def __init__(self, **kwargs): 96 | super().__init__(variant="tool", **kwargs) 97 | 98 | def get_block_name(self): 99 | return "button" 100 | 101 | 102 | hubert_model = None 103 | 104 | 105 | def load_hubert(): 106 | global hubert_model 107 | models, _, _ = checkpoint_utils.load_model_ensemble_and_task( 108 | ["hubert_base.pt"], 109 | suffix="", 110 | ) 111 | hubert_model = models[0] 112 | hubert_model = hubert_model.to(config.device) 113 | if config.is_half: 114 | hubert_model = hubert_model.half() 115 | else: 116 | hubert_model = hubert_model.float() 117 | hubert_model.eval() 118 | 119 | 120 | weight_root = "weights" 121 | weight_uvr5_root = "uvr5_weights" 122 | names = [] 123 | for name in os.listdir(weight_root): 124 | if name.endswith(".pth"): 125 | names.append(name) 126 | 127 | uvr5_names = [] 128 | for name in os.listdir(weight_uvr5_root): 129 | if name.endswith(".pth"): 130 | uvr5_names.append(name.replace(".pth", "")) 131 | 132 | def find_parent(search_dir, file_name): 133 | for dirpath, dirnames, filenames in os.walk(search_dir): 134 | if file_name in filenames: 135 | return os.path.abspath(dirpath) 136 | return None 137 | 138 | def vc_single( 139 | sid, 140 | input_audio, 141 | f0_up_key, 142 | f0_file, 143 | f0_method, 144 | file_index, 145 | # file_big_npy, 146 | index_rate, 147 | crepe_hop_length 148 | ): # spk_item, input_audio0, vc_transform0,f0_file,f0method0 149 | global tgt_sr, net_g, vc, hubert_model 150 | if input_audio is None: 151 | return "You need to upload an audio", None 152 | f0_up_key = int(f0_up_key) 153 | try: 154 | parent_dir = find_parent(".",input_audio) 155 | print(parent_dir+'/'+input_audio) 156 | audio = load_audio(parent_dir+'/'+input_audio, 16000) 157 | times = [0, 0, 0] 158 | if hubert_model == None: 159 | load_hubert() 160 | if_f0 = cpt.get("f0", 1) 161 | try: 162 | file_index = ( 163 | file_index.strip(" ") 164 | .strip('"') 165 | .strip("\n") 166 | .strip('"') 167 | .strip(" ") 168 | .replace("trained", "added") 169 | ) # 防止小白写错,自动帮他替换掉 170 | except: 171 | file_index='' 172 | print("Skipped index.") 173 | # file_big_npy = ( 174 | # file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ") 175 | # ) 176 | audio_opt = vc.pipeline( 177 | hubert_model, 178 | net_g, 179 | sid, 180 | audio, 181 | times, 182 | f0_up_key, 183 | f0_method, 184 | file_index, 185 | # file_big_npy, 186 | index_rate, 187 | if_f0, 188 | crepe_hop_length, 189 | f0_file=f0_file, 190 | ) 191 | print( 192 | "npy: ", times[0], "s, f0: ", times[1], "s, infer: ", times[2], "s", sep="" 193 | ) 194 | return "Success", (tgt_sr, audio_opt) 195 | except: 196 | info = traceback.format_exc() 197 | print(info) 198 | return info, (None, None) 199 | 200 | 201 | def vc_multi( 202 | sid, 203 | dir_path, 204 | opt_root, 205 | paths, 206 | f0_up_key, 207 | f0_method, 208 | file_index, 209 | # file_big_npy, 210 | index_rate, 211 | ): 212 | try: 213 | dir_path = ( 214 | dir_path.strip(" ").strip('"').strip("\n").strip('"').strip(" ") 215 | ) # 防止小白拷路径头尾带了空格和"和回车 216 | opt_root = opt_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ") 217 | os.makedirs(opt_root, exist_ok=True) 218 | try: 219 | if dir_path != "": 220 | paths = [os.path.join(dir_path, name) for name in os.listdir(dir_path)] 221 | else: 222 | paths = [path.name for path in paths] 223 | except: 224 | traceback.print_exc() 225 | paths = [path.name for path in paths] 226 | infos = [] 227 | file_index = ( 228 | file_index.strip(" ") 229 | .strip('"') 230 | .strip("\n") 231 | .strip('"') 232 | .strip(" ") 233 | .replace("trained", "added") 234 | ) # 防止小白写错,自动帮他替换掉 235 | for path in paths: 236 | info, opt = vc_single( 237 | sid, 238 | path, 239 | f0_up_key, 240 | None, 241 | f0_method, 242 | file_index, 243 | # file_big_npy, 244 | index_rate, 245 | ) 246 | if info == "Success": 247 | try: 248 | tgt_sr, audio_opt = opt 249 | wavfile.write( 250 | "%s/%s" % (opt_root, os.path.basename(path)), tgt_sr, audio_opt 251 | ) 252 | except: 253 | info = traceback.format_exc() 254 | infos.append("%s->%s" % (os.path.basename(path), info)) 255 | yield "\n".join(infos) 256 | yield "\n".join(infos) 257 | except: 258 | yield traceback.format_exc() 259 | 260 | # 一个选项卡全局只能有一个音色 261 | def get_vc(sid): 262 | global n_spk, tgt_sr, net_g, vc, cpt 263 | if sid == []: 264 | global hubert_model 265 | if hubert_model != None: # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的 266 | print("clean_empty_cache") 267 | del net_g, n_spk, vc, hubert_model, tgt_sr # ,cpt 268 | hubert_model = net_g = n_spk = vc = hubert_model = tgt_sr = None 269 | if torch.cuda.is_available(): 270 | torch.cuda.empty_cache() 271 | ###楼下不这么折腾清理不干净 272 | if_f0 = cpt.get("f0", 1) 273 | if if_f0 == 1: 274 | net_g = SynthesizerTrnMs256NSFsid( 275 | *cpt["config"], is_half=config.is_half 276 | ) 277 | else: 278 | net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) 279 | del net_g, cpt 280 | if torch.cuda.is_available(): 281 | torch.cuda.empty_cache() 282 | cpt = None 283 | return {"visible": False, "__type__": "update"} 284 | person = "%s/%s" % (weight_root, sid) 285 | print("loading %s" % person) 286 | cpt = torch.load(person, map_location="cpu") 287 | tgt_sr = cpt["config"][-1] 288 | cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk 289 | if_f0 = cpt.get("f0", 1) 290 | if if_f0 == 1: 291 | net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half) 292 | else: 293 | net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) 294 | del net_g.enc_q 295 | print(net_g.load_state_dict(cpt["weight"], strict=False)) # 不加这一行清不干净, 真奇葩 296 | net_g.eval().to(config.device) 297 | if config.is_half: 298 | net_g = net_g.half() 299 | else: 300 | net_g = net_g.float() 301 | vc = VC(tgt_sr, config) 302 | n_spk = cpt["config"][-3] 303 | return {"visible": True, "maximum": n_spk, "__type__": "update"} 304 | 305 | 306 | def change_choices(): 307 | names = [] 308 | for name in os.listdir(weight_root): 309 | if name.endswith(".pth"): 310 | names.append(name) 311 | return {"choices": sorted(names), "__type__": "update"} 312 | 313 | def change_choices2(): 314 | audio_files=[] 315 | for filename in os.listdir("./audios"): 316 | if filename.endswith(('.wav','.mp3')): 317 | audio_files.append(filename) 318 | return {"choices": sorted(audio_files), "__type__": "update"} 319 | 320 | def clean(): 321 | return {"value": "", "__type__": "update"} 322 | 323 | def change_sr2(sr2, if_f0_3): 324 | if if_f0_3 == "是": 325 | return "pretrained/f0G%s.pth" % sr2, "pretrained/f0D%s.pth" % sr2 326 | else: 327 | return "pretrained/G%s.pth" % sr2, "pretrained/D%s.pth" % sr2 328 | 329 | def get_index(): 330 | if check_for_name() != '': 331 | if config.iscolab: 332 | chosen_model=sorted(names)[0].split(".")[0] 333 | logs_path="/content/Retrieval-based-Voice-Conversion-WebUI/logs/"+chosen_model 334 | for file in os.listdir(logs_path): 335 | if file.endswith(".index"): 336 | return os.path.join(logs_path, file) 337 | return '' 338 | else: 339 | return '' 340 | 341 | def get_indexes(): 342 | indexes_list=[] 343 | if config.iscolab: 344 | for dirpath, dirnames, filenames in os.walk("/content/Retrieval-based-Voice-Conversion-WebUI/logs/"): 345 | for filename in filenames: 346 | if filename.endswith(".index"): 347 | indexes_list.append(os.path.join(dirpath,filename)) 348 | return indexes_list 349 | else: 350 | return '' 351 | 352 | audio_files=[] 353 | for filename in os.listdir("./audios"): 354 | if filename.endswith(('.wav','.mp3')): 355 | audio_files.append(filename) 356 | 357 | def get_name(): 358 | if len(audio_files) > 0: 359 | return sorted(audio_files)[0] 360 | else: 361 | return '' 362 | 363 | def save_to_wav(record_button): 364 | if record_button is None: 365 | pass 366 | else: 367 | path_to_file=record_button 368 | new_name = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")+'.wav' 369 | new_path='./audios/'+new_name 370 | shutil.move(path_to_file,new_path) 371 | return new_name 372 | 373 | def save_to_wav2(dropbox): 374 | file_path=dropbox.name 375 | shutil.move(file_path,'./audios') 376 | return os.path.basename(file_path) 377 | 378 | def match_index(speaker): 379 | folder=speaker.split(".")[0] 380 | parent_dir="/content/Retrieval-based-Voice-Conversion-WebUI/logs/"+folder 381 | for filename in os.listdir(parent_dir): 382 | if filename.endswith(".index"): 383 | index_path=os.path.join(parent_dir,filename) 384 | return index_path 385 | 386 | def download_from_url(url, model): 387 | url = url.strip() 388 | if url == '': 389 | return "URL cannot be left empty." 390 | zip_dirs = ["zips", "unzips"] 391 | for directory in zip_dirs: 392 | if os.path.exists(directory): 393 | shutil.rmtree(directory) 394 | os.makedirs("zips", exist_ok=True) 395 | os.makedirs("unzips", exist_ok=True) 396 | zipfile = model + '.zip' 397 | zipfile_path = './zips/' + zipfile 398 | MODELEPOCH = '' 399 | if "drive.google.com" in url: 400 | subprocess.run(["gdown", url, "--fuzzy", "-O", zipfile_path]) 401 | elif "mega.nz" in url: 402 | m = Mega() 403 | m.download_url(url, './zips') 404 | else: 405 | subprocess.run(["wget", url, "-O", f"./zips/{zipfile}"]) 406 | for filename in os.listdir("./zips"): 407 | if filename.endswith(".zip"): 408 | zipfile_path = os.path.join("./zips/",filename) 409 | shutil.unpack_archive(zipfile_path, "./unzips", 'zip') 410 | else: 411 | return "No zipfile found." 412 | for root, dirs, files in os.walk('./unzips'): 413 | for file in files: 414 | if "G_" in file: 415 | MODELEPOCH = file.split("G_")[1].split(".")[0] 416 | if MODELEPOCH == '': 417 | MODELEPOCH = '404' 418 | for file in files: 419 | file_path = os.path.join(root, file) 420 | if file.endswith(".npy") or file.endswith(".index"): 421 | subprocess.run(["mkdir", "-p", f"./logs/{model}"]) 422 | subprocess.run(["mv", file_path, f"./logs/{model}/"]) 423 | elif "G_" not in file and "D_" not in file and file.endswith(".pth"): 424 | subprocess.run(["mv", file_path, f"./weights/{model}.pth"]) 425 | shutil.rmtree("zips") 426 | shutil.rmtree("unzips") 427 | return "Success." 428 | 429 | def check_for_name(): 430 | if len(names) > 0: 431 | return sorted(names)[0] 432 | else: 433 | return '' 434 | print(check_for_name()) 435 | #with gr.Blocks() as app 436 | with gr.Blocks(theme=gr.themes.Base()) as app: 437 | with gr.Tab("Inference"): 438 | with gr.Row(): 439 | sid0 = gr.Dropdown(label="1.Choose your Model.", choices=sorted(names), value=check_for_name()) 440 | refresh_button = gr.Button("Refresh", variant="primary", size='sm') 441 | refresh_button.click(fn=change_choices, inputs=[], outputs=[sid0]) 442 | if check_for_name() != '': 443 | get_vc(sorted(names)[0]) 444 | else: 445 | print("Starting without preloaded Model.") 446 | vc_transform0 = gr.Number(label="Optional: You can change the pitch here or leave it at 0.", value=0) 447 | #clean_button = gr.Button("Unload Voice to Save Memory", variant="primary") 448 | spk_item = gr.Slider(minimum=0,maximum=2333,step=1,label="Please select speaker id",value=0,visible=False,interactive=True) 449 | #clean_button.click(fn=clean, inputs=[], outputs=[sid0]) 450 | sid0.change( 451 | fn=get_vc, 452 | inputs=[sid0], 453 | outputs=[], 454 | ) 455 | but0 = gr.Button("Convert", variant="primary") 456 | with gr.Row(): 457 | with gr.Column(): 458 | with gr.Row(): 459 | dropbox = gr.File(label="Drop your audio here & hit the Reload button.") 460 | with gr.Row(): 461 | record_button=gr.Audio(source="microphone", label="OR Record audio.", type="filepath") 462 | with gr.Row(): 463 | #input_audio0 = gr.Textbox(label="Enter the Path to the Audio File to be Processed (e.g. /content/youraudio.wav)",value="/content/youraudio.wav") 464 | input_audio0 = gr.Dropdown(choices=sorted(audio_files), label="2.Choose your audio.", value=get_name()) 465 | dropbox.upload(fn=save_to_wav2, inputs=[dropbox], outputs=[input_audio0]) 466 | dropbox.upload(fn=change_choices2, inputs=[], outputs=[input_audio0]) 467 | refresh_button2 = gr.Button("Refresh", variant="primary", size='sm') 468 | refresh_button2.click(fn=change_choices2, inputs=[], outputs=[input_audio0]) 469 | record_button.change(fn=save_to_wav, inputs=[record_button], outputs=[input_audio0]) 470 | record_button.change(fn=change_choices2, inputs=[], outputs=[input_audio0]) 471 | with gr.Column(): 472 | with gr.Accordion(label="Feature Settings", open=False): 473 | file_index1 = gr.Dropdown( 474 | label="3. Path to your added.index file (if it didn't automatically find it.)", 475 | value=get_index(), 476 | choices=get_indexes(), 477 | interactive=True, 478 | visible=True 479 | ) 480 | index_rate1 = gr.Slider( 481 | minimum=0, 482 | maximum=1, 483 | label="Strength:", 484 | value=0.69, 485 | interactive=True, 486 | ) 487 | sid0.change(fn=match_index, inputs=[sid0], outputs=[file_index1]) 488 | 489 | with gr.Row(): 490 | vc_output2 = gr.Audio(label="Output Audio (Click on the Three Dots in the Right Corner to Download)") 491 | with gr.Row(): 492 | f0method0 = gr.Radio( 493 | label="Optional: Change the Pitch Extraction Algorithm. Use PM for fast results or Harvest for better low range (but it's extremely slow) or Crepe for the best of both worlds.", 494 | choices=["pm", "harvest","crepe"], 495 | value="crepe", 496 | interactive=True, 497 | ) 498 | crepe_hop_length = gr.Slider( 499 | minimum=1, 500 | maximum=512, 501 | step=1, 502 | label=i18n("crepe_hop_length"), 503 | value=128, 504 | interactive=True, 505 | visible=False 506 | ) 507 | with gr.Row(): 508 | vc_output1 = gr.Textbox(label="") 509 | with gr.Row(): 510 | instructions = gr.Markdown(""" 511 | This is simply a modified version of the RVC GUI found here: 512 | https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI 513 | """) 514 | f0_file = gr.File(label="F0 Curve File (Optional, One Pitch Per Line, Replaces Default F0 and Pitch Shift)", visible=False) 515 | but0.click( 516 | vc_single, 517 | [ 518 | spk_item, 519 | input_audio0, 520 | vc_transform0, 521 | f0_file, 522 | f0method0, 523 | file_index1, 524 | index_rate1, 525 | crepe_hop_length 526 | ], 527 | [vc_output1, vc_output2] 528 | ) 529 | with gr.Tab("Download Model"): 530 | with gr.Row(): 531 | url=gr.Textbox(label="Enter the URL to the Model:") 532 | with gr.Row(): 533 | model = gr.Textbox(label="Name your model:") 534 | download_button=gr.Button(label="Download") 535 | with gr.Row(): 536 | status_bar=gr.Textbox(label="") 537 | download_button.click(fn=download_from_url, inputs=[url, model], outputs=[status_bar]) 538 | 539 | if config.iscolab or config.paperspace: # Share gradio link for colab and paperspace (FORK FEATURE) 540 | try: 541 | public_url = ngrok.connect(7860) 542 | print('Click on THIS link: '+public_url) 543 | except: 544 | print('Failed to create ngrok URL') 545 | try: 546 | app.launch(share=True) 547 | except KeyboardInterrupt: 548 | ngrok.kill() 549 | else: 550 | app.launch( 551 | server_name="0.0.0.0", 552 | inbrowser=not config.noautoopen, 553 | server_port=config.listen_port, 554 | quiet=True, 555 | ) 556 | -------------------------------------------------------------------------------- /filefinder: -------------------------------------------------------------------------------- 1 | import os 2 | def find_at(file_name, search_path='/'): 3 | found_files = [] 4 | for root, dirs, files in os.walk(search_path): 5 | if file_name in files: 6 | found_files.append(os.path.join(root, file_name)) 7 | return found_files 8 | -------------------------------------------------------------------------------- /myinfer.py: -------------------------------------------------------------------------------- 1 | ''' 2 | runtime\python.exe myinfer.py 0 "E:\codes\py39\RVC-beta\todo-songs\1111.wav" "E:\codes\py39\logs\mi-test\added_IVF677_Flat_nprobe_7.index" harvest "test.wav" "weights/mi-test.pth" 0.6 cuda:0 True 3 | ''' 4 | import os,sys,pdb,torch 5 | now_dir = os.getcwd() 6 | sys.path.append(now_dir) 7 | import argparse 8 | import glob 9 | import sys 10 | import torch 11 | from multiprocessing import cpu_count 12 | class Config: 13 | def __init__(self,device,is_half): 14 | self.device = device 15 | self.is_half = is_half 16 | self.n_cpu = 0 17 | self.gpu_name = None 18 | self.gpu_mem = None 19 | self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config() 20 | 21 | def device_config(self) -> tuple: 22 | if torch.cuda.is_available(): 23 | i_device = int(self.device.split(":")[-1]) 24 | self.gpu_name = torch.cuda.get_device_name(i_device) 25 | if ( 26 | ("16" in self.gpu_name and "V100" not in self.gpu_name.upper()) 27 | or "P40" in self.gpu_name.upper() 28 | or "1060" in self.gpu_name 29 | or "1070" in self.gpu_name 30 | or "1080" in self.gpu_name 31 | ): 32 | print("16系/10系显卡和P40强制单精度") 33 | self.is_half = False 34 | for config_file in ["32k.json", "40k.json", "48k.json"]: 35 | with open(f"configs/{config_file}", "r") as f: 36 | strr = f.read().replace("true", "false") 37 | with open(f"configs/{config_file}", "w") as f: 38 | f.write(strr) 39 | with open("trainset_preprocess_pipeline_print.py", "r") as f: 40 | strr = f.read().replace("3.7", "3.0") 41 | with open("trainset_preprocess_pipeline_print.py", "w") as f: 42 | f.write(strr) 43 | else: 44 | self.gpu_name = None 45 | self.gpu_mem = int( 46 | torch.cuda.get_device_properties(i_device).total_memory 47 | / 1024 48 | / 1024 49 | / 1024 50 | + 0.4 51 | ) 52 | if self.gpu_mem <= 4: 53 | with open("trainset_preprocess_pipeline_print.py", "r") as f: 54 | strr = f.read().replace("3.7", "3.0") 55 | with open("trainset_preprocess_pipeline_print.py", "w") as f: 56 | f.write(strr) 57 | elif torch.backends.mps.is_available(): 58 | print("没有发现支持的N卡, 使用MPS进行推理") 59 | self.device = "mps" 60 | else: 61 | print("没有发现支持的N卡, 使用CPU进行推理") 62 | self.device = "cpu" 63 | self.is_half = True 64 | 65 | if self.n_cpu == 0: 66 | self.n_cpu = cpu_count() 67 | 68 | if self.is_half: 69 | # 6G显存配置 70 | x_pad = 3 71 | x_query = 10 72 | x_center = 60 73 | x_max = 65 74 | else: 75 | # 5G显存配置 76 | x_pad = 1 77 | x_query = 6 78 | x_center = 38 79 | x_max = 41 80 | 81 | if self.gpu_mem != None and self.gpu_mem <= 4: 82 | x_pad = 1 83 | x_query = 5 84 | x_center = 30 85 | x_max = 32 86 | 87 | return x_pad, x_query, x_center, x_max 88 | 89 | f0up_key=sys.argv[1] 90 | input_path=sys.argv[2] 91 | index_path=sys.argv[3] 92 | f0method=sys.argv[4]#harvest or pm 93 | opt_path=sys.argv[5] 94 | model_path=sys.argv[6] 95 | index_rate=float(sys.argv[7]) 96 | device=sys.argv[8] 97 | is_half=bool(sys.argv[9]) 98 | print(sys.argv) 99 | config=Config(device,is_half) 100 | now_dir=os.getcwd() 101 | sys.path.append(now_dir) 102 | from vc_infer_pipeline import VC 103 | from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono 104 | from my_utils import load_audio 105 | from fairseq import checkpoint_utils 106 | from scipy.io import wavfile 107 | 108 | hubert_model=None 109 | def load_hubert(): 110 | global hubert_model 111 | models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(["hubert_base.pt"],suffix="",) 112 | hubert_model = models[0] 113 | hubert_model = hubert_model.to(device) 114 | if(is_half):hubert_model = hubert_model.half() 115 | else:hubert_model = hubert_model.float() 116 | hubert_model.eval() 117 | 118 | def vc_single(sid,input_audio,f0_up_key,f0_file,f0_method,file_index,index_rate): 119 | global tgt_sr,net_g,vc,hubert_model 120 | if input_audio is None:return "You need to upload an audio", None 121 | f0_up_key = int(f0_up_key) 122 | audio=load_audio(input_audio,16000) 123 | times = [0, 0, 0] 124 | if(hubert_model==None):load_hubert() 125 | if_f0 = cpt.get("f0", 1) 126 | # audio_opt=vc.pipeline(hubert_model,net_g,sid,audio,times,f0_up_key,f0_method,file_index,file_big_npy,index_rate,if_f0,f0_file=f0_file) 127 | audio_opt=vc.pipeline(hubert_model,net_g,sid,audio,times,f0_up_key,f0_method,file_index,index_rate,if_f0,f0_file=f0_file) 128 | print(times) 129 | return audio_opt 130 | 131 | 132 | def get_vc(model_path): 133 | global n_spk,tgt_sr,net_g,vc,cpt,device,is_half 134 | print("loading pth %s"%model_path) 135 | cpt = torch.load(model_path, map_location="cpu") 136 | tgt_sr = cpt["config"][-1] 137 | cpt["config"][-3]=cpt["weight"]["emb_g.weight"].shape[0]#n_spk 138 | if_f0=cpt.get("f0",1) 139 | if(if_f0==1): 140 | net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=is_half) 141 | else: 142 | net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) 143 | del net_g.enc_q 144 | print(net_g.load_state_dict(cpt["weight"], strict=False)) # 不加这一行清不干净,真奇葩 145 | net_g.eval().to(device) 146 | if (is_half):net_g = net_g.half() 147 | else:net_g = net_g.float() 148 | vc = VC(tgt_sr, config) 149 | n_spk=cpt["config"][-3] 150 | # return {"visible": True,"maximum": n_spk, "__type__": "update"} 151 | 152 | 153 | get_vc(model_path) 154 | wav_opt=vc_single(0,input_path,f0up_key,None,f0method,index_path,index_rate) 155 | wavfile.write(opt_path, tgt_sr, wav_opt) 156 | -------------------------------------------------------------------------------- /ngrokabled.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RejektsAI/EVC/f00f1aab704a8df00df2710fc9270b788ec474b7/ngrokabled.zip -------------------------------------------------------------------------------- /somegirl.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RejektsAI/EVC/f00f1aab704a8df00df2710fc9270b788ec474b7/somegirl.mp3 -------------------------------------------------------------------------------- /someguy.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RejektsAI/EVC/f00f1aab704a8df00df2710fc9270b788ec474b7/someguy.mp3 -------------------------------------------------------------------------------- /wav2lip-HD.tar.gz: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:66589c1796b79dd3ded0e6df5345317a3a477a378a1ac0734792e79581e47064 3 | size 405266588 4 | -------------------------------------------------------------------------------- /wav2lip-cache.tar.gz: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:99c6e934981cded0621470a77c9d9979f319e0eb5624f0993d176846c86fd42e 3 | size 3476760 4 | --------------------------------------------------------------------------------