├── .gitattributes
├── EasierGUI.py
├── GUI.py
├── LICENSE
├── Packages.tar.gz
├── README.md
├── easy-infer.py
├── easy-infer2.py
├── filefinder
├── myinfer.py
├── ngrokabled.zip
├── somegirl.mp3
├── someguy.mp3
├── wav2lip-HD.tar.gz
└── wav2lip-cache.tar.gz
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.tar.gz filter=lfs diff=lfs merge=lfs -text
2 |
--------------------------------------------------------------------------------
/EasierGUI.py:
--------------------------------------------------------------------------------
1 | import subprocess, torch, os, traceback, sys, warnings, shutil, numpy as np
2 | from mega import Mega
3 | os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1"
4 | import threading
5 | from time import sleep
6 | from subprocess import Popen
7 | import faiss
8 | from random import shuffle
9 | import json, datetime
10 |
now_dir = os.getcwd()
sys.path.append(now_dir)  # make repo-local modules importable
# Scratch directory, wiped on every start.
tmp = os.path.join(now_dir, "TEMP")
shutil.rmtree(tmp, ignore_errors=True)
# Remove stale copies of bundled packages from the portable runtime, if present.
shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True)
shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack" % (now_dir), ignore_errors=True)
os.makedirs(tmp, exist_ok=True)
os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True)
os.makedirs(os.path.join(now_dir, "weights"), exist_ok=True)
os.environ["TEMP"] = tmp  # point temp-file consumers at our scratch dir
warnings.filterwarnings("ignore")
torch.manual_seed(114514)  # fixed seed for reproducible torch behavior
from i18n import I18nAuto
import ffmpeg
from MDXNet import MDXNetDereverb

i18n = I18nAuto()
i18n.print()
# Detect NVIDIA GPUs usable for training and accelerated inference.
ngpu = torch.cuda.device_count()
gpu_infos = []
mem = []
if (not torch.cuda.is_available()) or ngpu == 0:
    if_gpu_ok = False
else:
    if_gpu_ok = False
    for i in range(ngpu):
        gpu_name = torch.cuda.get_device_name(i)
        # Whitelist by substrings of the device name (GeForce 10/16/20/30/40
        # series, datacenter A/P/T/M cards, TITAN, ...).
        if (
            "10" in gpu_name
            or "16" in gpu_name
            or "20" in gpu_name
            or "30" in gpu_name
            or "40" in gpu_name
            or "A2" in gpu_name.upper()
            or "A3" in gpu_name.upper()
            or "A4" in gpu_name.upper()
            or "P4" in gpu_name.upper()
            or "A50" in gpu_name.upper()
            or "A60" in gpu_name.upper()
            or "70" in gpu_name
            or "80" in gpu_name
            or "90" in gpu_name
            or "M4" in gpu_name.upper()
            or "T4" in gpu_name.upper()
            or "TITAN" in gpu_name.upper()
        ):  # A10#A100#V100#A40#P40#M40#K80#A4500
            if_gpu_ok = True  # at least one usable NVIDIA GPU was found
            gpu_infos.append("%s\t%s" % (i, gpu_name))
            mem.append(
                int(
                    torch.cuda.get_device_properties(i).total_memory
                    / 1024
                    / 1024
                    / 1024
                    + 0.4
                )
            )
if if_gpu_ok == True and len(gpu_infos) > 0:
    gpu_info = "\n".join(gpu_infos)
    # Heuristic default batch size: half the smallest card's VRAM in GiB.
    default_batch_size = min(mem) // 2
else:
    gpu_info = i18n("很遗憾您这没有能用的显卡来支持您训练")
    default_batch_size = 1
# NOTE(review): i[0] takes the first character of "idx\tname", which is only
# correct for single-digit GPU indices (<= 10 GPUs) — confirm acceptable.
gpus = "-".join([i[0] for i in gpu_infos])
76 | from infer_pack.models import (
77 | SynthesizerTrnMs256NSFsid,
78 | SynthesizerTrnMs256NSFsid_nono,
79 | SynthesizerTrnMs768NSFsid,
80 | SynthesizerTrnMs768NSFsid_nono,
81 | )
82 | import soundfile as sf
83 | from fairseq import checkpoint_utils
84 | import gradio as gr
85 | import logging
86 | from vc_infer_pipeline import VC
87 | from config import Config
88 | from infer_uvr5 import _audio_pre_, _audio_pre_new
89 | from my_utils import load_audio
90 | from train.process_ckpt import show_info, change_info, merge, extract_small_model
91 |
config = Config()  # device / CLI configuration shared by the whole UI
# from trainset_preprocess_pipeline import PreProcess
logging.getLogger("numba").setLevel(logging.WARNING)  # suppress numba INFO/DEBUG logs
95 |
96 |
class ToolButton(gr.Button, gr.components.FormComponent):
    """Small button with a single emoji as its label, sized to sit inside gradio forms."""

    def get_block_name(self):
        # Tell gradio to render this component with the plain "button" frontend.
        return "button"

    def __init__(self, **kwargs):
        # Always use the compact "tool" variant; everything else is caller-supplied.
        super().__init__(variant="tool", **kwargs)
105 |
106 |
# Lazily-initialized HuBERT feature extractor shared by all conversions.
hubert_model = None


def load_hubert():
    """Load hubert_base.pt onto the configured device into the global `hubert_model`."""
    global hubert_model
    models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
        ["hubert_base.pt"],
        suffix="",
    )
    model = models[0].to(config.device)
    # Match the precision selected in Config.
    hubert_model = model.half() if config.is_half else model.float()
    hubert_model.eval()
123 |
124 |
weight_root = "weights"
weight_uvr5_root = "uvr5_weights"
index_root = "logs"

# Voice-model checkpoints (*.pth) available for inference.
names = [name for name in os.listdir(weight_root) if name.endswith(".pth")]

# Feature-retrieval indexes: every *.index under logs/, excluding the
# intermediate "trained" ones.
index_paths = []
for root, dirs, files in os.walk(index_root, topdown=False):
    for name in files:
        if name.endswith(".index") and "trained" not in name:
            index_paths.append("%s/%s" % (root, name))

# UVR5 separation models (*.pth or ONNX), listed without the .pth extension.
uvr5_names = [
    name.replace(".pth", "")
    for name in os.listdir(weight_uvr5_root)
    if name.endswith(".pth") or "onnx" in name
]
141 |
142 |
def vc_single(
    sid,
    input_audio_path,
    f0_up_key,
    f0_file,
    f0_method,
    file_index,
    # file_index2,
    # file_big_npy,
    index_rate,
    filter_radius,
    resample_sr,
    rms_mix_rate,
    protect,
    crepe_hop_length,
):  # spk_item, input_audio0, vc_transform0,f0_file,f0method0
    """Convert one audio file with the currently loaded voice model.

    Returns (status_message, (sample_rate, converted_audio)); on failure the
    second element is (None, None) and the message carries the traceback.

    sid: speaker id within the model; f0_up_key: transpose in semitones;
    file_index: path to the faiss feature index (non-existent path disables
    retrieval); index_rate: retrieval blend ratio; resample_sr: optional
    output resample rate (>= 16000 to take effect).
    """
    global tgt_sr, net_g, vc, hubert_model, version
    if input_audio_path is None:
        return "You need to upload an audio", None
    f0_up_key = int(f0_up_key)
    try:
        audio = load_audio(input_audio_path, 16000)
        # Normalize so the peak sits at ~0.95 full scale.
        audio_max = np.abs(audio).max() / 0.95
        if audio_max > 1:
            audio /= audio_max
        times = [0, 0, 0]  # filled by vc.pipeline with npy/f0/infer timings
        if hubert_model is None:  # load the feature extractor lazily, once
            load_hubert()
        if_f0 = cpt.get("f0", 1)
        # Tidy pasted paths and silently map a "trained" index to its usable
        # "added" counterpart (a common user mistake).
        file_index = (
            file_index.strip(" ")
            .strip('"')
            .strip("\n")
            .strip('"')
            .strip(" ")
            .replace("trained", "added")
        )
        # file_big_npy = (
        #     file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        # )
        audio_opt = vc.pipeline(
            hubert_model,
            net_g,
            sid,
            audio,
            input_audio_path,
            times,
            f0_up_key,
            f0_method,
            file_index,
            # file_big_npy,
            index_rate,
            if_f0,
            filter_radius,
            tgt_sr,
            resample_sr,
            rms_mix_rate,
            version,
            protect,
            crepe_hop_length,
            f0_file=f0_file,
        )
        if resample_sr >= 16000 and tgt_sr != resample_sr:
            tgt_sr = resample_sr
        index_info = (
            "Using index:%s." % file_index
            if os.path.exists(file_index)
            else "Index not used."
        )
        return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % (
            index_info,
            times[0],
            times[1],
            times[2],
        ), (tgt_sr, audio_opt)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; inference errors are reported to the UI instead.
        info = traceback.format_exc()
        print(info)
        return info, (None, None)
224 |
225 |
def vc_multi(
    sid,
    dir_path,
    opt_root,
    paths,
    f0_up_key,
    f0_method,
    file_index,
    file_index2,
    # file_big_npy,
    index_rate,
    filter_radius,
    resample_sr,
    rms_mix_rate,
    protect,
    format1,
    crepe_hop_length,
):
    """Batch-convert every file in `dir_path` (or the uploaded `paths`).

    Generator: yields the accumulated per-file log after each conversion.
    `file_index` (textbox) takes priority over `file_index2` (dropdown).
    `format1` selects the output format; non-wav/flac outputs are written
    as wav first and then transcoded with ffmpeg.
    """
    try:
        dir_path = (
            dir_path.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        )  # strip quotes/spaces/newlines users paste around paths
        opt_root = opt_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        os.makedirs(opt_root, exist_ok=True)
        try:
            if dir_path != "":
                paths = [os.path.join(dir_path, name) for name in os.listdir(dir_path)]
            else:
                paths = [path.name for path in paths]
        except Exception:
            traceback.print_exc()
            paths = [path.name for path in paths]
        # BUGFIX: vc_single accepts only one index argument (its file_index2
        # parameter is commented out), but this function used to pass both,
        # which raised TypeError on every call. Prefer the textbox value and
        # fall back to the dropdown selection.
        chosen_index = file_index if file_index != "" else file_index2
        infos = []
        for path in paths:
            info, opt = vc_single(
                sid,
                path,
                f0_up_key,
                None,
                f0_method,
                chosen_index,
                # file_big_npy,
                index_rate,
                filter_radius,
                resample_sr,
                rms_mix_rate,
                protect,
                crepe_hop_length,
            )
            if "Success" in info:
                try:
                    tgt_sr, audio_opt = opt
                    if format1 in ["wav", "flac"]:
                        sf.write(
                            "%s/%s.%s" % (opt_root, os.path.basename(path), format1),
                            audio_opt,
                            tgt_sr,
                        )
                    else:
                        # Write a wav first, then let ffmpeg transcode it.
                        path = "%s/%s.wav" % (opt_root, os.path.basename(path))
                        sf.write(
                            path,
                            audio_opt,
                            tgt_sr,
                        )
                        if os.path.exists(path):
                            os.system(
                                "ffmpeg -i %s -vn %s -q:a 2 -y"
                                % (path, path[:-4] + ".%s" % format1)
                            )
                except Exception:
                    info += traceback.format_exc()
            infos.append("%s->%s" % (os.path.basename(path), info))
            yield "\n".join(infos)
        yield "\n".join(infos)
    except Exception:
        yield traceback.format_exc()
304 |
305 |
def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format0):
    """Run UVR5 vocal/instrumental separation over a batch of audio files.

    Generator: yields the accumulated log text after each processed file so
    the UI can stream progress; a final yield happens in `finally`.

    model_name: "onnx_dereverb_By_FoxJoy" selects the MDX-Net dereverb model,
        otherwise the basename of a *.pth model in `weight_uvr5_root`.
    inp_root: input directory; if empty, the uploaded `paths` are used.
    save_root_vocal / save_root_ins: output dirs for vocals / instrumentals.
    agg: aggressiveness value forwarded to the separation model.
    format0: output audio format.
    """
    infos = []
    try:
        # Strip the quotes/spaces/newlines users tend to paste around paths.
        inp_root = inp_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        save_root_vocal = (
            save_root_vocal.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        )
        save_root_ins = (
            save_root_ins.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        )
        if model_name == "onnx_dereverb_By_FoxJoy":
            pre_fun = MDXNetDereverb(15)
        else:
            # "DeEcho" models use the newer preprocessing class.
            func = _audio_pre_ if "DeEcho" not in model_name else _audio_pre_new
            pre_fun = func(
                agg=int(agg),
                model_path=os.path.join(weight_uvr5_root, model_name + ".pth"),
                device=config.device,
                is_half=config.is_half,
            )
        if inp_root != "":
            paths = [os.path.join(inp_root, name) for name in os.listdir(inp_root)]
        else:
            paths = [path.name for path in paths]
        for path in paths:
            inp_path = os.path.join(inp_root, path)
            need_reformat = 1
            done = 0
            try:
                info = ffmpeg.probe(inp_path, cmd="ffprobe")
                # Only 44.1 kHz stereo input can skip the ffmpeg re-encode.
                if (
                    info["streams"][0]["channels"] == 2
                    and info["streams"][0]["sample_rate"] == "44100"
                ):
                    need_reformat = 0
                    pre_fun._path_audio_(
                        inp_path, save_root_ins, save_root_vocal, format0
                    )
                    done = 1
            except:
                need_reformat = 1
                traceback.print_exc()
            if need_reformat == 1:
                # Re-encode to 44.1 kHz stereo PCM into the scratch dir.
                # NOTE(review): inp_path is interpolated unquoted into a shell
                # command — paths containing spaces will break; confirm inputs.
                tmp_path = "%s/%s.reformatted.wav" % (tmp, os.path.basename(inp_path))
                os.system(
                    "ffmpeg -i %s -vn -acodec pcm_s16le -ac 2 -ar 44100 %s -y"
                    % (inp_path, tmp_path)
                )
                inp_path = tmp_path
            try:
                if done == 0:
                    pre_fun._path_audio_(
                        inp_path, save_root_ins, save_root_vocal, format0
                    )
                infos.append("%s->Success" % (os.path.basename(inp_path)))
                yield "\n".join(infos)
            except:
                infos.append(
                    "%s->%s" % (os.path.basename(inp_path), traceback.format_exc())
                )
                yield "\n".join(infos)
    except:
        infos.append(traceback.format_exc())
        yield "\n".join(infos)
    finally:
        # Free the model and GPU memory regardless of success or failure.
        try:
            if model_name == "onnx_dereverb_By_FoxJoy":
                del pre_fun.pred.model
                del pre_fun.pred.model_
            else:
                del pre_fun.model
            del pre_fun
        except:
            traceback.print_exc()
        print("clean_empty_cache")
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        yield "\n".join(infos)
384 |
385 |
# Only one voice model can be active per tab at any time.
def get_vc(sid):
    """Load voice model `sid` from `weight_root`, or unload when sid is empty.

    Rebinds the module globals (n_spk, tgt_sr, net_g, vc, cpt, version) that
    vc_single relies on. Returns a gradio update dict for the speaker-id
    widget ("maximum": n_spk is included on a successful load).
    """
    global n_spk, tgt_sr, net_g, vc, cpt, version
    if sid == "" or sid == []:
        global hubert_model
        # The UI polls this handler; detect the transition from "model
        # loaded" to "no model" and release everything.
        if hubert_model != None:
            print("clean_empty_cache")
            del net_g, n_spk, vc, hubert_model, tgt_sr  # ,cpt
            hubert_model = net_g = n_spk = vc = hubert_model = tgt_sr = None
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            # Below: rebuild net_g once more and delete it — without this
            # extra round-trip the GPU memory is not fully released.
            if_f0 = cpt.get("f0", 1)
            version = cpt.get("version", "v1")
            if version == "v1":
                if if_f0 == 1:
                    net_g = SynthesizerTrnMs256NSFsid(
                        *cpt["config"], is_half=config.is_half
                    )
                else:
                    net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
            elif version == "v2":
                if if_f0 == 1:
                    net_g = SynthesizerTrnMs768NSFsid(
                        *cpt["config"], is_half=config.is_half
                    )
                else:
                    net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
            del net_g, cpt
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            cpt = None
        return {"visible": False, "__type__": "update"}
    person = "%s/%s" % (weight_root, sid)
    print("loading %s" % person)
    cpt = torch.load(person, map_location="cpu")
    tgt_sr = cpt["config"][-1]
    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
    if_f0 = cpt.get("f0", 1)
    version = cpt.get("version", "v1")
    if version == "v1":
        if if_f0 == 1:
            net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
        else:
            net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
    elif version == "v2":
        if if_f0 == 1:
            net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half)
        else:
            net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
    # Remove the enc_q submodule; the checkpoint weights are then loaded
    # non-strictly so its absence is tolerated.
    del net_g.enc_q
    print(net_g.load_state_dict(cpt["weight"], strict=False))
    net_g.eval().to(config.device)
    if config.is_half:
        net_g = net_g.half()
    else:
        net_g = net_g.float()
    vc = VC(tgt_sr, config)
    n_spk = cpt["config"][-3]
    return {"visible": False, "maximum": n_spk, "__type__": "update"}
446 |
447 |
def change_choices():
    """Re-scan the weights and logs folders; return gradio update dicts
    refreshing the model dropdown and the index dropdown."""
    model_names = [n for n in os.listdir(weight_root) if n.endswith(".pth")]
    indexes = []
    for folder, _, files in os.walk(index_root, topdown=False):
        indexes.extend(
            "%s/%s" % (folder, f)
            for f in files
            if f.endswith(".index") and "trained" not in f
        )
    return (
        {"choices": sorted(model_names), "__type__": "update"},
        {"choices": sorted(indexes), "__type__": "update"},
    )
462 |
463 |
def clean():
    """Return a gradio update that blanks a component's value."""
    update = {"value": "", "__type__": "update"}
    return update
466 |
467 |
# Map UI sample-rate labels to numeric rates in Hz.
sr_dict = {
    "32k": 32000,
    "40k": 40000,
    "48k": 48000,
}
473 |
474 |
def if_done(done, p):
    """Block until subprocess `p` exits, then set done[0] = True.

    Runs on a helper thread; `done` is a one-element list used as a mutable
    flag shared with the UI's log-polling loop.
    """
    # poll() is None while the process is still running.
    while p.poll() is None:
        sleep(0.5)
    done[0] = True
482 |
483 |
def if_done_multi(done, ps):
    """Block until every subprocess in `ps` has exited, then set done[0] = True.

    Runs on a helper thread; `done` is a one-element list used as a mutable
    flag shared with the UI's log-polling loop.
    """
    # poll() is None while a process is running; keep waiting while any is.
    while any(p.poll() is None for p in ps):
        sleep(0.5)
    done[0] = True
497 |
498 |
def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
    """Launch the dataset preprocessing script and stream its log.

    Generator: periodically yields the contents of preprocess.log so the UI
    can show live progress. `sr` is a UI label resolved through sr_dict;
    `n_p` is the worker-process count passed to the script.
    """
    sr = sr_dict[sr]
    os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
    # Truncate the log file before starting.
    f = open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "w")
    f.close()
    cmd = (
        config.python_cmd
        + " trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s "
        % (trainset_dir, sr, n_p, now_dir, exp_dir)
        + str(config.noparallel)
    )
    print(cmd)
    p = Popen(cmd, shell=True)  # , stdin=PIPE, stdout=PIPE,stderr=PIPE,cwd=now_dir
    # gradio only surfaces Popen output after the process finishes, so a
    # watcher thread flips `done` while we poll the log file for progress.
    done = [False]
    threading.Thread(
        target=if_done,
        args=(
            done,
            p,
        ),
    ).start()
    while 1:
        with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f:
            yield (f.read())
        sleep(1)
        if done[0] == True:
            break
    with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f:
        log = f.read()
    print(log)
    yield log
531 |
532 |
533 | # but2.click(extract_f0,[gpus6,np7,f0method8,if_f0_3,trainset_dir4],[info2])
def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, echl):
    """Extract pitch (f0) and HuBERT features for a prepared experiment.

    Generator: streams the extraction log file to the UI. `gpus` is a
    "0-1-2"-style string; `echl` is the crepe hop length forwarded to the
    f0 extractor; `version19` selects 256- vs 768-dim features.
    """
    gpus = gpus.split("-")
    os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
    # Truncate the shared log file.
    f = open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "w")
    f.close()
    if if_f0:
        cmd = config.python_cmd + " extract_f0_print.py %s/logs/%s %s %s %s" % (
            now_dir,
            exp_dir,
            n_p,
            f0method,
            echl,
        )
        print(cmd)
        p = Popen(cmd, shell=True, cwd=now_dir)  # , stdin=PIPE, stdout=PIPE,stderr=PIPE
        # gradio only surfaces Popen output after the process finishes, so a
        # watcher thread flips `done` while we poll the log file for progress.
        done = [False]
        threading.Thread(
            target=if_done,
            args=(
                done,
                p,
            ),
        ).start()
        while 1:
            with open(
                "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r"
            ) as f:
                yield (f.read())
            sleep(1)
            if done[0] == True:
                break
        with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
            log = f.read()
        print(log)
        yield log
    # Feature extraction: one subprocess per GPU, each handling its own part.
    """
    n_part=int(sys.argv[1])
    i_part=int(sys.argv[2])
    i_gpu=sys.argv[3]
    exp_dir=sys.argv[4]
    os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu)
    """
    leng = len(gpus)
    ps = []
    for idx, n_g in enumerate(gpus):
        cmd = (
            config.python_cmd
            + " extract_feature_print.py %s %s %s %s %s/logs/%s %s"
            % (
                config.device,
                leng,
                idx,
                n_g,
                now_dir,
                exp_dir,
                version19,
            )
        )
        print(cmd)
        p = Popen(
            cmd, shell=True, cwd=now_dir
        )  # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
        ps.append(p)
    # Same log-polling pattern as above, this time waiting on all workers.
    done = [False]
    threading.Thread(
        target=if_done_multi,
        args=(
            done,
            ps,
        ),
    ).start()
    while 1:
        with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
            yield (f.read())
        sleep(1)
        if done[0] == True:
            break
    with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
        log = f.read()
    print(log)
    yield log
618 |
619 |
def change_sr2(sr2, if_f0_3, version19):
    """Resolve pretrained G/D checkpoint paths for the selected sample rate.

    Returns (generator_path, discriminator_path, visibility_update); each
    path is "" when the checkpoint file is absent, so training proceeds
    from scratch. (Idiom cleanup: hoisted the duplicated path format
    strings and dropped the `== False` comparisons; behavior unchanged.)
    """
    path_str = "" if version19 == "v1" else "_v2"
    f0_str = "f0" if if_f0_3 else ""
    g_path = "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)
    d_path = "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
    g_exists = os.access(g_path, os.F_OK)
    d_exists = os.access(d_path, os.F_OK)
    if not g_exists:
        print(g_path, "not exist, will not use pretrained model")
    if not d_exists:
        print(d_path, "not exist, will not use pretrained model")
    return (
        g_path if g_exists else "",
        d_path if d_exists else "",
        {"visible": True, "__type__": "update"},
    )
634 |
def change_version19(sr2, if_f0_3, version19):
    """Resolve pretrained G/D checkpoint paths after a model-version switch.

    Returns (generator_path, discriminator_path); each is "" when the
    checkpoint file is absent. (Idiom cleanup: hoisted the duplicated path
    format strings and dropped the `== False` comparisons; behavior
    unchanged.)
    """
    path_str = "" if version19 == "v1" else "_v2"
    f0_str = "f0" if if_f0_3 else ""
    g_path = "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)
    d_path = "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
    g_exists = os.access(g_path, os.F_OK)
    d_exists = os.access(d_path, os.F_OK)
    if not g_exists:
        print(g_path, "not exist, will not use pretrained model")
    if not d_exists:
        print(d_path, "not exist, will not use pretrained model")
    return (
        g_path if g_exists else "",
        d_path if d_exists else "",
    )
648 |
649 |
def change_f0(if_f0_3, sr2, version19):  # f0method8,pretrained_G14,pretrained_D15
    """Toggle the f0-method control and pick pretrained checkpoints.

    Returns (visibility update for the f0-method widget, generator path,
    discriminator path); paths are "" when the checkpoint file is absent.
    (Idiom cleanup only: hoisted duplicated format strings, dropped
    `== False`; behavior unchanged.)
    """
    path_str = "" if version19 == "v1" else "_v2"
    f0g_path = "pretrained%s/f0G%s.pth" % (path_str, sr2)
    f0d_path = "pretrained%s/f0D%s.pth" % (path_str, sr2)
    g_exists = os.access(f0g_path, os.F_OK)
    d_exists = os.access(f0d_path, os.F_OK)
    if not g_exists:
        print(f0g_path, "not exist, will not use pretrained model")
    if not d_exists:
        print(f0d_path, "not exist, will not use pretrained model")
    if if_f0_3:
        return (
            {"visible": True, "__type__": "update"},
            f0g_path if g_exists else "",
            f0d_path if d_exists else "",
        )
    # NOTE(review): the non-f0 branch returns the plain G/D paths but gates
    # them on the *f0* checkpoints' existence — preserved as-is; confirm
    # this is intentional upstream behavior.
    return (
        {"visible": False, "__type__": "update"},
        ("pretrained%s/G%s.pth" % (path_str, sr2)) if g_exists else "",
        ("pretrained%s/D%s.pth" % (path_str, sr2)) if d_exists else "",
    )
669 |
670 |
671 | # but3.click(click_train,[exp_dir1,sr2,if_f0_3,save_epoch10,total_epoch11,batch_size12,if_save_latest13,pretrained_G14,pretrained_D15,gpus16])
def click_train(
    exp_dir1,
    sr2,
    if_f0_3,
    spk_id5,
    save_epoch10,
    total_epoch11,
    batch_size12,
    if_save_latest13,
    pretrained_G14,
    pretrained_D15,
    gpus16,
    if_cache_gpu17,
    if_save_every_weights18,
    version19,
):
    """Assemble the training filelist and launch the training script.

    Blocks until training finishes; returns a status string. The positional
    parameters mirror the gradio widgets wired in the UI (see the
    but3.click binding comment above).
    """
    # Build the training filelist.
    exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
    os.makedirs(exp_dir, exist_ok=True)
    gt_wavs_dir = "%s/0_gt_wavs" % (exp_dir)
    feature_dir = (
        "%s/3_feature256" % (exp_dir)
        if version19 == "v1"
        else "%s/3_feature768" % (exp_dir)
    )
    if if_f0_3:
        f0_dir = "%s/2a_f0" % (exp_dir)
        f0nsf_dir = "%s/2b-f0nsf" % (exp_dir)
        # Keep only samples that have every required artifact.
        names = (
            set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)])
            & set([name.split(".")[0] for name in os.listdir(feature_dir)])
            & set([name.split(".")[0] for name in os.listdir(f0_dir)])
            & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)])
        )
    else:
        names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set(
            [name.split(".")[0] for name in os.listdir(feature_dir)]
        )
    opt = []
    for name in names:
        if if_f0_3:
            opt.append(
                "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s"
                % (
                    gt_wavs_dir.replace("\\", "\\\\"),
                    name,
                    feature_dir.replace("\\", "\\\\"),
                    name,
                    f0_dir.replace("\\", "\\\\"),
                    name,
                    f0nsf_dir.replace("\\", "\\\\"),
                    name,
                    spk_id5,
                )
            )
        else:
            opt.append(
                "%s/%s.wav|%s/%s.npy|%s"
                % (
                    gt_wavs_dir.replace("\\", "\\\\"),
                    name,
                    feature_dir.replace("\\", "\\\\"),
                    name,
                    spk_id5,
                )
            )
    fea_dim = 256 if version19 == "v1" else 768
    # Append two entries for the bundled "mute" sample.
    if if_f0_3:
        for _ in range(2):
            opt.append(
                "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
                % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5)
            )
    else:
        for _ in range(2):
            opt.append(
                "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s"
                % (now_dir, sr2, now_dir, fea_dim, spk_id5)
            )
    shuffle(opt)
    with open("%s/filelist.txt" % exp_dir, "w") as f:
        f.write("\n".join(opt))
    print("write filelist done")
    # No config file needs to be generated here.
    # cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e mi-test -sr 40k -f0 1 -bs 4 -g 0 -te 10 -se 5 -pg pretrained/f0G40k.pth -pd pretrained/f0D40k.pth -l 1 -c 0"
    print("use gpus:", gpus16)
    if pretrained_G14 == "":
        print("no pretrained Generator")
    if pretrained_D15 == "":
        print("no pretrained Discriminator")
    if gpus16:
        cmd = (
            config.python_cmd
            + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s"
            % (
                exp_dir1,
                sr2,
                1 if if_f0_3 else 0,
                batch_size12,
                gpus16,
                total_epoch11,
                save_epoch10,
                ("-pg %s" % pretrained_G14) if pretrained_G14 != "" else "",
                ("-pd %s" % pretrained_D15) if pretrained_D15 != "" else "",
                1 if if_save_latest13 == i18n("是") else 0,
                1 if if_cache_gpu17 == i18n("是") else 0,
                1 if if_save_every_weights18 == i18n("是") else 0,
                version19,
            )
        )
    else:
        cmd = (
            config.python_cmd
            + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s"
            % (
                exp_dir1,
                sr2,
                1 if if_f0_3 else 0,
                batch_size12,
                total_epoch11,
                save_epoch10,
                # NOTE(review): "\b" (backspace) is used as the empty
                # placeholder in this branch, unlike "" above — confirm it is
                # intentional.
                ("-pg %s" % pretrained_G14) if pretrained_G14 != "" else "\b",
                ("-pd %s" % pretrained_D15) if pretrained_D15 != "" else "\b",
                1 if if_save_latest13 == i18n("是") else 0,
                1 if if_cache_gpu17 == i18n("是") else 0,
                1 if if_save_every_weights18 == i18n("是") else 0,
                version19,
            )
        )
    print(cmd)
    p = Popen(cmd, shell=True, cwd=now_dir)
    p.wait()
    return "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log"
805 |
806 |
807 | # but4.click(train_index, [exp_dir1], info3)
def train_index(exp_dir1, version19):
    """Build the faiss retrieval index from the experiment's extracted features.

    Generator: yields progress text. Writes total_fea.npy, a trained_*.index
    and an added_*.index into the experiment's log directory.
    """
    exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
    os.makedirs(exp_dir, exist_ok=True)
    feature_dir = (
        "%s/3_feature256" % (exp_dir)
        if version19 == "v1"
        else "%s/3_feature768" % (exp_dir)
    )
    if os.path.exists(feature_dir) == False:
        return "请先进行特征提取!"
    listdir_res = list(os.listdir(feature_dir))
    if len(listdir_res) == 0:
        return "请先进行特征提取!"
    npys = []
    for name in sorted(listdir_res):
        phone = np.load("%s/%s" % (feature_dir, name))
        npys.append(phone)
    big_npy = np.concatenate(npys, 0)
    # Shuffle rows so index training sees an unbiased sample.
    big_npy_idx = np.arange(big_npy.shape[0])
    np.random.shuffle(big_npy_idx)
    big_npy = big_npy[big_npy_idx]
    np.save("%s/total_fea.npy" % exp_dir, big_npy)
    # n_ivf = big_npy.shape[0] // 39
    n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
    infos = []
    infos.append("%s,%s" % (big_npy.shape, n_ivf))
    yield "\n".join(infos)
    index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf)
    # index = faiss.index_factory(256if version19=="v1"else 768, "IVF%s,PQ128x4fs,RFlat"%n_ivf)
    infos.append("training")
    yield "\n".join(infos)
    index_ivf = faiss.extract_index_ivf(index)  #
    index_ivf.nprobe = 1
    index.train(big_npy)
    faiss.write_index(
        index,
        "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index"
        % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
    )
    # faiss.write_index(index, '%s/trained_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19))
    infos.append("adding")
    yield "\n".join(infos)
    # Add vectors in batches to bound peak memory.
    batch_size_add = 8192
    for i in range(0, big_npy.shape[0], batch_size_add):
        index.add(big_npy[i : i + batch_size_add])
    faiss.write_index(
        index,
        "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index"
        % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
    )
    infos.append(
        "成功构建索引,added_IVF%s_Flat_nprobe_%s_%s_%s.index"
        % (n_ivf, index_ivf.nprobe, exp_dir1, version19)
    )
    # faiss.write_index(index, '%s/added_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19))
    # infos.append("成功构建索引,added_IVF%s_Flat_FastScan_%s.index"%(n_ivf,version19))
    yield "\n".join(infos)
865 |
866 |
867 | # but5.click(train1key, [exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0method8, save_epoch10, total_epoch11, batch_size12, if_save_latest13, pretrained_G14, pretrained_D15, gpus16, if_cache_gpu17], info3)
868 | def train1key(
869 | exp_dir1,
870 | sr2,
871 | if_f0_3,
872 | trainset_dir4,
873 | spk_id5,
874 | np7,
875 | f0method8,
876 | save_epoch10,
877 | total_epoch11,
878 | batch_size12,
879 | if_save_latest13,
880 | pretrained_G14,
881 | pretrained_D15,
882 | gpus16,
883 | if_cache_gpu17,
884 | if_save_every_weights18,
885 | version19,
886 | echl
887 | ):
888 | infos = []
889 |
890 | def get_info_str(strr):
891 | infos.append(strr)
892 | return "\n".join(infos)
893 |
894 | model_log_dir = "%s/logs/%s" % (now_dir, exp_dir1)
895 | preprocess_log_path = "%s/preprocess.log" % model_log_dir
896 | extract_f0_feature_log_path = "%s/extract_f0_feature.log" % model_log_dir
897 | gt_wavs_dir = "%s/0_gt_wavs" % model_log_dir
898 | feature_dir = (
899 | "%s/3_feature256" % model_log_dir
900 | if version19 == "v1"
901 | else "%s/3_feature768" % model_log_dir
902 | )
903 |
904 | os.makedirs(model_log_dir, exist_ok=True)
905 | #########step1:处理数据
906 | open(preprocess_log_path, "w").close()
907 | cmd = (
908 | config.python_cmd
909 | + " trainset_preprocess_pipeline_print.py %s %s %s %s "
910 | % (trainset_dir4, sr_dict[sr2], np7, model_log_dir)
911 | + str(config.noparallel)
912 | )
913 | yield get_info_str(i18n("step1:正在处理数据"))
914 | yield get_info_str(cmd)
915 | p = Popen(cmd, shell=True)
916 | p.wait()
917 | with open(preprocess_log_path, "r") as f:
918 | print(f.read())
919 | #########step2a:提取音高
920 | open(extract_f0_feature_log_path, "w")
921 | if if_f0_3:
922 | yield get_info_str("step2a:正在提取音高")
923 | cmd = config.python_cmd + " extract_f0_print.py %s %s %s %s" % (
924 | model_log_dir,
925 | np7,
926 | f0method8,
927 | echl
928 | )
929 | yield get_info_str(cmd)
930 | p = Popen(cmd, shell=True, cwd=now_dir)
931 | p.wait()
932 | with open(extract_f0_feature_log_path, "r") as f:
933 | print(f.read())
934 | else:
935 | yield get_info_str(i18n("step2a:无需提取音高"))
936 | #######step2b:提取特征
937 | yield get_info_str(i18n("step2b:正在提取特征"))
938 | gpus = gpus16.split("-")
939 | leng = len(gpus)
940 | ps = []
941 | for idx, n_g in enumerate(gpus):
942 | cmd = config.python_cmd + " extract_feature_print.py %s %s %s %s %s %s" % (
943 | config.device,
944 | leng,
945 | idx,
946 | n_g,
947 | model_log_dir,
948 | version19,
949 | )
950 | yield get_info_str(cmd)
951 | p = Popen(
952 | cmd, shell=True, cwd=now_dir
953 | ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
954 | ps.append(p)
955 | for p in ps:
956 | p.wait()
957 | with open(extract_f0_feature_log_path, "r") as f:
958 | print(f.read())
959 | #######step3a:训练模型
960 | yield get_info_str(i18n("step3a:正在训练模型"))
961 | # 生成filelist
962 | if if_f0_3:
963 | f0_dir = "%s/2a_f0" % model_log_dir
964 | f0nsf_dir = "%s/2b-f0nsf" % model_log_dir
965 | names = (
966 | set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)])
967 | & set([name.split(".")[0] for name in os.listdir(feature_dir)])
968 | & set([name.split(".")[0] for name in os.listdir(f0_dir)])
969 | & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)])
970 | )
971 | else:
972 | names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set(
973 | [name.split(".")[0] for name in os.listdir(feature_dir)]
974 | )
975 | opt = []
976 | for name in names:
977 | if if_f0_3:
978 | opt.append(
979 | "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s"
980 | % (
981 | gt_wavs_dir.replace("\\", "\\\\"),
982 | name,
983 | feature_dir.replace("\\", "\\\\"),
984 | name,
985 | f0_dir.replace("\\", "\\\\"),
986 | name,
987 | f0nsf_dir.replace("\\", "\\\\"),
988 | name,
989 | spk_id5,
990 | )
991 | )
992 | else:
993 | opt.append(
994 | "%s/%s.wav|%s/%s.npy|%s"
995 | % (
996 | gt_wavs_dir.replace("\\", "\\\\"),
997 | name,
998 | feature_dir.replace("\\", "\\\\"),
999 | name,
1000 | spk_id5,
1001 | )
1002 | )
1003 | fea_dim = 256 if version19 == "v1" else 768
1004 | if if_f0_3:
1005 | for _ in range(2):
1006 | opt.append(
1007 | "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
1008 | % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5)
1009 | )
1010 | else:
1011 | for _ in range(2):
1012 | opt.append(
1013 | "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s"
1014 | % (now_dir, sr2, now_dir, fea_dim, spk_id5)
1015 | )
1016 | shuffle(opt)
1017 | with open("%s/filelist.txt" % model_log_dir, "w") as f:
1018 | f.write("\n".join(opt))
1019 | yield get_info_str("write filelist done")
1020 | if gpus16:
1021 | cmd = (
1022 | config.python_cmd
1023 | +" train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s"
1024 | % (
1025 | exp_dir1,
1026 | sr2,
1027 | 1 if if_f0_3 else 0,
1028 | batch_size12,
1029 | gpus16,
1030 | total_epoch11,
1031 | save_epoch10,
1032 | ("-pg %s" % pretrained_G14) if pretrained_G14 != "" else "",
1033 | ("-pd %s" % pretrained_D15) if pretrained_D15 != "" else "",
1034 | 1 if if_save_latest13 == i18n("是") else 0,
1035 | 1 if if_cache_gpu17 == i18n("是") else 0,
1036 | 1 if if_save_every_weights18 == i18n("是") else 0,
1037 | version19,
1038 | )
1039 | )
1040 | else:
1041 | cmd = (
1042 | config.python_cmd
1043 | + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s"
1044 | % (
1045 | exp_dir1,
1046 | sr2,
1047 | 1 if if_f0_3 else 0,
1048 | batch_size12,
1049 | total_epoch11,
1050 | save_epoch10,
1051 | ("-pg %s" % pretrained_G14) if pretrained_G14 != "" else "",
1052 | ("-pd %s" % pretrained_D15) if pretrained_D15 != "" else "",
1053 | 1 if if_save_latest13 == i18n("是") else 0,
1054 | 1 if if_cache_gpu17 == i18n("是") else 0,
1055 | 1 if if_save_every_weights18 == i18n("是") else 0,
1056 | version19,
1057 | )
1058 | )
1059 | yield get_info_str(cmd)
1060 | p = Popen(cmd, shell=True, cwd=now_dir)
1061 | p.wait()
1062 | yield get_info_str(i18n("训练结束, 您可查看控制台训练日志或实验文件夹下的train.log"))
1063 | #######step3b:训练索引
1064 | npys = []
1065 | listdir_res = list(os.listdir(feature_dir))
1066 | for name in sorted(listdir_res):
1067 | phone = np.load("%s/%s" % (feature_dir, name))
1068 | npys.append(phone)
1069 | big_npy = np.concatenate(npys, 0)
1070 |
1071 | big_npy_idx = np.arange(big_npy.shape[0])
1072 | np.random.shuffle(big_npy_idx)
1073 | big_npy = big_npy[big_npy_idx]
1074 | np.save("%s/total_fea.npy" % model_log_dir, big_npy)
1075 |
1076 | # n_ivf = big_npy.shape[0] // 39
1077 | n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
1078 | yield get_info_str("%s,%s" % (big_npy.shape, n_ivf))
1079 | index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf)
1080 | yield get_info_str("training index")
1081 | index_ivf = faiss.extract_index_ivf(index) #
1082 | index_ivf.nprobe = 1
1083 | index.train(big_npy)
1084 | faiss.write_index(
1085 | index,
1086 | "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index"
1087 | % (model_log_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
1088 | )
1089 | yield get_info_str("adding index")
1090 | batch_size_add = 8192
1091 | for i in range(0, big_npy.shape[0], batch_size_add):
1092 | index.add(big_npy[i : i + batch_size_add])
1093 | faiss.write_index(
1094 | index,
1095 | "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index"
1096 | % (model_log_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
1097 | )
1098 | yield get_info_str(
1099 | "成功构建索引, added_IVF%s_Flat_nprobe_%s_%s_%s.index"
1100 | % (n_ivf, index_ivf.nprobe, exp_dir1, version19)
1101 | )
1102 | yield get_info_str(i18n("全流程结束!"))
1103 |
1104 |
1105 | # ckpt_path2.change(change_info_,[ckpt_path2],[sr__,if_f0__])
def change_info_(ckpt_path):
    """Read sample rate, f0 flag and version from the train.log next to a checkpoint.

    Args:
        ckpt_path: path to a checkpoint file; ``train.log`` is looked up in the
            same directory (by replacing the basename).

    Returns:
        ``(sample_rate, f0_flag_str, version)`` on success, or three Gradio
        ``{"__type__": "update"}`` dicts when the log is missing or unparseable.
    """
    import ast

    log_path = ckpt_path.replace(os.path.basename(ckpt_path), "train.log")
    if not os.path.exists(log_path):
        return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
    try:
        with open(log_path, "r") as f:
            # The first line ends with a repr of the hparams dict after a tab.
            # ast.literal_eval is a safe replacement for eval() on file content.
            info = ast.literal_eval(f.read().strip("\n").split("\n")[0].split("\t")[-1])
        sr, f0 = info["sample_rate"], info["if_f0"]
        # logs written before the "version" key existed default to v1
        version = "v2" if ("version" in info and info["version"] == "v2") else "v1"
        return sr, str(f0), version
    except Exception:
        traceback.print_exc()
        return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
1123 |
1124 |
1125 | from infer_pack.models_onnx import SynthesizerTrnMsNSFsidM
1126 |
1127 |
def export_onnx(ModelPath, ExportedPath, MoeVS=True):
    """Export an RVC checkpoint to an ONNX model.

    Args:
        ModelPath: path to the ``.pth`` checkpoint to convert.
        ExportedPath: destination path for the ``.onnx`` file.
        MoeVS: kept for interface compatibility; not used in this body.

    Returns:
        The string ``"Finished"`` on completion.
    """
    cpt = torch.load(ModelPath, map_location="cpu")
    # overwrite the speaker count with the actual embedding-table size
    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
    model_version = cpt.get("version", "v1")
    # v1 checkpoints use 256-dim hidden features, v2 uses 768 (768Vec support)
    hidden_channels = 256 if model_version == "v1" else 768

    device = "cpu"  # export-time device (does not affect later use of the model)

    # dummy tracing inputs, 200 frames long
    dummy_inputs = (
        torch.rand(1, 200, hidden_channels).to(device),            # hidden units
        torch.tensor([200]).long().to(device),                     # hidden unit length
        torch.randint(size=(1, 200), low=5, high=255).to(device),  # pitch (Hz)
        torch.rand(1, 200).to(device),                             # nsf pitch
        torch.LongTensor([0]).to(device),                          # speaker id
        torch.rand(1, 192, 200).to(device),                        # noise
    )

    # fp32 export (fp16 in C++ would require manual memory re-layout, so skip it)
    net_g = SynthesizerTrnMsNSFsidM(
        *cpt["config"], is_half=False, version=model_version
    )
    net_g.load_state_dict(cpt["weight"], strict=False)
    # net_g.construct_spkmixmap(n_speaker)  # multi-speaker mix-track export
    torch.onnx.export(
        net_g,
        dummy_inputs,
        ExportedPath,
        dynamic_axes={
            "phone": [1],
            "pitch": [1],
            "pitchf": [1],
            "rnd": [2],
        },
        do_constant_folding=False,
        opset_version=16,
        verbose=False,
        input_names=["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"],
        output_names=["audio"],
    )
    return "Finished"
1176 |
1177 |
1178 | #region Mangio-RVC-Fork CLI App
1179 | import re as regex
1180 | import scipy.io.wavfile as wavfile
1181 |
1182 | cli_current_page = "HOME"
1183 |
def cli_split_command(com):
    """Split a CLI command string into arguments, honoring double-quoted spans."""
    pattern = r'(?:(?<=\s)|^)"(.*?)"(?=\s|$)|(\S+)'
    tokens = []
    # each match yields (quoted, bare); exactly one of the two is non-empty
    for quoted, bare in regex.findall(pattern, com):
        tokens.append(quoted if quoted else bare)
    return tokens
1189 |
def execute_generator_function(genObject):
    """Run a generator/iterable to exhaustion for its side effects, ignoring yields."""
    iterator = iter(genObject)
    while True:
        try:
            next(iterator)
        except StopIteration:
            break
1192 |
def cli_infer(com):
    """Parse an infer command, run vc_single, and write the converted wav.

    See print_page_details() ("INFER") for the positional argument layout.
    """
    args = cli_split_command(com)
    model_name, source_audio_path, output_file_name, feature_index_path = args[:4]
    f0_file = None  # Not Implemented Yet

    # numeric / method parameters, in CLI positional order
    speaker_id = int(args[4])
    transposition = float(args[5])
    f0_method = args[6]
    crepe_hop_length = int(args[7])
    harvest_median_filter = int(args[8])
    resample = int(args[9])
    mix = float(args[10])
    feature_ratio = float(args[11])
    protection_amnt = float(args[12])

    print("Mangio-RVC-Fork Infer-CLI: Starting the inference...")
    vc_data = get_vc(model_name)
    print(vc_data)
    print("Mangio-RVC-Fork Infer-CLI: Performing inference...")
    conversion_data = vc_single(
        speaker_id,
        source_audio_path,
        transposition,
        f0_file,
        f0_method,
        feature_index_path,
        feature_ratio,
        harvest_median_filter,
        resample,
        mix,
        protection_amnt,
        crepe_hop_length,
    )
    if "Success." in conversion_data[0]:
        print("Mangio-RVC-Fork Infer-CLI: Inference succeeded. Writing to %s/%s..." % ('audio-outputs', output_file_name))
        # conversion_data[1] is (sample_rate, samples)
        wavfile.write('%s/%s' % ('audio-outputs', output_file_name), conversion_data[1][0], conversion_data[1][1])
        print("Mangio-RVC-Fork Infer-CLI: Finished! Saved output to %s/%s" % ('audio-outputs', output_file_name))
    else:
        print("Mangio-RVC-Fork Infer-CLI: Inference failed. Here's the traceback: ")
        print(conversion_data[0])
1239 |
def cli_pre_process(com):
    """Parse a pre-process command and run dataset preprocessing to completion."""
    args = cli_split_command(com)
    model_name, trainset_directory, sample_rate = args[0], args[1], args[2]
    num_processes = int(args[3])

    print("Mangio-RVC-Fork Pre-process: Starting...")
    # preprocess_dataset is a generator; drain it so all steps actually run
    execute_generator_function(
        preprocess_dataset(trainset_directory, model_name, sample_rate, num_processes)
    )
    print("Mangio-RVC-Fork Pre-process: Finished")
1256 |
def cli_extract_feature(com):
    """Parse an extract-feature command and run f0/feature extraction.

    Positional args: model name, gpu list ("0-1-2"), cpu process count,
    pitch-guidance flag (0/1), f0 method, crepe hop length, model version.
    """
    com = cli_split_command(com)
    model_name = com[0]
    gpus = com[1]
    num_processes = int(com[2])
    # fix: was the redundant ternary "True if (int(com[3]) == 1) else False"
    has_pitch_guidance = int(com[3]) == 1
    f0_method = com[4]
    crepe_hop_length = int(com[5])
    version = com[6]  # v1 or v2

    print("Mangio-RVC-CLI: Extract Feature Has Pitch: " + str(has_pitch_guidance))
    print("Mangio-RVC-CLI: Extract Feature Version: " + str(version))
    print("Mangio-RVC-Fork Feature Extraction: Starting...")
    generator = extract_f0_feature(
        gpus,
        num_processes,
        f0_method,
        has_pitch_guidance,
        model_name,
        version,
        crepe_hop_length
    )
    # extract_f0_feature is a generator; drain it so extraction actually runs
    execute_generator_function(generator)
    print("Mangio-RVC-Fork Feature Extraction: Finished")
1281 |
def cli_train(com):
    """Parse a train command and launch model training via click_train.

    Positional args: model name, sample rate, pitch-guidance flag, speaker id,
    save-epoch interval, total epochs, batch size, gpu slots, save-latest flag,
    cache-to-vram flag, save-every-weights flag, model version.
    """
    com = cli_split_command(com)
    model_name = com[0]
    sample_rate = com[1]
    # fix: was the redundant ternary "True if (int(com[2]) == 1) else False"
    has_pitch_guidance = int(com[2]) == 1
    speaker_id = int(com[3])
    save_epoch_iteration = int(com[4])
    total_epoch = int(com[5])  # 10000
    batch_size = int(com[6])
    gpu_card_slot_numbers = com[7]
    # click_train expects the localized yes/no strings, not booleans
    if_save_latest = i18n("是") if int(com[8]) == 1 else i18n("否")
    if_cache_gpu = i18n("是") if int(com[9]) == 1 else i18n("否")
    if_save_every_weight = i18n("是") if int(com[10]) == 1 else i18n("否")
    version = com[11]

    # v2 models load their pretrained weights from a separate folder
    pretrained_base = "pretrained/" if version == "v1" else "pretrained_v2/"

    g_pretrained_path = "%sf0G%s.pth" % (pretrained_base, sample_rate)
    d_pretrained_path = "%sf0D%s.pth" % (pretrained_base, sample_rate)

    print("Mangio-RVC-Fork Train-CLI: Training...")
    click_train(
        model_name,
        sample_rate,
        has_pitch_guidance,
        speaker_id,
        save_epoch_iteration,
        total_epoch,
        batch_size,
        if_save_latest,
        g_pretrained_path,
        d_pretrained_path,
        gpu_card_slot_numbers,
        if_cache_gpu,
        if_save_every_weight,
        version
    )
1319 |
def cli_train_feature(com):
    """Parse a train-feature command and build the faiss index for a model."""
    args = cli_split_command(com)
    model_name, version = args[0], args[1]
    print("Mangio-RVC-Fork Train Feature Index-CLI: Training... Please wait")
    # train_index is a generator; drain it so the index is actually built
    execute_generator_function(train_index(model_name, version))
    print("Mangio-RVC-Fork Train Feature Index-CLI: Done!")
1331 |
def cli_extract_model(com):
    """Parse an extract-model command and save a stripped-down inference model."""
    args = cli_split_command(com)
    model_path, save_name, sample_rate = args[0], args[1], args[2]
    has_pitch_guidance, info, version = args[3], args[4], args[5]
    result = extract_small_model(
        model_path,
        save_name,
        sample_rate,
        has_pitch_guidance,
        info,
        version
    )
    if result == "Success.":
        print("Mangio-RVC-Fork Extract Small Model: Success!")
    else:
        print(str(result))
        print("Mangio-RVC-Fork Extract Small Model: Failed!")
1353 |
def print_page_details():
    """Print the help/usage text for the currently selected CLI page.

    Reads the module-level ``cli_current_page`` to pick the page; an unknown
    page prints only the trailing blank line.
    """
    page_help = {
        "HOME": [
            " go home : Takes you back to home with a navigation list.",
            " go infer : Takes you to inference command execution.\n",
            " go pre-process : Takes you to training step.1) pre-process command execution.",
            " go extract-feature : Takes you to training step.2) extract-feature command execution.",
            " go train : Takes you to training step.3) being or continue training command execution.",
            " go train-feature : Takes you to the train feature index command execution.\n",
            " go extract-model : Takes you to the extract small model command execution.",
        ],
        "INFER": [
            " arg 1) model name with .pth in ./weights: mi-test.pth",
            " arg 2) source audio path: myFolder\\MySource.wav",
            " arg 3) output file name to be placed in './audio-outputs': MyTest.wav",
            " arg 4) feature index file path: logs/mi-test/added_IVF3042_Flat_nprobe_1.index",
            " arg 5) speaker id: 0",
            " arg 6) transposition: 0",
            " arg 7) f0 method: harvest (pm, harvest, crepe, crepe-tiny, hybrid[x,x,x,x], mangio-crepe, mangio-crepe-tiny)",
            " arg 8) crepe hop length: 160",
            " arg 9) harvest median filter radius: 3 (0-7)",
            " arg 10) post resample rate: 0",
            " arg 11) mix volume envelope: 1",
            " arg 12) feature index ratio: 0.78 (0-1)",
            " arg 13) Voiceless Consonant Protection (Less Artifact): 0.33 (Smaller number = more protection. 0.50 means Dont Use.) \n",
            "Example: mi-test.pth saudio/Sidney.wav myTest.wav logs/mi-test/added_index.index 0 -2 harvest 160 3 0 1 0.95 0.33",
        ],
        "PRE-PROCESS": [
            " arg 1) Model folder name in ./logs: mi-test",
            " arg 2) Trainset directory: mydataset (or) E:\\my-data-set",
            " arg 3) Sample rate: 40k (32k, 40k, 48k)",
            " arg 4) Number of CPU threads to use: 8 \n",
            "Example: mi-test mydataset 40k 24",
        ],
        "EXTRACT-FEATURE": [
            " arg 1) Model folder name in ./logs: mi-test",
            " arg 2) Gpu card slot: 0 (0-1-2 if using 3 GPUs)",
            " arg 3) Number of CPU threads to use: 8",
            " arg 4) Has Pitch Guidance?: 1 (0 for no, 1 for yes)",
            " arg 5) f0 Method: harvest (pm, harvest, dio, crepe)",
            " arg 6) Crepe hop length: 128",
            " arg 7) Version for pre-trained models: v2 (use either v1 or v2)\n",
            "Example: mi-test 0 24 1 harvest 128 v2",
        ],
        "TRAIN": [
            " arg 1) Model folder name in ./logs: mi-test",
            " arg 2) Sample rate: 40k (32k, 40k, 48k)",
            " arg 3) Has Pitch Guidance?: 1 (0 for no, 1 for yes)",
            " arg 4) speaker id: 0",
            " arg 5) Save epoch iteration: 50",
            " arg 6) Total epochs: 10000",
            " arg 7) Batch size: 8",
            " arg 8) Gpu card slot: 0 (0-1-2 if using 3 GPUs)",
            " arg 9) Save only the latest checkpoint: 0 (0 for no, 1 for yes)",
            " arg 10) Whether to cache training set to vram: 0 (0 for no, 1 for yes)",
            " arg 11) Save extracted small model every generation?: 0 (0 for no, 1 for yes)",
            " arg 12) Model architecture version: v2 (use either v1 or v2)\n",
            "Example: mi-test 40k 1 0 50 10000 8 0 0 0 0 v2",
        ],
        "TRAIN-FEATURE": [
            " arg 1) Model folder name in ./logs: mi-test",
            " arg 2) Model architecture version: v2 (use either v1 or v2)\n",
            "Example: mi-test v2",
        ],
        "EXTRACT-MODEL": [
            " arg 1) Model Path: logs/mi-test/G_168000.pth",
            " arg 2) Model save name: MyModel",
            " arg 3) Sample rate: 40k (32k, 40k, 48k)",
            " arg 4) Has Pitch Guidance?: 1 (0 for no, 1 for yes)",
            ' arg 5) Model information: "My Model"',
            " arg 6) Model architecture version: v2 (use either v1 or v2)\n",
            'Example: logs/mi-test/G_168000.pth MyModel 40k 1 "Created by Cole Mangio" v2',
        ],
    }
    for help_line in page_help.get(cli_current_page, []):
        print(help_line)
    print("")
1420 |
def change_page(page):
    """Set the active CLI page.

    Returns:
        0, so execute_command can return it directly as a success code.
    """
    global cli_current_page
    cli_current_page = page
    return 0
1425 |
def execute_command(com):
    """Dispatch one CLI input: either navigate between pages or run the
    current page's action with the raw command string."""
    navigation = {
        "go home": "HOME",
        "go infer": "INFER",
        "go pre-process": "PRE-PROCESS",
        "go extract-feature": "EXTRACT-FEATURE",
        "go train": "TRAIN",
        "go train-feature": "TRAIN-FEATURE",
        "go extract-model": "EXTRACT-MODEL",
    }
    if com in navigation:
        return change_page(navigation[com])
    if com[:3] == "go ":
        # "go <unknown>" is reported but is not an error
        print("page '%s' does not exist!" % com[3:])
        return 0

    # anything else is handled by the page the user is currently on
    handlers = {
        "INFER": cli_infer,
        "PRE-PROCESS": cli_pre_process,
        "EXTRACT-FEATURE": cli_extract_feature,
        "TRAIN": cli_train,
        "TRAIN-FEATURE": cli_train_feature,
        "EXTRACT-MODEL": cli_extract_model,
    }
    handler = handlers.get(cli_current_page)
    if handler is not None:
        handler(com)
1458 |
def cli_navigation_loop():
    """Interactive CLI loop: show the current page's help, read a command,
    and execute it; command errors are printed without ending the loop."""
    while True:
        print("You are currently in '%s':" % cli_current_page)
        print_page_details()
        user_command = input("%s: " % cli_current_page)
        # input() stays outside the try so EOF/interrupt still exits the loop
        try:
            execute_command(user_command)
        except:
            print(traceback.format_exc())
1468 |
# Entry point: run the interactive CLI instead of the Gradio UI when CLI mode
# is enabled in the shared config (presumably set via a command-line flag —
# config is defined earlier in the file). cli_navigation_loop() never returns.
if(config.is_cli):
    print("\n\nMangio-RVC-Fork v2 CLI App!\n")
    print("Welcome to the CLI version of RVC. Please read the documentation on https://github.com/Mangio621/Mangio-RVC-Fork (README.MD) to understand how to use this app.\n")
    cli_navigation_loop()
1473 |
1474 | #endregion
1475 |
1476 | #region RVC WebUI App
1477 |
def get_presets():
    """Load inference preset names from ../inference-presets.json.

    NOTE(review): assumes the file holds a top-level "presets" list of
    objects that each carry a "name" key — confirm against the JSON file.
    """
    with open('../inference-presets.json', 'r') as file:
        data = json.load(file)
    return [preset['name'] for preset in data['presets']]
1487 |
def change_choices2():
    """Rescan ./audios for wav/mp3 files and return a Gradio dropdown update."""
    found = [
        os.path.join('./audios', filename)
        for filename in os.listdir("./audios")
        if filename.endswith(('.wav', '.mp3'))
    ]
    return {"choices": sorted(found), "__type__": "update"}
1494 |
# Initial scan of ./audios for selectable input files (wav/mp3 only); the
# list is refreshed at runtime via change_choices2(), which returns full
# paths, whereas this initial list holds bare filenames.
audio_files=[]
for filename in os.listdir("./audios"):
    if filename.endswith(('.wav','.mp3')):
        audio_files.append(filename)
1499 |
def get_index():
    """Return the path of the first .index file for the first model, or ''.

    Only works on Colab, where logs live under the fixed repo path; returns
    '' elsewhere, when no model name is available, or when the model's log
    folder holds no .index file.
    """
    if check_for_name() != '':
        if config.iscolab:
            chosen_model = sorted(names)[0].split(".")[0]
            logs_path = "/content/Retrieval-based-Voice-Conversion-WebUI/logs/" + chosen_model
            for file in os.listdir(logs_path):
                if file.endswith(".index"):
                    return os.path.join(logs_path, file)
            return ''
        else:
            return ''
    # fix: previously fell through and returned None implicitly when
    # check_for_name() gave no model; normalize to '' for the Dropdown value
    return ''
1511 |
def get_indexes():
    """Recursively collect all .index files under the Colab logs directory.

    Returns:
        A list of index-file paths on Colab; an empty list elsewhere.
        (Previously returned '' off-Colab — normalized so the Dropdown
        choices are always a list.)
    """
    if not config.iscolab:
        return []
    indexes_list = []
    for dirpath, dirnames, filenames in os.walk(
        "/content/Retrieval-based-Voice-Conversion-WebUI/logs/"
    ):
        for filename in filenames:
            if filename.endswith(".index"):
                indexes_list.append(os.path.join(dirpath, filename))
    return indexes_list
1522 |
def get_name():
    """Return the alphabetically first discovered audio file name, or ''."""
    return sorted(audio_files)[0] if audio_files else ''
1528 |
def save_to_wav(record_button):
    """Move a just-recorded temp file into ./audios under a timestamped name.

    Args:
        record_button: path of the recorded file, or None when nothing
            was recorded yet.

    Returns:
        The new path inside ./audios, or None when the input was None.
    """
    if record_button is None:
        return None
    stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    new_path = './audios/' + stamp + '.wav'
    shutil.move(record_button, new_path)
    return new_path
1538 |
def save_to_wav2(dropbox):
    """Move an uploaded file (object with a .name path) into ./audios.

    Returns:
        The file's new path inside ./audios.
    """
    source = dropbox.name
    shutil.move(source, './audios')
    return os.path.join('./audios', os.path.basename(source))
1543 |
def match_index(speaker):
    """Find the first .index file in the Colab log folder for *speaker*.

    The folder name is the speaker/model name without its extension.
    Returns the index path, or None when the folder holds no .index file.
    """
    model_folder = speaker.split(".")[0]
    parent_dir = "/content/Retrieval-based-Voice-Conversion-WebUI/logs/" + model_folder
    for entry in os.listdir(parent_dir):
        if entry.endswith(".index"):
            return os.path.join(parent_dir, entry)
1551 |
def check_for_name():
    """Return the first model name (sorted) from the global list, or ''."""
    return sorted(names)[0] if names else ''
1557 |
def download_from_url(url, model):
    """Download a model archive from *url* and install its files locally.

    Google Drive links go through gdown, mega.nz through the Mega client,
    anything else through wget. The zip is unpacked into ./unzips, then
    .index/.npy files are moved to ./logs/<model>/ and a plain .pth
    (not a G_/D_ training checkpoint) becomes ./weights/<model>.pth.

    Returns:
        "Success." on completion, or an error message string.
    """
    url = url.strip()
    if url == '':
        return "URL cannot be left empty."
    # start from clean staging areas so leftovers from a previous download
    # cannot be mistaken for this model's files
    zip_dirs = ["zips", "unzips"]
    for directory in zip_dirs:
        if os.path.exists(directory):
            shutil.rmtree(directory)
    os.makedirs("zips", exist_ok=True)
    os.makedirs("unzips", exist_ok=True)
    zipfile = model + '.zip'
    zipfile_path = './zips/' + zipfile
    MODELEPOCH = ''
    if "drive.google.com" in url:
        subprocess.run(["gdown", url, "--fuzzy", "-O", zipfile_path])
    elif "mega.nz" in url:
        m = Mega()
        m.download_url(url, './zips')
    else:
        subprocess.run(["wget", url, "-O", f"./zips/{zipfile}"])
    # NOTE(review): a non-.zip entry makes this return early even if a .zip
    # appears later in the listing order — confirm this is intended
    for filename in os.listdir("./zips"):
        if filename.endswith(".zip"):
            zipfile_path = os.path.join("./zips/",filename)
            shutil.unpack_archive(zipfile_path, "./unzips", 'zip')
        else:
            return "No zipfile found."
    for root, dirs, files in os.walk('./unzips'):
        for file in files:
            if "G_" in file:
                # epoch number parsed from a G_<epoch>.pth checkpoint name
                MODELEPOCH = file.split("G_")[1].split(".")[0]
        if MODELEPOCH == '':
            MODELEPOCH = '404'  # fallback marker when no G_ checkpoint exists
        for file in files:
            file_path = os.path.join(root, file)
            if file.endswith(".npy") or file.endswith(".index"):
                subprocess.run(["mkdir", "-p", f"./logs/{model}"])
                subprocess.run(["mv", file_path, f"./logs/{model}/"])
            elif "G_" not in file and "D_" not in file and file.endswith(".pth"):
                subprocess.run(["mv", file_path, f"./weights/{model}.pth"])
    shutil.rmtree("zips")
    shutil.rmtree("unzips")
    return "Success."
1600 |
1601 | with gr.Blocks(theme=gr.themes.Base()) as app:
1602 | with gr.Tabs():
1603 | with gr.TabItem("Inference"):
1604 | gr.HTML("
Easy GUI v2 (rejekts) - adapted to Mangio-RVC-Fork 💻
")
1605 | # Inference Preset Row
1606 | # with gr.Row():
1607 | # mangio_preset = gr.Dropdown(label="Inference Preset", choices=sorted(get_presets()))
1608 | # mangio_preset_name_save = gr.Textbox(
1609 | # label="Your preset name"
1610 | # )
1611 | # mangio_preset_save_btn = gr.Button('Save Preset', variant="primary")
1612 |
1613 | # Other RVC stuff
1614 | with gr.Row():
1615 | sid0 = gr.Dropdown(label="1.Choose your Model.", choices=sorted(names), value=check_for_name())
1616 | refresh_button = gr.Button("Refresh", variant="primary")
1617 | if check_for_name() != '':
1618 | get_vc(sorted(names)[0])
1619 | vc_transform0 = gr.Number(label="Optional: You can change the pitch here or leave it at 0.", value=0)
1620 | #clean_button = gr.Button(i18n("卸载音色省显存"), variant="primary")
1621 | spk_item = gr.Slider(
1622 | minimum=0,
1623 | maximum=2333,
1624 | step=1,
1625 | label=i18n("请选择说话人id"),
1626 | value=0,
1627 | visible=False,
1628 | interactive=True,
1629 | )
1630 | #clean_button.click(fn=clean, inputs=[], outputs=[sid0])
1631 | sid0.change(
1632 | fn=get_vc,
1633 | inputs=[sid0],
1634 | outputs=[spk_item],
1635 | )
1636 | but0 = gr.Button("Convert", variant="primary")
1637 | with gr.Row():
1638 | with gr.Column():
1639 | with gr.Row():
1640 | dropbox = gr.File(label="Drop your audio here & hit the Reload button.")
1641 | with gr.Row():
1642 | record_button=gr.Audio(source="microphone", label="OR Record audio.", type="filepath")
1643 | with gr.Row():
1644 | input_audio0 = gr.Dropdown(
1645 | label="2.Choose your audio.",
1646 | value="./audios/someguy.mp3",
1647 | )
1648 | dropbox.upload(fn=save_to_wav2, inputs=[dropbox], outputs=[input_audio0])
1649 | dropbox.upload(fn=change_choices2, inputs=[], outputs=[input_audio0])
1650 | refresh_button2 = gr.Button("Refresh", variant="primary", size='sm')
1651 | refresh_button2.click(fn=change_choices2, inputs=[], outputs=[input_audio0])
1652 | record_button.change(fn=save_to_wav, inputs=[record_button], outputs=[input_audio0])
1653 | record_button.change(fn=change_choices2, inputs=[], outputs=[input_audio0])
1654 | with gr.Column():
1655 | with gr.Accordion("Index Settings", open=True):
1656 | file_index1 = gr.Dropdown(
1657 | label="3. Path to your added.index file (if it didn't automatically find it.)",
1658 | choices=get_indexes(),
1659 | value=get_index(),
1660 | interactive=True,
1661 | )
1662 | refresh_button.click(
1663 | fn=change_choices, inputs=[], outputs=[sid0, file_index1]
1664 | )
1665 | # file_big_npy1 = gr.Textbox(
1666 | # label=i18n("特征文件路径"),
1667 | # value="E:\\codes\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
1668 | # interactive=True,
1669 | # )
1670 | index_rate1 = gr.Slider(
1671 | minimum=0,
1672 | maximum=1,
1673 | label=i18n("检索特征占比"),
1674 | value=0.66,
1675 | interactive=True,
1676 | )
1677 | vc_output2 = gr.Audio(label="Output Audio (Click on the Three Dots in the Right Corner to Download)")
1678 | f0method0 = gr.Radio(
1679 | label="Optional: Change the Pitch Extraction Algorithm. Use PM for fast results or Harvest for better low range (slower results) or Crepe for the best of both worlds.",
1680 | choices=["pm", "harvest", "dio", "crepe", "crepe-tiny", "mangio-crepe", "mangio-crepe-tiny"], # Fork Feature. Add Crepe-Tiny
1681 | value="pm",
1682 | interactive=True,
1683 | )
1684 | with gr.Accordion("More", open=False):
1685 | crepe_hop_length = gr.Slider(
1686 | minimum=1,
1687 | maximum=512,
1688 | step=1,
1689 | label=i18n("crepe_hop_length"),
1690 | value=160,
1691 | interactive=True
1692 | )
1693 | filter_radius0 = gr.Slider(
1694 | minimum=0,
1695 | maximum=7,
1696 | label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"),
1697 | value=3,
1698 | step=1,
1699 | interactive=True,
1700 | )
1701 | resample_sr0 = gr.Slider(
1702 | minimum=0,
1703 | maximum=48000,
1704 | label=i18n("后处理重采样至最终采样率,0为不进行重采样"),
1705 | value=0,
1706 | step=1,
1707 | interactive=True,
1708 | )
1709 | rms_mix_rate0 = gr.Slider(
1710 | minimum=0,
1711 | maximum=1,
1712 | label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"),
1713 | value=1,
1714 | interactive=True,
1715 | )
1716 | protect0 = gr.Slider(
1717 | minimum=0,
1718 | maximum=0.5,
1719 | label=i18n("保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"),
1720 | value=0.33,
1721 | step=0.01,
1722 | interactive=True,
1723 | )
1724 | with gr.Row():
1725 | vc_output1 = gr.Textbox("")
1726 | f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"), visible=False)
1727 |
1728 | but0.click(
1729 | vc_single,
1730 | [
1731 | spk_item,
1732 | input_audio0,
1733 | vc_transform0,
1734 | f0_file,
1735 | f0method0,
1736 | file_index1,
1737 | # file_index2,
1738 | # file_big_npy1,
1739 | index_rate1,
1740 | filter_radius0,
1741 | resample_sr0,
1742 | rms_mix_rate0,
1743 | protect0,
1744 | crepe_hop_length
1745 | ],
1746 | [vc_output1, vc_output2],
1747 | )
1748 | with gr.Accordion("Batch Conversion",open=False):
1749 | with gr.Row():
1750 | with gr.Column():
1751 | vc_transform1 = gr.Number(
1752 | label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0
1753 | )
1754 | opt_input = gr.Textbox(label=i18n("指定输出文件夹"), value="opt")
1755 | f0method1 = gr.Radio(
1756 | label=i18n(
1757 | "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU"
1758 | ),
1759 | choices=["pm", "harvest", "crepe"],
1760 | value="pm",
1761 | interactive=True,
1762 | )
1763 | filter_radius1 = gr.Slider(
1764 | minimum=0,
1765 | maximum=7,
1766 | label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"),
1767 | value=3,
1768 | step=1,
1769 | interactive=True,
1770 | )
1771 | with gr.Column():
1772 | file_index3 = gr.Textbox(
1773 | label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
1774 | value="",
1775 | interactive=True,
1776 | )
1777 | file_index4 = gr.Dropdown(
1778 | label=i18n("自动检测index路径,下拉式选择(dropdown)"),
1779 | choices=sorted(index_paths),
1780 | interactive=True,
1781 | )
1782 | refresh_button.click(
1783 | fn=lambda: change_choices()[1],
1784 | inputs=[],
1785 | outputs=file_index4,
1786 | )
1787 | # file_big_npy2 = gr.Textbox(
1788 | # label=i18n("特征文件路径"),
1789 | # value="E:\\codes\\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
1790 | # interactive=True,
1791 | # )
1792 | index_rate2 = gr.Slider(
1793 | minimum=0,
1794 | maximum=1,
1795 | label=i18n("检索特征占比"),
1796 | value=1,
1797 | interactive=True,
1798 | )
1799 | with gr.Column():
1800 | resample_sr1 = gr.Slider(
1801 | minimum=0,
1802 | maximum=48000,
1803 | label=i18n("后处理重采样至最终采样率,0为不进行重采样"),
1804 | value=0,
1805 | step=1,
1806 | interactive=True,
1807 | )
1808 | rms_mix_rate1 = gr.Slider(
1809 | minimum=0,
1810 | maximum=1,
1811 | label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"),
1812 | value=1,
1813 | interactive=True,
1814 | )
1815 | protect1 = gr.Slider(
1816 | minimum=0,
1817 | maximum=0.5,
1818 | label=i18n(
1819 | "保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"
1820 | ),
1821 | value=0.33,
1822 | step=0.01,
1823 | interactive=True,
1824 | )
1825 | with gr.Column():
1826 | dir_input = gr.Textbox(
1827 | label=i18n("输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)"),
1828 | value="E:\codes\py39\\test-20230416b\\todo-songs",
1829 | )
1830 | inputs = gr.File(
1831 | file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹")
1832 | )
1833 | with gr.Row():
1834 | format1 = gr.Radio(
1835 | label=i18n("导出文件格式"),
1836 | choices=["wav", "flac", "mp3", "m4a"],
1837 | value="flac",
1838 | interactive=True,
1839 | )
1840 | but1 = gr.Button(i18n("转换"), variant="primary")
1841 | vc_output3 = gr.Textbox(label=i18n("输出信息"))
1842 | but1.click(
1843 | vc_multi,
1844 | [
1845 | spk_item,
1846 | dir_input,
1847 | opt_input,
1848 | inputs,
1849 | vc_transform1,
1850 | f0method1,
1851 | file_index3,
1852 | file_index4,
1853 | # file_big_npy2,
1854 | index_rate2,
1855 | filter_radius1,
1856 | resample_sr1,
1857 | rms_mix_rate1,
1858 | protect1,
1859 | format1,
1860 | crepe_hop_length,
1861 | ],
1862 | [vc_output3],
1863 | )
1864 | with gr.TabItem("Download Model"):
1865 | with gr.Row():
1866 | url=gr.Textbox(label="Enter the URL to the Model:")
1867 | with gr.Row():
1868 | model = gr.Textbox(label="Name your model:")
1869 | download_button=gr.Button(label="Download")
1870 | with gr.Row():
1871 | status_bar=gr.Textbox(label="")
1872 | download_button.click(fn=download_from_url, inputs=[url, model], outputs=[status_bar])
1873 | with gr.Row():
1874 | gr.Markdown(
1875 | """
1876 | Original RVC:https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI
1877 | Mangio's RVC Fork:https://github.com/Mangio621/Mangio-RVC-Fork
1878 | ❤️ If you like the EasyGUI, help me keep it.❤️
1879 | https://paypal.me/lesantillan
1880 | """
1881 | )
1882 | '''
1883 | with gr.TabItem("Train", visible=False):
1884 | with gr.Row():
1885 | exp_dir1 = gr.Textbox(label="Voice Name:", value="My-Voice")
1886 | sr2 = gr.Radio(
1887 | label=i18n("目标采样率"),
1888 | choices=["40k", "48k"],
1889 | value="40k",
1890 | interactive=True,
1891 | visible=False
1892 | )
1893 | if_f0_3 = gr.Radio(
1894 | label=i18n("模型是否带音高指导(唱歌一定要, 语音可以不要)"),
1895 | choices=[True, False],
1896 | value=True,
1897 | interactive=True,
1898 | visible=False
1899 | )
1900 | version19 = gr.Radio(
1901 | label=i18n("版本(目前仅40k支持了v2)"),
1902 | choices=["v1", "v2"],
1903 | value="v2",
1904 | interactive=True,
1905 | visible=False,
1906 | )
1907 | np7 = gr.Slider(
1908 | minimum=0,
1909 | maximum=config.n_cpu,
1910 | step=1,
1911 | label=i18n("提取音高和处理数据使用的CPU进程数"),
1912 | value=config.n_cpu,
1913 | interactive=True,
1914 | )
1915 | with gr.Group(): # 暂时单人的, 后面支持最多4人的#数据处理
1916 | with gr.Row():
1917 | trainset_dir4 = gr.Textbox(
1918 | label=i18n("输入训练文件夹路径"), value="/content/dataset"
1919 | )
1920 | spk_id5 = gr.Slider(
1921 | minimum=0,
1922 | maximum=4,
1923 | step=1,
1924 | label=i18n("请指定说话人id"),
1925 | value=0,
1926 | interactive=True,
1927 | visible=False
1928 | )
1929 | but1 = gr.Button(i18n("处理数据"), variant="primary")
1930 | info1 = gr.Textbox(label=i18n("输出信息"), value="")
1931 | but1.click(
1932 | preprocess_dataset, [trainset_dir4, exp_dir1, sr2, np7], [info1]
1933 | )
1934 | with gr.Group():
1935 | #gr.Markdown(value=i18n("step2b: 使用CPU提取音高(如果模型带音高), 使用GPU提取特征(选择卡号)"))
1936 | with gr.Row():
1937 | with gr.Accordion('GPU Settings', open=False):
1938 | gpus6 = gr.Textbox(
1939 | label=i18n("以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"),
1940 | value=gpus,
1941 | interactive=True,
1942 | )
1943 | gpu_info9 = gr.Textbox(label=i18n("显卡信息"), value=gpu_info)
1944 | with gr.Column():
1945 | f0method8 = gr.Radio(
1946 | label=i18n(
1947 | "选择音高提取算法:输入歌声可用pm提速,高质量语音但CPU差可用dio提速,harvest质量更好但慢"
1948 | ),
1949 | choices=["pm", "harvest", "dio", "crepe", "mangio-crepe"], # Fork feature: Crepe on f0 extraction for training.
1950 | value="harvest",
1951 | interactive=True,
1952 | )
1953 | extraction_crepe_hop_length = gr.Slider(
1954 | minimum=1,
1955 | maximum=512,
1956 | step=1,
1957 | label=i18n("crepe_hop_length"),
1958 | value=128,
1959 | interactive=True
1960 | )
1961 | but2 = gr.Button(i18n("特征提取"), variant="primary")
1962 | info2 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8)
1963 | but2.click(
1964 | extract_f0_feature,
1965 | [gpus6, np7, f0method8, if_f0_3, exp_dir1, version19, extraction_crepe_hop_length],
1966 | [info2],
1967 | )
1968 | with gr.Group():
1969 | #gr.Markdown(value=i18n("step3: 填写训练设置, 开始训练模型和索引"))
1970 | with gr.Row():
1971 | save_epoch10 = gr.Slider(
1972 | minimum=0,
1973 | maximum=50,
1974 | step=1,
1975 | label="Create a backup every # of epochs:",
1976 | value=10,
1977 | interactive=True,
1978 | )
1979 | total_epoch11 = gr.Slider(
1980 | minimum=0,
1981 | maximum=10000,
1982 | step=1,
1983 | label=i18n("总训练轮数total_epoch"),
1984 | value=100,
1985 | interactive=True,
1986 | )
1987 | batch_size12 = gr.Slider(
1988 | minimum=1,
1989 | maximum=40,
1990 | step=1,
1991 | label=i18n("每张显卡的batch_size"),
1992 | value=default_batch_size,
1993 | interactive=True,
1994 | )
1995 | if_save_latest13 = gr.Radio(
1996 | label=i18n("是否仅保存最新的ckpt文件以节省硬盘空间"),
1997 | choices=[i18n("是"), i18n("否")],
1998 | value=i18n("是"),
1999 | interactive=True,
2000 | )
2001 | if_cache_gpu17 = gr.Radio(
2002 | label=i18n(
2003 | "是否缓存所有训练集至显存. 10min以下小数据可缓存以加速训练, 大数据缓存会炸显存也加不了多少速"
2004 | ),
2005 | choices=[i18n("是"), i18n("否")],
2006 | value=i18n("否"),
2007 | interactive=True,
2008 | )
2009 | if_save_every_weights18 = gr.Radio(
2010 | label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"),
2011 | choices=[i18n("是"), i18n("否")],
2012 | value=i18n("否"),
2013 | interactive=True,
2014 | )
2015 | with gr.Row():
2016 | with gr.Accordion("Advanced", open=False):
2017 | pretrained_G14 = gr.Textbox(
2018 | label=i18n("加载预训练底模G路径"),
2019 | value="pretrained/f0G40k.pth",
2020 | interactive=True,
2021 | )
2022 | pretrained_D15 = gr.Textbox(
2023 | label=i18n("加载预训练底模D路径"),
2024 | value="pretrained/f0D40k.pth",
2025 | interactive=True,
2026 | )
2027 | sr2.change(
2028 | change_sr2,
2029 | [sr2, if_f0_3, version19],
2030 | [pretrained_G14, pretrained_D15, version19],
2031 | )
2032 | version19.change(
2033 | change_version19,
2034 | [sr2, if_f0_3, version19],
2035 | [pretrained_G14, pretrained_D15],
2036 | )
2037 | if_f0_3.change(
2038 | change_f0,
2039 | [if_f0_3, sr2, version19],
2040 | [f0method8, pretrained_G14, pretrained_D15],
2041 | )
2042 | gpus16 = gr.Textbox(
2043 | label=i18n("以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"),
2044 | value=gpus,
2045 | interactive=True,
2046 | )
2047 | but3 = gr.Button(i18n("训练模型"), variant="primary")
2048 | but4 = gr.Button(i18n("训练特征索引"), variant="primary")
2049 | but5 = gr.Button(i18n("一键训练"), variant="primary", visible=False)
2050 | info3 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=10)
2051 | but3.click(
2052 | click_train,
2053 | [
2054 | exp_dir1,
2055 | sr2,
2056 | if_f0_3,
2057 | spk_id5,
2058 | save_epoch10,
2059 | total_epoch11,
2060 | batch_size12,
2061 | if_save_latest13,
2062 | pretrained_G14,
2063 | pretrained_D15,
2064 | gpus16,
2065 | if_cache_gpu17,
2066 | if_save_every_weights18,
2067 | version19,
2068 | ],
2069 | info3,
2070 | )
2071 | but4.click(train_index, [exp_dir1, version19], info3)
2072 | but5.click(
2073 | train1key,
2074 | [
2075 | exp_dir1,
2076 | sr2,
2077 | if_f0_3,
2078 | trainset_dir4,
2079 | spk_id5,
2080 | np7,
2081 | f0method8,
2082 | save_epoch10,
2083 | total_epoch11,
2084 | batch_size12,
2085 | if_save_latest13,
2086 | pretrained_G14,
2087 | pretrained_D15,
2088 | gpus16,
2089 | if_cache_gpu17,
2090 | if_save_every_weights18,
2091 | version19,
2092 | extraction_crepe_hop_length
2093 | ],
2094 | info3,
2095 | )
2096 |
2097 |
2098 | try:
2099 | if tab_faq == "常见问题解答":
2100 | with open("docs/faq.md", "r", encoding="utf8") as f:
2101 | info = f.read()
2102 | else:
2103 | with open("docs/faq_en.md", "r", encoding="utf8") as f:
2104 | info = f.read()
2105 | gr.Markdown(value=info)
2106 | except:
2107 | gr.Markdown(traceback.format_exc())
2108 | '''
2109 |
2110 | #region Mangio Preset Handler Region
def save_preset(
    preset_name,
    sid0,
    vc_transform,
    input_audio,
    f0method,
    crepe_hop_length,
    filter_radius,
    file_index1,
    file_index2,
    index_rate,
    resample_sr,
    rms_mix_rate,
    protect,
    f0_file
):
    """Append the current inference settings as a named preset to
    ../inference-presets.json and persist the file back to disk."""
    # Load the existing preset collection first; we only ever append to it.
    with open('../inference-presets.json', 'r') as file:
        data = json.load(file)
    # Map each UI control value onto the preset schema.
    data['presets'].append({
        'name': preset_name,
        'model': sid0,
        'transpose': vc_transform,
        'audio_file': input_audio,
        'f0_method': f0method,
        'crepe_hop_length': crepe_hop_length,
        'median_filtering': filter_radius,
        'feature_path': file_index1,
        'auto_feature_path': file_index2,
        'search_feature_ratio': index_rate,
        'resample': resample_sr,
        'volume_envelope': rms_mix_rate,
        'protect_voiceless': protect,
        'f0_file_path': f0_file
    })
    # Write the updated collection back out.
    with open('../inference-presets.json', 'w') as file:
        json.dump(data, file)
        file.flush()
    print("Saved Preset %s into inference-presets.json!" % preset_name)
2151 |
2152 |
def on_preset_changed(preset_name):
    """Look up *preset_name* in ../inference-presets.json.

    Currently a stub: it logs the lookup but returns an empty tuple, so no
    UI components are actually updated (the output wiring is commented out
    at the call site).
    """
    print("Changed Preset to %s!" % preset_name)
    with open('../inference-presets.json', 'r') as file:
        data = json.load(file)

    print("Searching for " + preset_name)
    returning_preset = None
    for preset in data['presets']:
        if preset['name'] == preset_name:
            print("Found a preset")
            returning_preset = preset
    # No component values are returned yet; updates are disabled.
    return ()
2181 |
2182 | # Preset State Changes
2183 |
2184 | # This click calls save_preset that saves the preset into inference-presets.json with the preset name
2185 | # mangio_preset_save_btn.click(
2186 | # fn=save_preset,
2187 | # inputs=[
2188 | # mangio_preset_name_save,
2189 | # sid0,
2190 | # vc_transform0,
2191 | # input_audio0,
2192 | # f0method0,
2193 | # crepe_hop_length,
2194 | # filter_radius0,
2195 | # file_index1,
2196 | # file_index2,
2197 | # index_rate1,
2198 | # resample_sr0,
2199 | # rms_mix_rate0,
2200 | # protect0,
2201 | # f0_file
2202 | # ],
2203 | # outputs=[]
2204 | # )
2205 |
2206 | # mangio_preset.change(
2207 | # on_preset_changed,
2208 | # inputs=[
2209 | # # Pass inputs here
2210 | # mangio_preset
2211 | # ],
2212 | # outputs=[
2213 | # # Pass Outputs here. These refer to the gradio elements that we want to directly change
2214 | # # sid0,
2215 | # # vc_transform0,
2216 | # # input_audio0,
2217 | # # f0method0,
2218 | # # crepe_hop_length,
2219 | # # filter_radius0,
2220 | # # file_index1,
2221 | # # file_index2,
2222 | # # index_rate1,
2223 | # # resample_sr0,
2224 | # # rms_mix_rate0,
2225 | # # protect0,
2226 | # # f0_file
2227 | # ]
2228 | # )
2229 |
2230 |
2231 | # with gr.TabItem(i18n("招募音高曲线前端编辑器")):
2232 | # gr.Markdown(value=i18n("加开发群联系我xxxxx"))
2233 | # with gr.TabItem(i18n("点击查看交流、问题反馈群号")):
2234 | # gr.Markdown(value=i18n("xxxxx"))
2235 |
2236 |
# Launch the Gradio app. Hosted notebook sessions (Colab/Paperspace) cannot
# expose local ports, so a public share link is used there; otherwise the app
# binds locally on the configured port.
if config.iscolab or config.paperspace: # Share gradio link for colab and paperspace (FORK FEATURE)
    app.queue(concurrency_count=511, max_size=1022).launch(share=True)
else:
    app.queue(concurrency_count=511, max_size=1022).launch(
        server_name="0.0.0.0",  # listen on all interfaces for LAN access
        inbrowser=not config.noautoopen,
        server_port=config.listen_port,
        quiet=True,
    )
2246 |
--------------------------------------------------------------------------------
/GUI.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | import datetime, subprocess
3 | from mega import Mega
4 | now_dir = os.getcwd()
5 | sys.path.append(now_dir)
6 | import logging
7 | import shutil
8 | import threading
9 | import traceback
10 | import warnings
11 | from random import shuffle
12 | from subprocess import Popen
13 | from time import sleep
14 | import json
15 | import pathlib
16 |
17 | import fairseq
18 | import faiss
19 | import gradio as gr
20 | import numpy as np
21 | import torch
22 | from dotenv import load_dotenv
23 | from sklearn.cluster import MiniBatchKMeans
24 |
25 | from configs.config import Config
26 | from i18n.i18n import I18nAuto
27 | from infer.lib.train.process_ckpt import (
28 | change_info,
29 | extract_small_model,
30 | merge,
31 | show_info,
32 | )
33 | from infer.modules.uvr5.modules import uvr
34 | from infer.modules.vc.modules import VC
35 |
# Silence numba's verbose INFO logging.
logging.getLogger("numba").setLevel(logging.WARNING)

logger = logging.getLogger(__name__)

# Fresh TEMP dir per run; also remove stale bundled packages left behind by
# older releases in the portable runtime.
tmp = os.path.join(now_dir, "TEMP")
shutil.rmtree(tmp, ignore_errors=True)
shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True)
shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack" % (now_dir), ignore_errors=True)
os.makedirs(tmp, exist_ok=True)
os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True)
os.makedirs(os.path.join(now_dir, "assets/weights"), exist_ok=True)
os.environ["TEMP"] = tmp
warnings.filterwarnings("ignore")
# Fixed seed for reproducible behaviour across runs.
torch.manual_seed(114514)


# Environment variables (weight/index roots etc.) come from .env.
load_dotenv()
config = Config()
vc = VC(config)
55 |
56 |
if config.dml == True:

    def forward_dml(ctx, x, scale):
        # DirectML backend: replace fairseq's GradMultiply forward with an
        # identity (detached clone) so inference still works without the op.
        ctx.scale = scale
        res = x.clone().detach()
        return res

    fairseq.modules.grad_multiply.GradMultiply.forward = forward_dml
i18n = I18nAuto()
logger.info(i18n)
# Detect whether any NVIDIA GPU usable for training/accelerated inference exists.
ngpu = torch.cuda.device_count()
gpu_infos = []
mem = []
if_gpu_ok = False

if torch.cuda.is_available() or ngpu != 0:
    for i in range(ngpu):
        gpu_name = torch.cuda.get_device_name(i)
        # Substring match against known-capable card families.
        if any(
            value in gpu_name.upper()
            for value in [
                "10",
                "16",
                "20",
                "30",
                "40",
                "A2",
                "A3",
                "A4",
                "P4",
                "A50",
                "500",
                "A60",
                "70",
                "80",
                "90",
                "M4",
                "T4",
                "TITAN",
            ]
        ):
            # A10#A100#V100#A40#P40#M40#K80#A4500
            if_gpu_ok = True  # at least one usable NVIDIA GPU
            gpu_infos.append("%s\t%s" % (i, gpu_name))
            # Total VRAM rounded to whole GiB (+0.4 rounds e.g. 7.6 -> 8).
            mem.append(
                int(
                    torch.cuda.get_device_properties(i).total_memory
                    / 1024
                    / 1024
                    / 1024
                    + 0.4
                )
            )
if if_gpu_ok and len(gpu_infos) > 0:
    gpu_info = "\n".join(gpu_infos)
    # Default batch size: half the smallest card's VRAM in GiB.
    default_batch_size = min(mem) // 2
else:
    gpu_info = i18n("很遗憾您这没有能用的显卡来支持您训练")
    default_batch_size = 1
# NOTE(review): i[0] keeps only the first character of "<idx>\t<name>", so
# GPU indexes >= 10 would be truncated here — verify on large multi-GPU hosts.
gpus = "-".join([i[0] for i in gpu_infos])
118 |
119 |
class ToolButton(gr.Button, gr.components.FormComponent):
    """Small button with single emoji as text, fits inside gradio forms"""

    def __init__(self, **kwargs):
        # Force the compact "tool" variant; all other kwargs pass through.
        super().__init__(variant="tool", **kwargs)

    def get_block_name(self):
        # Render with the standard button block in the Gradio frontend.
        return "button"
128 |
129 |
# Roots for voice weights, UVR5 weights and faiss indexes, from .env.
weight_root = os.getenv("weight_root")
weight_uvr5_root = os.getenv("weight_uvr5_root")
index_root = os.getenv("index_root")

# Voice model checkpoints (.pth) available for inference.
names = []
for name in os.listdir(weight_root):
    if name.endswith(".pth"):
        names.append(name)
# Final ("added") faiss indexes; "trained" intermediates are skipped.
index_paths = []
for root, dirs, files in os.walk(index_root, topdown=False):
    for name in files:
        if name.endswith(".index") and "trained" not in name:
            index_paths.append("%s/%s" % (root, name))
# UVR5 vocal-separation models, listed without the .pth extension.
uvr5_names = []
for name in os.listdir(weight_uvr5_root):
    if name.endswith(".pth") or "onnx" in name:
        uvr5_names.append(name.replace(".pth", ""))
147 |
148 |
def change_choices():
    """Re-scan models, indexes and audio files on disk and return three
    Gradio update dicts so the dropdowns reflect newly added files."""
    names = [f for f in os.listdir(weight_root) if f.endswith(".pth")]
    index_paths = [
        "%s/%s" % (root, f)
        for root, dirs, files in os.walk(index_root, topdown=False)
        for f in files
        if f.endswith(".index") and "trained" not in f
    ]
    audio_files = [
        f
        for f in os.listdir("./audios")
        if f.endswith(('.wav', '.mp3', '.ogg'))
    ]
    return (
        {"choices": sorted(names), "__type__": "update"},
        {"choices": sorted(index_paths), "__type__": "update"},
        {"choices": sorted(audio_files), "__type__": "update"},
    )
167 |
def clean():
    """Return a Gradio update that blanks out a component's value."""
    update = {"__type__": "update"}
    update["value"] = ""
    return update
170 |
171 |
def export_onnx():
    """Export the selected RVC model to ONNX.

    The import is deferred so the onnx toolchain is only loaded when an
    export is actually requested.
    """
    from infer.modules.onnx.export import export_onnx as eo

    eo()
176 |
177 |
# Map the UI sample-rate labels to their numeric rates in Hz.
sr_dict = {
    "32k": 32000,
    "40k": 40000,
    "48k": 48000,
}
183 |
184 |
def if_done(done, p):
    """Block until subprocess *p* exits, then flip done[0] to True.

    Runs on a watcher thread; done[0] is the shared flag the log-tailing
    loops poll to know when to stop streaming.
    """
    while p.poll() is None:  # None means the process is still running
        sleep(0.5)
    done[0] = True
192 |
193 |
def if_done_multi(done, ps):
    """Block until every subprocess in *ps* has exited, then set done[0].

    Runs on a watcher thread, mirroring if_done() for multiple workers.
    """
    while True:
        # A poll() of None means that process is still running; keep waiting
        # as long as any worker is alive.
        if any(p.poll() is None for p in ps):
            sleep(0.5)
        else:
            break
    done[0] = True
207 |
208 |
def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
    """Slice/normalize the training set in a worker subprocess and stream its log.

    Generator used as a Gradio callback: yields the growing contents of
    preprocess.log roughly once per second until the worker exits, then
    yields the final log once more.
    """
    sr = sr_dict[sr]
    os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
    # Truncate the log so only this run's output is streamed.
    f = open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "w")
    f.close()
    per = 3.0 if config.is_half else 3.7
    cmd = '"%s" infer/modules/train/preprocess.py "%s" %s %s "%s/logs/%s" %s %.1f' % (
        config.python_cmd,
        trainset_dir,
        sr,
        n_p,
        now_dir,
        exp_dir,
        config.noparallel,
        per,
    )
    logger.info(cmd)
    p = Popen(cmd, shell=True)  # , stdin=PIPE, stdout=PIPE,stderr=PIPE,cwd=now_dir
    # Gradio would block on the pipe until the process finished, so the worker
    # writes to a log file that we tail on a timer instead.
    done = [False]
    threading.Thread(
        target=if_done,
        args=(
            done,
            p,
        ),
    ).start()
    while 1:
        with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f:
            yield (f.read())
        sleep(1)
        if done[0]:
            break
    with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f:
        log = f.read()
    logger.info(log)
    yield log
246 |
247 |
248 | # but2.click(extract_f0,[gpus6,np7,f0method8,if_f0_3,trainset_dir4],[info2])
def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvpe):
    """Run f0 (pitch) extraction and then HuBERT feature extraction as
    subprocesses, streaming extract_f0_feature.log while they run.

    Generator used as a Gradio callback: yields the growing log text about
    once per second — first for the pitch phase (only when *if_f0*), then
    for the feature-extraction phase.
    """
    gpus = gpus.split("-")
    os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
    # Truncate the log so only this run's output is streamed.
    f = open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "w")
    f.close()
    if if_f0:
        if f0method != "rmvpe_gpu":
            # CPU-side pitch extractors (pm/harvest/dio/...): one subprocess.
            cmd = (
                '"%s" infer/modules/train/extract/extract_f0_print.py "%s/logs/%s" %s %s'
                % (
                    config.python_cmd,
                    now_dir,
                    exp_dir,
                    n_p,
                    f0method,
                )
            )
            logger.info(cmd)
            p = Popen(
                cmd, shell=True, cwd=now_dir
            )  # , stdin=PIPE, stdout=PIPE,stderr=PIPE
            # Gradio cannot stream the pipe incrementally, so the worker logs
            # to a file that we tail on a timer.
            done = [False]
            threading.Thread(
                target=if_done,
                args=(
                    done,
                    p,
                ),
            ).start()
        else:
            if gpus_rmvpe != "-":
                # rmvpe on GPU: shard the dataset over the listed cards, one
                # subprocess per card.
                gpus_rmvpe = gpus_rmvpe.split("-")
                leng = len(gpus_rmvpe)
                ps = []
                for idx, n_g in enumerate(gpus_rmvpe):
                    cmd = (
                        '"%s" infer/modules/train/extract/extract_f0_rmvpe.py %s %s %s "%s/logs/%s" %s '
                        % (
                            config.python_cmd,
                            leng,
                            idx,
                            n_g,
                            now_dir,
                            exp_dir,
                            config.is_half,
                        )
                    )
                    logger.info(cmd)
                    p = Popen(
                        cmd, shell=True, cwd=now_dir
                    )  # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
                    ps.append(p)
                # Same log-tailing workaround, for multiple workers.
                done = [False]
                threading.Thread(
                    target=if_done_multi,  #
                    args=(
                        done,
                        ps,
                    ),
                ).start()
            else:
                # "-" means no CUDA GPU selected: run the DirectML variant
                # synchronously.
                cmd = (
                    config.python_cmd
                    + ' infer/modules/train/extract/extract_f0_rmvpe_dml.py "%s/logs/%s" '
                    % (
                        now_dir,
                        exp_dir,
                    )
                )
                logger.info(cmd)
                p = Popen(
                    cmd, shell=True, cwd=now_dir
                )  # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
                p.wait()
                done = [True]
        while 1:
            with open(
                "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r"
            ) as f:
                yield (f.read())
            sleep(1)
            if done[0]:
                break
    with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
        log = f.read()
    logger.info(log)
    yield log
    # Feature phase: spawn one extraction subprocess per dataset part/GPU.
    """
    n_part=int(sys.argv[1])
    i_part=int(sys.argv[2])
    i_gpu=sys.argv[3]
    exp_dir=sys.argv[4]
    os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu)
    """
    leng = len(gpus)
    ps = []
    for idx, n_g in enumerate(gpus):
        cmd = (
            '"%s" infer/modules/train/extract_feature_print.py %s %s %s %s "%s/logs/%s" %s'
            % (
                config.python_cmd,
                config.device,
                leng,
                idx,
                n_g,
                now_dir,
                exp_dir,
                version19,
            )
        )
        logger.info(cmd)
        p = Popen(
            cmd, shell=True, cwd=now_dir
        )  # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
        ps.append(p)
    # Log-tailing workaround again (Gradio cannot stream the pipe).
    done = [False]
    threading.Thread(
        target=if_done_multi,
        args=(
            done,
            ps,
        ),
    ).start()
    while 1:
        with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
            yield (f.read())
        sleep(1)
        if done[0]:
            break
    with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
        log = f.read()
    logger.info(log)
    yield log
386 |
387 |
def get_pretrained_models(path_str, f0_str, sr2):
    """Return paths to the pretrained (G, D) checkpoints for the given
    version suffix (*path_str*), f0 prefix (*f0_str*) and sample rate.

    Either tuple entry is "" when the corresponding file is missing, which
    downstream training code treats as "train from scratch".
    """
    if_pretrained_generator_exist = os.access(
        "assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), os.F_OK
    )
    if_pretrained_discriminator_exist = os.access(
        "assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK
    )
    # logger.warn() is a deprecated alias of warning(); use the real method.
    if not if_pretrained_generator_exist:
        logger.warning(
            "assets/pretrained%s/%sG%s.pth not exist, will not use pretrained model",
            path_str,
            f0_str,
            sr2,
        )
    if not if_pretrained_discriminator_exist:
        logger.warning(
            "assets/pretrained%s/%sD%s.pth not exist, will not use pretrained model",
            path_str,
            f0_str,
            sr2,
        )
    return (
        "assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)
        if if_pretrained_generator_exist
        else "",
        "assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
        if if_pretrained_discriminator_exist
        else "",
    )
417 |
418 |
def change_sr2(sr2, if_f0_3, version19):
    """Refresh the pretrained G/D paths when the target sample rate changes."""
    version_suffix = "" if version19 == "v1" else "_v2"
    f0_prefix = "f0" if if_f0_3 else ""
    return get_pretrained_models(version_suffix, f0_prefix, sr2)
423 |
424 |
def change_version19(sr2, if_f0_3, version19):
    """Refresh pretrained paths and the sample-rate radio when the model
    version changes; v1 has no 32k models, so 32k falls back to 40k."""
    path_str = "" if version19 == "v1" else "_v2"
    if sr2 == "32k" and version19 == "v1":
        sr2 = "40k"
    if version19 == "v1":
        sr_choices = ["40k", "48k"]
    else:
        sr_choices = ["40k", "48k", "32k"]
    to_return_sr2 = {"choices": sr_choices, "__type__": "update", "value": sr2}
    f0_str = "f0" if if_f0_3 else ""
    return (*get_pretrained_models(path_str, f0_str, sr2), to_return_sr2)
439 |
440 |
def change_f0(if_f0_3, sr2, version19):  # f0method8,pretrained_G14,pretrained_D15
    """Toggle the f0-method control's visibility and refresh the pretrained
    f0 model paths when the pitch-guidance flag changes."""
    version_suffix = "_v2" if version19 != "v1" else ""
    visibility_update = {"visible": if_f0_3, "__type__": "update"}
    return (visibility_update, *get_pretrained_models(version_suffix, "f0", sr2))
447 |
448 |
449 | # but3.click(click_train,[exp_dir1,sr2,if_f0_3,save_epoch10,total_epoch11,batch_size12,if_save_latest13,pretrained_G14,pretrained_D15,gpus16])
def click_train(
    exp_dir1,
    sr2,
    if_f0_3,
    spk_id5,
    save_epoch10,
    total_epoch11,
    batch_size12,
    if_save_latest13,
    pretrained_G14,
    pretrained_D15,
    gpus16,
    if_cache_gpu17,
    if_save_every_weights18,
    version19,
):
    """Assemble the training filelist and config, then run the trainer
    subprocess to completion and return a status string.

    Only samples present in every required artifact directory (gt wavs,
    features, and — when *if_f0_3* — f0/f0nsf) make it into the filelist.
    """
    # Build the filelist.
    exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
    os.makedirs(exp_dir, exist_ok=True)
    gt_wavs_dir = "%s/0_gt_wavs" % (exp_dir)
    feature_dir = (
        "%s/3_feature256" % (exp_dir)
        if version19 == "v1"
        else "%s/3_feature768" % (exp_dir)
    )
    if if_f0_3:
        f0_dir = "%s/2a_f0" % (exp_dir)
        f0nsf_dir = "%s/2b-f0nsf" % (exp_dir)
        # Keep only stems that exist in all four artifact directories.
        names = (
            set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)])
            & set([name.split(".")[0] for name in os.listdir(feature_dir)])
            & set([name.split(".")[0] for name in os.listdir(f0_dir)])
            & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)])
        )
    else:
        names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set(
            [name.split(".")[0] for name in os.listdir(feature_dir)]
        )
    opt = []
    for name in names:
        if if_f0_3:
            # wav|feature|f0|f0nsf|speaker_id (backslashes escaped for the
            # trainer's parser on Windows paths).
            opt.append(
                "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s"
                % (
                    gt_wavs_dir.replace("\\", "\\\\"),
                    name,
                    feature_dir.replace("\\", "\\\\"),
                    name,
                    f0_dir.replace("\\", "\\\\"),
                    name,
                    f0nsf_dir.replace("\\", "\\\\"),
                    name,
                    spk_id5,
                )
            )
        else:
            opt.append(
                "%s/%s.wav|%s/%s.npy|%s"
                % (
                    gt_wavs_dir.replace("\\", "\\\\"),
                    name,
                    feature_dir.replace("\\", "\\\\"),
                    name,
                    spk_id5,
                )
            )
    fea_dim = 256 if version19 == "v1" else 768
    # Pad the filelist with two silent ("mute") samples.
    if if_f0_3:
        for _ in range(2):
            opt.append(
                "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
                % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5)
            )
    else:
        for _ in range(2):
            opt.append(
                "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s"
                % (now_dir, sr2, now_dir, fea_dim, spk_id5)
            )
    shuffle(opt)
    with open("%s/filelist.txt" % exp_dir, "w") as f:
        f.write("\n".join(opt))
    logger.debug("Write filelist done")
    # No config generation needed beyond copying the matching template below.
    # cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e mi-test -sr 40k -f0 1 -bs 4 -g 0 -te 10 -se 5 -pg pretrained/f0G40k.pth -pd pretrained/f0D40k.pth -l 1 -c 0"
    logger.info("Use gpus: %s", str(gpus16))
    if pretrained_G14 == "":
        logger.info("No pretrained Generator")
    if pretrained_D15 == "":
        logger.info("No pretrained Discriminator")
    # v2 only ships non-40k configs; 40k reuses the v1 template.
    if version19 == "v1" or sr2 == "40k":
        config_path = "v1/%s.json" % sr2
    else:
        config_path = "v2/%s.json" % sr2
    config_save_path = os.path.join(exp_dir, "config.json")
    if not pathlib.Path(config_save_path).exists():
        with open(config_save_path, "w", encoding="utf-8") as f:
            json.dump(
                config.json_config[config_path],
                f,
                ensure_ascii=False,
                indent=4,
                sort_keys=True,
            )
            f.write("\n")
    # The -g flag is only passed when GPU ids were supplied.
    if gpus16:
        cmd = (
            '"%s" infer/modules/train/train.py -e "%s" -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s'
            % (
                config.python_cmd,
                exp_dir1,
                sr2,
                1 if if_f0_3 else 0,
                batch_size12,
                gpus16,
                total_epoch11,
                save_epoch10,
                "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "",
                "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "",
                1 if if_save_latest13 == i18n("是") else 0,
                1 if if_cache_gpu17 == i18n("是") else 0,
                1 if if_save_every_weights18 == i18n("是") else 0,
                version19,
            )
        )
    else:
        cmd = (
            '"%s" infer/modules/train/train.py -e "%s" -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s'
            % (
                config.python_cmd,
                exp_dir1,
                sr2,
                1 if if_f0_3 else 0,
                batch_size12,
                total_epoch11,
                save_epoch10,
                "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "",
                "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "",
                1 if if_save_latest13 == i18n("是") else 0,
                1 if if_cache_gpu17 == i18n("是") else 0,
                1 if if_save_every_weights18 == i18n("是") else 0,
                version19,
            )
        )
    logger.info(cmd)
    p = Popen(cmd, shell=True, cwd=now_dir)
    # Blocks this callback until training finishes.
    p.wait()
    return "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log"
598 |
599 |
600 | # but4.click(train_index, [exp_dir1], info3)
def train_index(exp_dir1, version19):
    """Build the faiss retrieval index from the extracted features.

    Generator used as a Gradio callback: yields the accumulated status
    transcript after each stage (optional k-means reduction, IVF training,
    batched adds, final write).
    """
    # exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
    exp_dir = "logs/%s" % (exp_dir1)
    os.makedirs(exp_dir, exist_ok=True)
    feature_dir = (
        "%s/3_feature256" % (exp_dir)
        if version19 == "v1"
        else "%s/3_feature768" % (exp_dir)
    )
    if not os.path.exists(feature_dir):
        return "请先进行特征提取!"
    listdir_res = list(os.listdir(feature_dir))
    if len(listdir_res) == 0:
        return "请先进行特征提取!"
    infos = []
    npys = []
    # Load every per-utterance feature file and stack them into one matrix.
    for name in sorted(listdir_res):
        phone = np.load("%s/%s" % (feature_dir, name))
        npys.append(phone)
    big_npy = np.concatenate(npys, 0)
    big_npy_idx = np.arange(big_npy.shape[0])
    np.random.shuffle(big_npy_idx)
    big_npy = big_npy[big_npy_idx]
    # Very large feature sets are reduced to 10k k-means centroids first.
    if big_npy.shape[0] > 2e5:
        infos.append("Trying doing kmeans %s shape to 10k centers." % big_npy.shape[0])
        yield "\n".join(infos)
        try:
            big_npy = (
                MiniBatchKMeans(
                    n_clusters=10000,
                    verbose=True,
                    batch_size=256 * config.n_cpu,
                    compute_labels=False,
                    init="random",
                )
                .fit(big_npy)
                .cluster_centers_
            )
        except:
            # Best-effort: on k-means failure, report it and keep the raw
            # features instead of aborting.
            info = traceback.format_exc()
            logger.info(info)
            infos.append(info)
            yield "\n".join(infos)

    np.save("%s/total_fea.npy" % exp_dir, big_npy)
    # IVF list count heuristic, capped so each list keeps >= 39 vectors.
    n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
    infos.append("%s,%s" % (big_npy.shape, n_ivf))
    yield "\n".join(infos)
    index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf)
    # index = faiss.index_factory(256if version19=="v1"else 768, "IVF%s,PQ128x4fs,RFlat"%n_ivf)
    infos.append("training")
    yield "\n".join(infos)
    index_ivf = faiss.extract_index_ivf(index)  #
    index_ivf.nprobe = 1
    index.train(big_npy)
    # Persist the trained-but-empty index as an intermediate artifact.
    faiss.write_index(
        index,
        "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index"
        % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
    )

    infos.append("adding")
    yield "\n".join(infos)
    # Add vectors in chunks to bound peak memory.
    batch_size_add = 8192
    for i in range(0, big_npy.shape[0], batch_size_add):
        index.add(big_npy[i : i + batch_size_add])
    faiss.write_index(
        index,
        "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index"
        % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
    )
    infos.append(
        "成功构建索引,added_IVF%s_Flat_nprobe_%s_%s_%s.index"
        % (n_ivf, index_ivf.nprobe, exp_dir1, version19)
    )
    # faiss.write_index(index, '%s/added_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19))
    # infos.append("成功构建索引,added_IVF%s_Flat_FastScan_%s.index"%(n_ivf,version19))
    yield "\n".join(infos)
679 |
680 |
681 | # but5.click(train1key, [exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0method8, save_epoch10, total_epoch11, batch_size12, if_save_latest13, pretrained_G14, pretrained_D15, gpus16, if_cache_gpu17], info3)
def train1key(
    exp_dir1,
    sr2,
    if_f0_3,
    trainset_dir4,
    spk_id5,
    np7,
    f0method8,
    save_epoch10,
    total_epoch11,
    batch_size12,
    if_save_latest13,
    pretrained_G14,
    pretrained_D15,
    gpus16,
    if_cache_gpu17,
    if_save_every_weights18,
    version19,
    gpus_rmvpe,
):
    """One-click pipeline: preprocess -> f0/feature extraction -> train ->
    build index, yielding an accumulated status transcript at each step."""
    infos = []

    def get_info_str(strr):
        # Append one status line and return the whole transcript so far.
        infos.append(strr)
        return "\n".join(infos)

    # Step 1: preprocess the dataset (drain the generator to collect its log).
    yield get_info_str(i18n("step1:正在处理数据"))
    [get_info_str(_) for _ in preprocess_dataset(trainset_dir4, exp_dir1, sr2, np7)]

    # Step 2: extract pitch and features.
    yield get_info_str(i18n("step2:正在提取音高&正在提取特征"))
    [
        get_info_str(_)
        for _ in extract_f0_feature(
            gpus16, np7, f0method8, if_f0_3, exp_dir1, version19, gpus_rmvpe
        )
    ]

    # Step 3a: train the model (blocks until training finishes).
    yield get_info_str(i18n("step3a:正在训练模型"))
    click_train(
        exp_dir1,
        sr2,
        if_f0_3,
        spk_id5,
        save_epoch10,
        total_epoch11,
        batch_size12,
        if_save_latest13,
        pretrained_G14,
        pretrained_D15,
        gpus16,
        if_cache_gpu17,
        if_save_every_weights18,
        version19,
    )
    yield get_info_str(i18n("训练结束, 您可查看控制台训练日志或实验文件夹下的train.log"))

    # Step 3b: build the faiss retrieval index.
    [get_info_str(_) for _ in train_index(exp_dir1, version19)]
    yield get_info_str(i18n("全流程结束!"))
744 |
745 |
746 | # ckpt_path2.change(change_info_,[ckpt_path2],[sr__,if_f0__])
def change_info_(ckpt_path):
    """Read sample rate, f0 flag and version from the train.log next to
    *ckpt_path*.

    Returns (sr, str(f0), version) on success, or three no-op Gradio
    updates when the log is missing or unparseable.
    """
    import ast

    log_path = ckpt_path.replace(os.path.basename(ckpt_path), "train.log")
    if not os.path.exists(log_path):
        return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
    try:
        with open(log_path, "r") as f:
            # The first line ends (tab-separated) with a dict repr of the
            # training config. Parse it with literal_eval instead of eval so
            # a crafted log cannot execute code; the dict is expected to hold
            # only literal values.
            info = ast.literal_eval(
                f.read().strip("\n").split("\n")[0].split("\t")[-1]
            )
        sr, f0 = info["sample_rate"], info["if_f0"]
        version = "v2" if ("version" in info and info["version"] == "v2") else "v1"
        return sr, str(f0), version
    except Exception:
        # Malformed/legacy logs: report and fall back to no-op updates.
        traceback.print_exc()
        return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
761 |
762 |
# Show GPU-card textboxes only when not running on DirectML (no CUDA card numbering there).
F0GPUVisible = config.dml == False
764 |
765 |
def change_f0_method(f0method8):
    """Toggle visibility of the rmvpe GPU-card textbox based on the chosen f0 method."""
    # `and` short-circuits, so F0GPUVisible is only consulted for rmvpe_gpu,
    # matching the original branch structure.
    show = f0method8 == "rmvpe_gpu" and F0GPUVisible
    return {"visible": show, "__type__": "update"}
772 |
def find_model():
    """Pick the alphabetically-first model in `names`, load it, and return its name.

    Shows a gradio info toast and returns '' when no model is installed.
    """
    if not names:
        gr.Info("Do not forget to choose a model.")
        return ''
    first = sorted(names)[0]
    vc.get_vc(first, None, None)
    return first
780 |
def find_audios(index=False):
    """List playable files in ./audios (creating the folder if needed).

    With index=True return just the alphabetically-first path (or '' when
    empty); otherwise return the sorted list ([] when empty).
    """
    if not os.path.exists('./audios'):
        os.mkdir("./audios")
    found = sorted(
        "./audios/" + entry
        for entry in os.listdir("./audios")
        if entry.endswith(('.wav', '.mp3', '.ogg'))
    )
    if index:
        return found[0] if found else ""
    return found if found else []
792 |
def get_index():
    """Return the .index file path for the first model's ./logs folder, if any."""
    if find_model() == '':
        return ''
    model_name = sorted(names)[0].split(".")[0]
    logs_path = "./logs/" + model_name
    if os.path.exists(logs_path):
        for entry in os.listdir(logs_path):
            if entry.endswith(".index"):
                return os.path.join(logs_path, entry)
        return ''
    # NOTE: falls through (implicitly returns None) when the logs folder is
    # missing — this mirrors the original control flow exactly.
804 |
def get_indexes():
    """Collect every .index file under ./logs; return '' when none exist."""
    found = [
        os.path.join(top, fname)
        for top, _dirs, fnames in os.walk("./logs/")
        for fname in fnames
        if fname.endswith(".index")
    ]
    return found if found else ''
815 |
def save_wav(file):
    """Move an uploaded/recorded audio file into ./audios and return its new path.

    Accepts either a gradio file object (has .name) or a plain path string;
    returns None when given nothing usable.
    """
    try:
        # gradio file objects expose .name; plain strings raise AttributeError here
        src = file.name
        shutil.move(src, './audios')
        return './audios/' + os.path.basename(src)
    except AttributeError:
        try:
            stamped = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + '.wav'
            dest = './audios/' + stamped
            shutil.move(file, dest)
            return dest
        except TypeError:
            return None
829 |
def download_from_url(url, model):
    """Download a zipped RVC model (Google Drive / Mega / direct URL) and install it.

    The archive's .index file(s) go to ./logs/<model> and the small .pth goes
    to ./weights/<model>.pth. Returns a human-readable status string for the GUI.
    """
    if url == '':
        return "URL cannot be left empty."
    if model == '':
        return "You need to name your model. For example: My-Model"
    url = url.strip()
    # start each download from clean scratch directories
    zip_dirs = ["zips", "unzips"]
    for directory in zip_dirs:
        if os.path.exists(directory):
            shutil.rmtree(directory)
    os.makedirs("zips", exist_ok=True)
    os.makedirs("unzips", exist_ok=True)
    zipfile = model + '.zip'
    zipfile_path = './zips/' + zipfile
    try:
        if "drive.google.com" in url:
            subprocess.run(["gdown", url, "--fuzzy", "-O", zipfile_path])
        elif "mega.nz" in url:
            m = Mega()
            m.download_url(url, './zips')
        else:
            subprocess.run(["wget", url, "-O", zipfile_path])
        # NOTE: returns "No zipfile found." on the first non-.zip directory
        # entry, as before (kept to preserve existing behavior).
        for filename in os.listdir("./zips"):
            if filename.endswith(".zip"):
                zipfile_path = os.path.join("./zips/", filename)
                shutil.unpack_archive(zipfile_path, "./unzips", 'zip')
            else:
                return "No zipfile found."
        for root, dirs, files in os.walk('./unzips'):
            for file in files:
                file_path = os.path.join(root, file)
                if file.endswith(".index"):
                    # FIX: os.mkdir crashed when ./logs/<model> already existed
                    # or when the archive held more than one .index file.
                    os.makedirs(f'./logs/{model}', exist_ok=True)
                    shutil.copy2(file_path, f'./logs/{model}')
                elif "G_" not in file and "D_" not in file and file.endswith(".pth"):
                    shutil.copy(file_path, f'./weights/{model}.pth')
        shutil.rmtree("zips")
        shutil.rmtree("unzips")
        return "Success."
    except Exception:
        # surface the traceback in the console instead of swallowing it silently
        traceback.print_exc()
        return "There's been an error."
871 |
def upload_to_dataset(files, dir):
    """Copy uploaded audio files into a dataset folder (auto-named when dir is '').

    Returns the localized status text plus a gradio update dict pointing the
    dataset-path textbox at the folder actually used.
    """
    if dir == '':
        dir = './dataset/' + datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    if not os.path.exists(dir):
        os.makedirs(dir)
    for uploaded in files:
        shutil.copy2(uploaded.name, dir)
    gr.Info(i18n("处理数据"))
    return i18n("处理数据"), {"value": dir, "__type__": "update"}
882 |
with gr.Blocks(title="EasyGUI v2.9",theme=gr.themes.Base()) as app:
    # FIX: the heading string literal was split across two physical lines in
    # the original (an unterminated string -> SyntaxError). The newline is now
    # escaped so the rendered text is unchanged.
    gr.HTML(" EasyGUI v2.9\n")
    with gr.Tabs():
        # ---- Tab 1: single & batch voice-conversion inference ----
        with gr.TabItem(i18n("模型推理")):
            with gr.Row():
                sid0 = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names), value=find_model())
                refresh_button = gr.Button(i18n("刷新音色列表和索引路径"), variant="primary")
                #clean_button = gr.Button(i18n("卸载音色省显存"), variant="primary")
                spk_item = gr.Slider(
                    minimum=0,
                    maximum=2333,
                    step=1,
                    label=i18n("请选择说话人id"),
                    value=0,
                    visible=False,
                    interactive=True,
                )
                #clean_button.click(
                #    fn=clean, inputs=[], outputs=[sid0], api_name="infer_clean"
                #)
                vc_transform0 = gr.Number(
                    label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0
                )
                but0 = gr.Button(i18n("转换"), variant="primary")
            with gr.Row():
                with gr.Column():
                    # audio can arrive three ways: drag-and-drop, microphone, or
                    # a dropdown over the ./audios folder
                    with gr.Row():
                        dropbox = gr.File(label="Drop your audio here & hit the Reload button.")
                    with gr.Row():
                        record_button=gr.Audio(source="microphone", label="OR Record audio.", type="filepath")
                    with gr.Row():
                        input_audio0 = gr.Dropdown(
                            label=i18n("输入待处理音频文件路径(默认是正确格式示例)"),
                            value=find_audios(True),
                            choices=find_audios()
                        )
                        record_button.change(fn=save_wav, inputs=[record_button], outputs=[input_audio0])
                        dropbox.upload(fn=save_wav, inputs=[dropbox], outputs=[input_audio0])
                with gr.Column():
                    with gr.Accordion(label=i18n("自动检测index路径,下拉式选择(dropdown)"), open=False):
                        file_index2 = gr.Dropdown(
                            label=i18n("自动检测index路径,下拉式选择(dropdown)"),
                            choices=get_indexes(),
                            interactive=True,
                            value=get_index()
                        )
                    index_rate1 = gr.Slider(
                        minimum=0,
                        maximum=1,
                        label=i18n("检索特征占比"),
                        value=0.66,
                        interactive=True,
                    )
                    vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)"))
                with gr.Accordion(label=i18n("常规设置"), open=False):
                    f0method0 = gr.Radio(
                        label=i18n(
                            "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU"
                        ),
                        choices=["pm", "harvest", "crepe", "rmvpe"]
                        if config.dml == False
                        else ["pm", "harvest", "rmvpe"],
                        value="rmvpe",
                        interactive=True,
                    )
                    filter_radius0 = gr.Slider(
                        minimum=0,
                        maximum=7,
                        label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"),
                        value=3,
                        step=1,
                        interactive=True,
                    )
                    resample_sr0 = gr.Slider(
                        minimum=0,
                        maximum=48000,
                        label=i18n("后处理重采样至最终采样率,0为不进行重采样"),
                        value=0,
                        step=1,
                        interactive=True,
                    )
                    rms_mix_rate0 = gr.Slider(
                        minimum=0,
                        maximum=1,
                        label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"),
                        value=0.21,
                        interactive=True,
                    )
                    protect0 = gr.Slider(
                        minimum=0,
                        maximum=0.5,
                        label=i18n(
                            "保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"
                        ),
                        value=0.33,
                        step=0.01,
                        interactive=True,
                    )
                    file_index1 = gr.Textbox(
                        label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
                        value="",
                        interactive=True,
                        visible=False
                    )
                refresh_button.click(
                    fn=change_choices,
                    inputs=[],
                    outputs=[sid0, file_index2, input_audio0],
                    api_name="infer_refresh",
                )
                # file_big_npy1 = gr.Textbox(
                #     label=i18n("特征文件路径"),
                #     value="E:\\codes\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
                #     interactive=True,
                # )
            with gr.Row():
                f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"), visible=False)
            with gr.Row():
                vc_output1 = gr.Textbox(label=i18n("输出信息"))
                but0.click(
                    vc.vc_single,
                    [
                        spk_item,
                        input_audio0,
                        vc_transform0,
                        f0_file,
                        f0method0,
                        file_index1,
                        file_index2,
                        # file_big_npy1,
                        index_rate1,
                        filter_radius0,
                        resample_sr0,
                        rms_mix_rate0,
                        protect0,
                    ],
                    [vc_output1, vc_output2],
                    api_name="infer_convert",
                )
            # batch conversion: whole folder (or uploaded file list) -> opt/
            with gr.Row():
                with gr.Accordion(open=False, label=i18n("批量转换, 输入待转换音频文件夹, 或上传多个音频文件, 在指定文件夹(默认opt)下输出转换的音频. ")):
                    with gr.Column():
                        vc_transform1 = gr.Number(
                            label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0
                        )
                        opt_input = gr.Textbox(label=i18n("指定输出文件夹"), value="opt")
                        f0method1 = gr.Radio(
                            label=i18n(
                                "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU"
                            ),
                            choices=["pm", "harvest", "crepe", "rmvpe"]
                            if config.dml == False
                            else ["pm", "harvest", "rmvpe"],
                            value="pm",
                            interactive=True,
                        )
                        filter_radius1 = gr.Slider(
                            minimum=0,
                            maximum=7,
                            label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"),
                            value=3,
                            step=1,
                            interactive=True,
                        )
                    with gr.Column():
                        file_index3 = gr.Textbox(
                            label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
                            value="",
                            interactive=True,
                            visible=False
                        )
                        file_index4 = gr.Dropdown(
                            label=i18n("自动检测index路径,下拉式选择(dropdown)"),
                            choices=sorted(index_paths),
                            interactive=True,
                        )
                        refresh_button.click(
                            fn=lambda: change_choices()[1],
                            inputs=[],
                            outputs=file_index4,
                            api_name="infer_refresh_batch",
                        )
                        # file_big_npy2 = gr.Textbox(
                        #     label=i18n("特征文件路径"),
                        #     value="E:\\codes\\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
                        #     interactive=True,
                        # )
                        index_rate2 = gr.Slider(
                            minimum=0,
                            maximum=1,
                            label=i18n("检索特征占比"),
                            value=1,
                            interactive=True,
                        )
                    with gr.Column():
                        resample_sr1 = gr.Slider(
                            minimum=0,
                            maximum=48000,
                            label=i18n("后处理重采样至最终采样率,0为不进行重采样"),
                            value=0,
                            step=1,
                            interactive=True,
                        )
                        rms_mix_rate1 = gr.Slider(
                            minimum=0,
                            maximum=1,
                            label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"),
                            value=1,
                            interactive=True,
                        )
                        protect1 = gr.Slider(
                            minimum=0,
                            maximum=0.5,
                            label=i18n(
                                "保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"
                            ),
                            value=0.33,
                            step=0.01,
                            interactive=True,
                        )
                    with gr.Column():
                        dir_input = gr.Textbox(
                            label=i18n("输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)"),
                            value="E:\codes\py39\\test-20230416b\\todo-songs",
                        )
                        inputs = gr.File(
                            file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹")
                        )
                    with gr.Row():
                        format1 = gr.Radio(
                            label=i18n("导出文件格式"),
                            choices=["wav", "flac", "mp3", "m4a"],
                            value="flac",
                            interactive=True,
                        )
                        # NOTE(review): this `but1` is redefined in the training
                        # tab below — the two buttons are distinct components
                        # that happen to reuse the variable name.
                        but1 = gr.Button(i18n("转换"), variant="primary")
                        vc_output3 = gr.Textbox(label=i18n("输出信息"))
                    but1.click(
                        vc.vc_multi,
                        [
                            spk_item,
                            dir_input,
                            opt_input,
                            inputs,
                            vc_transform1,
                            f0method1,
                            file_index3,
                            file_index4,
                            # file_big_npy2,
                            index_rate2,
                            filter_radius1,
                            resample_sr1,
                            rms_mix_rate1,
                            protect1,
                            format1,
                        ],
                        [vc_output3],
                        api_name="infer_convert_batch",
                    )
            sid0.change(
                fn=vc.get_vc,
                inputs=[sid0, protect0, protect1],
                outputs=[spk_item, protect0, protect1, file_index2, file_index4],
            )
        # ---- Tab 2: fetch a model zip from a URL ----
        with gr.TabItem("Download Model"):
            with gr.Row():
                url=gr.Textbox(label="Enter the URL to the Model:")
            with gr.Row():
                model = gr.Textbox(label="Name your model:")
                download_button=gr.Button("Download")
            with gr.Row():
                status_bar=gr.Textbox(label="")
                download_button.click(fn=download_from_url, inputs=[url, model], outputs=[status_bar])
            with gr.Row():
                gr.Markdown(
                    """
                    Original RVC:https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI
                    Mangio's RVC Fork:https://github.com/Mangio621/Mangio-RVC-Fork
                    ❤️ If you like the EasyGUI, help me keep it.❤️
                    https://paypal.me/lesantillan
                    """
                )
        # ---- Tab 3: dataset preprocessing, f0/feature extraction, training ----
        with gr.TabItem(i18n("训练")):
            with gr.Row():
                with gr.Column():
                    exp_dir1 = gr.Textbox(label=i18n("输入实验名"), value="My-Voice")
                    np7 = gr.Slider(
                        minimum=0,
                        maximum=config.n_cpu,
                        step=1,
                        label=i18n("提取音高和处理数据使用的CPU进程数"),
                        value=int(np.ceil(config.n_cpu / 1.5)),
                        interactive=True,
                    )
                    sr2 = gr.Radio(
                        label=i18n("目标采样率"),
                        choices=["40k", "48k"],
                        value="40k",
                        interactive=True,
                        visible=False
                    )
                    if_f0_3 = gr.Radio(
                        label=i18n("模型是否带音高指导(唱歌一定要, 语音可以不要)"),
                        choices=[True, False],
                        value=True,
                        interactive=True,
                        visible=False
                    )
                    version19 = gr.Radio(
                        label=i18n("版本"),
                        choices=["v1", "v2"],
                        value="v2",
                        interactive=True,
                        visible=False,
                    )
                    trainset_dir4 = gr.Textbox(
                        label=i18n("输入训练文件夹路径"), value='./dataset/'+datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
                    )
                    easy_uploader = gr.Files(label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹"),file_types=['audio'])
                    but1 = gr.Button(label=i18n("处理数据"), variant="primary")
                    info1 = gr.Textbox(label=i18n("输出信息"), value="")
                    easy_uploader.upload(fn=upload_to_dataset, inputs=[easy_uploader, trainset_dir4], outputs=[info1, trainset_dir4])
                    gpus6 = gr.Textbox(
                        label=i18n("以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"),
                        value=gpus,
                        interactive=True,
                        visible=F0GPUVisible,
                    )
                    gpu_info9 = gr.Textbox(
                        label=i18n("显卡信息"), value=gpu_info, visible=F0GPUVisible
                    )
                    spk_id5 = gr.Slider(
                        minimum=0,
                        maximum=4,
                        step=1,
                        label=i18n("请指定说话人id"),
                        value=0,
                        interactive=True,
                        visible=False
                    )
                    but1.click(
                        preprocess_dataset,
                        [trainset_dir4, exp_dir1, sr2, np7],
                        [info1],
                        api_name="train_preprocess",
                    )
                with gr.Column():
                    f0method8 = gr.Radio(
                        label=i18n(
                            "选择音高提取算法:输入歌声可用pm提速,高质量语音但CPU差可用dio提速,harvest质量更好但慢,rmvpe效果最好且微吃CPU/GPU"
                        ),
                        choices=["pm", "harvest", "dio", "rmvpe", "rmvpe_gpu"],
                        value="rmvpe_gpu",
                        interactive=True,
                    )
                    gpus_rmvpe = gr.Textbox(
                        label=i18n(
                            "rmvpe卡号配置:以-分隔输入使用的不同进程卡号,例如0-0-1使用在卡0上跑2个进程并在卡1上跑1个进程"
                        ),
                        value="%s-%s" % (gpus, gpus),
                        interactive=True,
                        visible=F0GPUVisible,
                    )
                    but2 = gr.Button(i18n("特征提取"), variant="primary")
                    info2 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8)
                    f0method8.change(
                        fn=change_f0_method,
                        inputs=[f0method8],
                        outputs=[gpus_rmvpe],
                    )
                    but2.click(
                        extract_f0_feature,
                        [
                            gpus6,
                            np7,
                            f0method8,
                            if_f0_3,
                            exp_dir1,
                            version19,
                            gpus_rmvpe,
                        ],
                        [info2],
                        api_name="train_extract_f0_feature",
                    )
                with gr.Column():
                    total_epoch11 = gr.Slider(
                        minimum=2,
                        maximum=1000,
                        step=1,
                        label=i18n("总训练轮数total_epoch"),
                        value=150,
                        interactive=True,
                    )
                    but3 = gr.Button(i18n("训练模型"), variant="primary")
                    but4 = gr.Button(i18n("训练特征索引"), variant="primary")
                    info3 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=10)
                with gr.Accordion(label=i18n("常规设置"), open=False):
                    save_epoch10 = gr.Slider(
                        minimum=1,
                        maximum=50,
                        step=1,
                        label=i18n("保存频率save_every_epoch"),
                        value=25,
                        interactive=True,
                    )
                    batch_size12 = gr.Slider(
                        minimum=1,
                        maximum=40,
                        step=1,
                        label=i18n("每张显卡的batch_size"),
                        value=default_batch_size,
                        interactive=True,
                    )
                    if_save_latest13 = gr.Radio(
                        label=i18n("是否仅保存最新的ckpt文件以节省硬盘空间"),
                        choices=[i18n("是"), i18n("否")],
                        value=i18n("是"),
                        interactive=True,
                    )
                    if_cache_gpu17 = gr.Radio(
                        label=i18n(
                            "是否缓存所有训练集至显存. 10min以下小数据可缓存以加速训练, 大数据缓存会炸显存也加不了多少速"
                        ),
                        choices=[i18n("是"), i18n("否")],
                        value=i18n("否"),
                        interactive=True,
                    )
                    if_save_every_weights18 = gr.Radio(
                        label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"),
                        choices=[i18n("是"), i18n("否")],
                        value=i18n("是"),
                        interactive=True,
                    )
                with gr.Row():
                    pretrained_G14 = gr.Textbox(
                        label=i18n("加载预训练底模G路径"),
                        value="assets/pretrained_v2/f0G40k.pth",
                        interactive=True,
                        visible=False
                    )
                    pretrained_D15 = gr.Textbox(
                        label=i18n("加载预训练底模D路径"),
                        value="assets/pretrained_v2/f0D40k.pth",
                        interactive=True,
                        visible=False
                    )
                    # keep pretrained-model paths in sync with sr/f0/version choices
                    sr2.change(
                        change_sr2,
                        [sr2, if_f0_3, version19],
                        [pretrained_G14, pretrained_D15],
                    )
                    version19.change(
                        change_version19,
                        [sr2, if_f0_3, version19],
                        [pretrained_G14, pretrained_D15, sr2],
                    )
                    if_f0_3.change(
                        change_f0,
                        [if_f0_3, sr2, version19],
                        [f0method8, pretrained_G14, pretrained_D15],
                    )
                    gpus16 = gr.Textbox(
                        label=i18n("以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"),
                        value=gpus,
                        interactive=True,
                        visible=False
                    )
                with gr.Row():
                    but5 = gr.Button(i18n("一键训练"), variant="primary", visible=False)
                    but3.click(
                        click_train,
                        [
                            exp_dir1,
                            sr2,
                            if_f0_3,
                            spk_id5,
                            save_epoch10,
                            total_epoch11,
                            batch_size12,
                            if_save_latest13,
                            pretrained_G14,
                            pretrained_D15,
                            gpus16,
                            if_cache_gpu17,
                            if_save_every_weights18,
                            version19,
                        ],
                        info3,
                        api_name="train_start",
                    )
                    but4.click(train_index, [exp_dir1, version19], info3)
                    but5.click(
                        train1key,
                        [
                            exp_dir1,
                            sr2,
                            if_f0_3,
                            trainset_dir4,
                            spk_id5,
                            np7,
                            f0method8,
                            save_epoch10,
                            total_epoch11,
                            batch_size12,
                            if_save_latest13,
                            pretrained_G14,
                            pretrained_D15,
                            gpus16,
                            if_cache_gpu17,
                            if_save_every_weights18,
                            version19,
                            gpus_rmvpe,
                        ],
                        info3,
                        api_name="train_start_all",
                    )

# Launch: on colab use a public share link; locally bind all interfaces on the
# configured port.
if config.iscolab:
    app.queue(concurrency_count=511, max_size=1022).launch(share=True)
else:
    app.queue(concurrency_count=511, max_size=1022).launch(
        server_name="0.0.0.0",
        inbrowser=not config.noautoopen,
        server_port=config.listen_port,
        quiet=True,
    )
1409 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 777gt
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Packages.tar.gz:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:8c064071bada6cfebc38aa9acf417a1700559f49f0d33ef0e2ca90c5245c0a7b
3 | size 172498313
4 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # EVC
2 | Easy Voice Cloning (Addons for RVC)
3 |
4 | These are miscellaneous files used for running the EasyGUI v1 & v2.
v1 came from the original RVC GUI and v2 was adapted to Mangio's fork to be able to use RVC v2 and Crepe.
6 |
--------------------------------------------------------------------------------
/easy-infer.py:
--------------------------------------------------------------------------------
1 | from multiprocessing import cpu_count
2 | import threading, pdb, librosa
3 | from time import sleep
4 | from subprocess import Popen
5 | from time import sleep
6 | import torch, os, traceback, sys, warnings, shutil, numpy as np
7 | import faiss
8 | from random import shuffle
9 | import scipy.io.wavfile as wavfile
10 | from mega import Mega
11 | from pyngrok import ngrok
# Workspace setup: fresh TEMP scratch dir plus audios/, logs/, weights/ folders.
now_dir = os.getcwd()
sys.path.append(now_dir)
tmp = os.path.join(now_dir, "TEMP")
shutil.rmtree(tmp, ignore_errors=True)
os.makedirs(tmp, exist_ok=True)
os.makedirs("audios",exist_ok=True)
os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True)
os.makedirs(os.path.join(now_dir, "weights"), exist_ok=True)
os.environ["TEMP"] = tmp
warnings.filterwarnings("ignore")
# fixed RNG seed
torch.manual_seed(114514)
23 | from i18n import I18nAuto
24 | import ffmpeg
25 | import datetime
26 | import subprocess
27 |
i18n = I18nAuto()
# Detect whether an NVIDIA card usable for training / accelerated inference exists
ncpu = cpu_count()
ngpu = torch.cuda.device_count()
gpu_infos = []
mem = []
if (not torch.cuda.is_available()) or ngpu == 0:
    if_gpu_ok = False
else:
    if_gpu_ok = False
    for i in range(ngpu):
        gpu_name = torch.cuda.get_device_name(i)
        # crude allow-list match on the marketing name (10xx/16xx/20xx/..., A/T/M/P series)
        if (
            "10" in gpu_name
            or "16" in gpu_name
            or "20" in gpu_name
            or "30" in gpu_name
            or "40" in gpu_name
            or "A2" in gpu_name.upper()
            or "A3" in gpu_name.upper()
            or "A4" in gpu_name.upper()
            or "P4" in gpu_name.upper()
            or "A50" in gpu_name.upper()
            or "70" in gpu_name
            or "80" in gpu_name
            or "90" in gpu_name
            or "M4" in gpu_name.upper()
            or "T4" in gpu_name.upper()
            or "TITAN" in gpu_name.upper()
        ):  # A10#A100#V100#A40#P40#M40#K80#A4500
            if_gpu_ok = True  # at least one usable NVIDIA card found
            gpu_infos.append("%s\t%s" % (i, gpu_name))
            # total VRAM in GiB, rounded (the +0.4 biases the truncation upward)
            mem.append(
                int(
                    torch.cuda.get_device_properties(i).total_memory
                    / 1024
                    / 1024
                    / 1024
                    + 0.4
                )
            )
if if_gpu_ok == True and len(gpu_infos) > 0:
    gpu_info = "\n".join(gpu_infos)
    default_batch_size = min(mem) // 2
else:
    gpu_info = "很遗憾您这没有能用的显卡来支持您训练"
    default_batch_size = 1
# NOTE(review): i[0] takes the first character of "idx\tname", i.e. the index
# digit — this breaks for GPU indices >= 10.
gpus = "-".join([i[0] for i in gpu_infos])
76 | from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
77 | from scipy.io import wavfile
78 | from fairseq import checkpoint_utils
79 | import gradio as gr
80 | import logging
81 | from vc_infer_pipeline import VC
82 | from config import (
83 | is_half,
84 | device,
85 | python_cmd,
86 | listen_port,
87 | iscolab,
88 | noparallel,
89 | noautoopen,
90 | )
91 | from infer_uvr5 import _audio_pre_
92 | from my_utils import load_audio
93 | from train.process_ckpt import show_info, change_info, merge, extract_small_model
94 |
# from trainset_preprocess_pipeline import PreProcess
# silence numba's verbose JIT logging
logging.getLogger("numba").setLevel(logging.WARNING)
97 |
98 |
class ToolButton(gr.Button, gr.components.FormComponent):
    """Small button with single emoji as text, fits inside gradio forms"""

    def __init__(self, **kwargs):
        # force the compact "tool" variant regardless of caller styling
        super().__init__(variant="tool", **kwargs)

    def get_block_name(self):
        # gradio renders this component using the stock button template
        return "button"
107 |
108 |
# Shared HuBERT feature extractor; loaded lazily on first conversion.
hubert_model = None


def load_hubert():
    """Load the HuBERT model onto the inference device (half or full precision)."""
    global hubert_model
    models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
        ["hubert_base.pt"],
        suffix="",
    )
    model = models[0].to(device)
    hubert_model = model.half() if is_half else model.float()
    hubert_model.eval()
125 |
126 |
# Model checkpoints live in ./weights; UVR5 vocal-separation models in ./uvr5_weights.
weight_root = "weights"
weight_uvr5_root = "uvr5_weights"
names = []
for name in os.listdir(weight_root):
    if name.endswith(".pth"):
        names.append(name)

uvr5_names = []
# NOTE(review): os.listdir raises FileNotFoundError if ./uvr5_weights is
# missing — only ./weights is created above. Confirm the folder ships with the app.
for name in os.listdir(weight_uvr5_root):
    if name.endswith(".pth"):
        uvr5_names.append(name.replace(".pth", ""))
138 |
def find_parent(search_dir, file_name):
    """Return the absolute path of the directory under search_dir that contains
    file_name, or None when no such directory exists."""
    for folder, _subdirs, files in os.walk(search_dir):
        if file_name in files:
            return os.path.abspath(folder)
    return None
144 |
def vc_single(
    sid,
    input_audio,
    f0_up_key,
    f0_file,
    f0_method,
    file_index,
    # file_big_npy,
    index_rate,
): # spk_item, input_audio0, vc_transform0,f0_file,f0method0
    """Run voice conversion on a single audio file.

    Relies on module globals prepared by get_vc()/load_hubert()
    (tgt_sr, net_g, vc, cpt, hubert_model). Returns
    ("Success", (tgt_sr, audio)) on success, else (traceback text, (None, None)).
    """
    global tgt_sr, net_g, vc, hubert_model
    if input_audio is None:
        return "You need to upload an audio", None
    f0_up_key = int(f0_up_key)
    try:
        # input_audio is a bare filename; locate its directory by walking cwd
        parent_dir = find_parent(".",input_audio)
        audio = load_audio(parent_dir+'/'+input_audio, 16000)
        times = [0, 0, 0]
        if hubert_model == None:
            load_hubert()
        if_f0 = cpt.get("f0", 1)
        try:
            file_index = (
                file_index.strip(" ")
                .strip('"')
                .strip("\n")
                .strip('"')
                .strip(" ")
                .replace("trained", "added")
            )  # auto-correct a common mistake: "trained" index instead of "added"
        except:
            file_index=''
            print("Skipped index.")
        # file_big_npy = (
        #     file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        # )
        audio_opt = vc.pipeline(
            hubert_model,
            net_g,
            sid,
            audio,
            times,
            f0_up_key,
            f0_method,
            file_index,
            # file_big_npy,
            index_rate,
            if_f0,
            f0_file=f0_file,
        )
        # per-stage timings filled in by the pipeline
        print(
            "npy: ", times[0], "s, f0: ", times[1], "s, infer: ", times[2], "s", sep=""
        )
        return "Success", (tgt_sr, audio_opt)
    except:
        info = traceback.format_exc()
        print(info)
        return info, (None, None)
203 |
204 |
def vc_multi(
    sid,
    dir_path,
    opt_root,
    paths,
    f0_up_key,
    f0_method,
    file_index,
    # file_big_npy,
    index_rate,
):
    """Batch-convert every audio file in dir_path (or the uploaded `paths`).

    Generator: yields the accumulated per-file status log after each file.
    Converted wavs are written under opt_root.
    """
    try:
        dir_path = (
            dir_path.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        )  # strip stray spaces/quotes/newlines from pasted paths
        opt_root = opt_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        os.makedirs(opt_root, exist_ok=True)
        try:
            # prefer the folder path; fall back to the uploaded file list
            if dir_path != "":
                paths = [os.path.join(dir_path, name) for name in os.listdir(dir_path)]
            else:
                paths = [path.name for path in paths]
        except:
            traceback.print_exc()
            paths = [path.name for path in paths]
        infos = []
        file_index = (
            file_index.strip(" ")
            .strip('"')
            .strip("\n")
            .strip('"')
            .strip(" ")
            .replace("trained", "added")
        )  # auto-correct a common mistake: "trained" index instead of "added"
        for path in paths:
            info, opt = vc_single(
                sid,
                path,
                f0_up_key,
                None,
                f0_method,
                file_index,
                # file_big_npy,
                index_rate,
            )
            if info == "Success":
                try:
                    tgt_sr, audio_opt = opt
                    wavfile.write(
                        "%s/%s" % (opt_root, os.path.basename(path)), tgt_sr, audio_opt
                    )
                except:
                    info = traceback.format_exc()
            infos.append("%s->%s" % (os.path.basename(path), info))
            yield "\n".join(infos)
        yield "\n".join(infos)
    except:
        yield traceback.format_exc()
263 |
# Only one voice/model can be active per tab.
def get_vc(sid):
    """Load (or unload) the checkpoint named sid from ./weights.

    Populates the module globals used by vc_single(); an empty selection
    tears everything down and frees GPU memory.
    """
    global n_spk, tgt_sr, net_g, vc, cpt
    if sid == []:
        global hubert_model
        if hubert_model != None:  # polling may switch from a model to no model
            print("clean_empty_cache")
            del net_g, n_spk, vc, hubert_model, tgt_sr  # ,cpt
            hubert_model = net_g = n_spk = vc = hubert_model = tgt_sr = None
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            ### upstream note: re-instantiating and deleting again below is
            ### required or the cleanup is incomplete
            if_f0 = cpt.get("f0", 1)
            if if_f0 == 1:
                net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=is_half)
            else:
                net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
            del net_g, cpt
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            cpt = None
        return {"visible": False, "__type__": "update"}
    person = "%s/%s" % (weight_root, sid)
    print("loading %s" % person)
    cpt = torch.load(person, map_location="cpu")
    tgt_sr = cpt["config"][-1]
    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
    if_f0 = cpt.get("f0", 1)
    if if_f0 == 1:
        net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=is_half)
    else:
        net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
    del net_g.enc_q
    print(net_g.load_state_dict(cpt["weight"], strict=False))  # upstream: "without this line it doesn't clean up properly"
    net_g.eval().to(device)
    if is_half:
        net_g = net_g.half()
    else:
        net_g = net_g.float()
    vc = VC(tgt_sr, device, is_half)
    n_spk = cpt["config"][-3]
    return {"visible": False, "maximum": n_spk, "__type__": "update"}
306 |
307 |
def change_choices():
    """Re-scan ./weights and return an updated choices dict for the model dropdown."""
    models = [entry for entry in os.listdir(weight_root) if entry.endswith(".pth")]
    return {"choices": sorted(models), "__type__": "update"}
314 |
def change_choices2():
    """Re-scan ./audios and return an updated choices dict for the audio dropdown."""
    tracks = [entry for entry in os.listdir("./audios") if entry.endswith(('.wav', '.mp3'))]
    return {"choices": sorted(tracks), "__type__": "update"}
321 |
def clean():
    """Gradio helper: blank out a component's value."""
    return {"__type__": "update", "value": ""}
324 |
def change_sr2(sr2, if_f0_3):
    """Map sample rate + f0 flag to the matching pretrained G/D checkpoint paths."""
    prefix = "f0" if if_f0_3 == "是" else ""
    return (
        "pretrained/%sG%s.pth" % (prefix, sr2),
        "pretrained/%sD%s.pth" % (prefix, sr2),
    )
330 |
def get_index():
    """Locate the first model's .index file inside the Colab RVC logs folder.

    NOTE(review): when check_for_name() finds a model but iscolab is False,
    no return statement runs and the function implicitly yields None; callers
    appear to treat '' and None alike — confirm before relying on it.
    """
    if check_for_name() != '':
        if iscolab:
            chosen_model=sorted(names)[0].split(".")[0]
            logs_path="/content/Retrieval-based-Voice-Conversion-WebUI/logs/"+chosen_model
            for file in os.listdir(logs_path):
                if file.endswith(".index"):
                    return os.path.join(logs_path, file)
            return ''
    else:
        return ''
342 |
def get_indexes():
    """Gather every .index file under the Colab RVC logs tree ('' outside Colab)."""
    if not iscolab:
        return ''
    return [
        os.path.join(top, fname)
        for top, _dirs, fnames in os.walk(
            "/content/Retrieval-based-Voice-Conversion-WebUI/logs/"
        )
        for fname in fnames
        if fname.endswith(".index")
    ]
353 |
# Snapshot of ./audios taken once at import time; use change_choices2() to refresh.
audio_files=[]
for filename in os.listdir("./audios"):
    if filename.endswith(('.wav','.mp3')):
        audio_files.append(filename)
358 |
def get_name():
    """Return the alphabetically-first audio file from the import-time scan, or ''."""
    return sorted(audio_files)[0] if audio_files else ''
364 |
def save_to_wav(record_button):
    """Move a freshly recorded clip into ./audios under a timestamped name.

    Returns the new basename, or None when nothing was recorded.
    """
    if record_button is None:
        # nothing recorded yet -> no-op, implicitly None like the original
        return None
    stamped = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + '.wav'
    shutil.move(record_button, './audios/' + stamped)
    return stamped
374 |
def save_to_wav2(dropbox):
    """Move a drag-and-dropped gradio file into ./audios; return its basename."""
    source = dropbox.name
    shutil.move(source, './audios')
    return os.path.basename(source)
379 |
def match_index(speaker):
    """Find the .index file matching the selected model (Colab logs layout).

    Returns '' when the model's log folder does not exist or holds no .index
    file; the original raised FileNotFoundError on a missing folder and
    returned None when no index was present.
    """
    folder = speaker.split(".")[0]
    parent_dir = "/content/Retrieval-based-Voice-Conversion-WebUI/logs/" + folder
    if not os.path.isdir(parent_dir):  # model has no training logs here
        return ''
    for filename in os.listdir(parent_dir):
        if filename.endswith(".index"):
            return os.path.join(parent_dir, filename)
    return ''
387 |
def download_from_url(url, model):
    """Download a model archive from url, unpack it, and install its contents.

    Supports Google Drive (gdown), mega.nz (Mega client), and plain HTTP
    (wget). The .pth weight lands in ./weights/<model>.pth and any
    .index/.npy feature files in ./logs/<model>/. Returns a status string.

    Bug fixed: the original had the `else: return "No zipfile found."` attached
    to the per-file `if` inside the loop, so the FIRST non-.zip entry in
    ./zips (common with mega downloads) aborted the whole install even when a
    zip was present. We now unpack every zip and only report failure when none
    was found. The non-portable `mkdir -p`/`mv` subprocess calls are replaced
    with os.makedirs/shutil.move.
    """
    url = url.strip()
    if url == '':
        return "URL cannot be left empty."
    # Start from clean scratch directories so stale archives are not unpacked.
    for directory in ("zips", "unzips"):
        if os.path.exists(directory):
            shutil.rmtree(directory)
    os.makedirs("zips", exist_ok=True)
    os.makedirs("unzips", exist_ok=True)
    zipfile = model + '.zip'
    zipfile_path = './zips/' + zipfile
    MODELEPOCH = ''
    if "drive.google.com" in url:
        subprocess.run(["gdown", url, "--fuzzy", "-O", zipfile_path])
    elif "mega.nz" in url:
        m = Mega()
        m.download_url(url, './zips')  # mega keeps the remote filename
    else:
        subprocess.run(["wget", url, "-O", zipfile_path])
    found_zip = False
    for filename in os.listdir("./zips"):
        if filename.endswith(".zip"):
            found_zip = True
            shutil.unpack_archive(os.path.join("./zips", filename), "./unzips", 'zip')
    if not found_zip:
        return "No zipfile found."
    for root, dirs, files in os.walk('./unzips'):
        for file in files:
            if "G_" in file:
                MODELEPOCH = file.split("G_")[1].split(".")[0]
        if MODELEPOCH == '':
            MODELEPOCH = '404'  # sentinel kept from the original behavior
        for file in files:
            file_path = os.path.join(root, file)
            if file.endswith(".npy") or file.endswith(".index"):
                os.makedirs(f"./logs/{model}", exist_ok=True)
                shutil.move(file_path, f"./logs/{model}/")
            elif "G_" not in file and "D_" not in file and file.endswith(".pth"):
                # The standalone voice weight, renamed to the user's model name.
                shutil.move(file_path, f"./weights/{model}.pth")
    shutil.rmtree("zips")
    shutil.rmtree("unzips")
    return "Success."
430 |
def check_for_name():
    """Name of the alphabetically first available model weight, or ''."""
    return sorted(names)[0] if names else ''
436 | print(check_for_name())
#with gr.Blocks() as app
# Two-tab gradio app: "Inference" (model/audio selection and conversion) and
# "Download Model" (fetch a model zip by URL). The launch strategy at the
# bottom depends on the Colab flag `iscolab` set earlier in the file.
with gr.Blocks(theme=gr.themes.Base()) as app:
    with gr.Tab("Inference"):
        with gr.Row():
            sid0 = gr.Dropdown(label="1.Choose your Model.", choices=sorted(names), value=check_for_name())
            refresh_button = gr.Button("Refresh", variant="primary", size='sm')
            refresh_button.click(fn=change_choices, inputs=[], outputs=[sid0])
        # Preload the alphabetically first model so the UI works immediately.
        if check_for_name() != '':
            get_vc(sorted(names)[0])
        else:
            print("Starting without preloaded Model.")
        vc_transform0 = gr.Number(label="Optional: You can change the pitch here or leave it at 0.", value=0)
        #clean_button = gr.Button("Unload Voice to Save Memory", variant="primary")
        # Hidden speaker-id slider; get_vc() returns an update that can reveal it.
        spk_item = gr.Slider(minimum=0,maximum=2333,step=1,label="Please select speaker id",value=0,visible=False,interactive=True)
        #clean_button.click(fn=clean, inputs=[], outputs=[sid0])
        sid0.change(
            fn=get_vc,
            inputs=[sid0],
            outputs=[],
        )
        but0 = gr.Button("Convert", variant="primary")
        with gr.Row():
            with gr.Column():
                # Three ways to supply input audio: drop a file, record from
                # the mic, or pick an existing file under ./audios.
                with gr.Row():
                    dropbox = gr.File(label="Drop your audio here & hit the Reload button.")
                with gr.Row():
                    record_button=gr.Audio(source="microphone", label="OR Record audio.", type="filepath")
                with gr.Row():
                    #input_audio0 = gr.Textbox(label="Enter the Path to the Audio File to be Processed (e.g. /content/youraudio.wav)",value="/content/youraudio.wav")
                    input_audio0 = gr.Dropdown(choices=sorted(audio_files), label="2.Choose your audio.", value=get_name())
                    dropbox.upload(fn=save_to_wav2, inputs=[dropbox], outputs=[input_audio0])
                    dropbox.upload(fn=change_choices2, inputs=[], outputs=[input_audio0])
                    refresh_button2 = gr.Button("Refresh", variant="primary", size='sm')
                    refresh_button2.click(fn=change_choices2, inputs=[], outputs=[input_audio0])
                    record_button.change(fn=save_to_wav, inputs=[record_button], outputs=[input_audio0])
                    record_button.change(fn=change_choices2, inputs=[], outputs=[input_audio0])
            with gr.Column():
                with gr.Accordion(label="Feature Settings", open=False):
                    file_index1 = gr.Dropdown(
                        label="3. Path to your added.index file (if it didn't automatically find it.)",
                        value=get_index(),
                        choices=get_indexes(),
                        interactive=True,
                        visible=True
                    )
                    index_rate1 = gr.Slider(
                        minimum=0,
                        maximum=1,
                        label="Strength:",
                        value=0.69,
                        interactive=True,
                    )
                # Auto-select the matching .index whenever the model changes.
                sid0.change(fn=match_index, inputs=[sid0], outputs=[file_index1])

        with gr.Row():
            vc_output2 = gr.Audio(label="Output Audio (Click on the Three Dots in the Right Corner to Download)")
        with gr.Row():
            f0method0 = gr.Radio(
                label="Optional: Change the Pitch Extraction Algorithm. Use PM for fast results or Harvest for better low range (but it's extremely slow)",
                choices=["pm", "harvest"],
                value="pm",
                interactive=True,
            )
        with gr.Row():
            vc_output1 = gr.Textbox(label="")
        with gr.Row():
            instructions = gr.Markdown("""
            This is simply a modified version of the RVC GUI found here:
            https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI
            """)
            f0_file = gr.File(label="F0 Curve File (Optional, One Pitch Per Line, Replaces Default F0 and Pitch Shift)", visible=False)
        but0.click(
            vc_single,
            [
                spk_item,
                input_audio0,
                vc_transform0,
                f0_file,
                f0method0,
                file_index1,
                index_rate1,
            ],
            [vc_output1, vc_output2]
        )
    with gr.Tab("Download Model"):
        with gr.Row():
            url=gr.Textbox(label="Enter the URL to the Model:")
        with gr.Row():
            model = gr.Textbox(label="Name your model:")
            download_button=gr.Button(label="Download")
        with gr.Row():
            status_bar=gr.Textbox(label="")
        download_button.click(fn=download_from_url, inputs=[url, model], outputs=[status_bar])
# On Colab: expose the app through an ngrok tunnel, with gradio share as backup.
if iscolab:
    try:
        public_url = ngrok.connect(7860)
        print('Click on THIS link: '+public_url)
    except:
        print('Failed to create ngrok URL')
    try:
        app.launch(share=True)
    except KeyboardInterrupt:
        ngrok.kill()
# Local/server run: bind all interfaces on the configured port.
else:
    app.queue(concurrency_count=511, max_size=1022).launch(
        server_name="0.0.0.0",
        inbrowser=not noautoopen,
        server_port=listen_port,
        quiet=True,
    )
547 |
--------------------------------------------------------------------------------
/easy-infer2.py:
--------------------------------------------------------------------------------
1 | from multiprocessing import cpu_count
2 | import threading, pdb, librosa
3 | from time import sleep
4 | from subprocess import Popen
5 | from time import sleep
6 | import torch, os, traceback, sys, warnings, shutil, numpy as np
7 | import faiss
8 | from random import shuffle
9 | import scipy.io.wavfile as wavfile
10 | from mega import Mega
11 | from config import Config
12 | from pyngrok import ngrok
13 | now_dir = os.getcwd()
14 | sys.path.append(now_dir)
15 | tmp = os.path.join(now_dir, "TEMP")
16 | shutil.rmtree(tmp, ignore_errors=True)
17 | os.makedirs(tmp, exist_ok=True)
18 | os.makedirs("audios",exist_ok=True)
19 | os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True)
20 | os.makedirs(os.path.join(now_dir, "weights"), exist_ok=True)
21 | os.environ["TEMP"] = tmp
22 | warnings.filterwarnings("ignore")
23 | torch.manual_seed(114514)
24 | from i18n import I18nAuto
25 | import ffmpeg
26 | import datetime
27 | import subprocess
28 |
i18n = I18nAuto()
# Probe for NVIDIA GPUs usable for training / accelerated inference.
ncpu = cpu_count()
ngpu = torch.cuda.device_count()
gpu_infos = []
mem = []
if (not torch.cuda.is_available()) or ngpu == 0:
    if_gpu_ok = False
else:
    if_gpu_ok = False
    for i in range(ngpu):
        gpu_name = torch.cuda.get_device_name(i)
        # Substring allow-list of GPU families considered fast enough.
        if (
            "10" in gpu_name
            or "16" in gpu_name
            or "20" in gpu_name
            or "30" in gpu_name
            or "40" in gpu_name
            or "A2" in gpu_name.upper()
            or "A3" in gpu_name.upper()
            or "A4" in gpu_name.upper()
            or "P4" in gpu_name.upper()
            or "A50" in gpu_name.upper()
            or "70" in gpu_name
            or "80" in gpu_name
            or "90" in gpu_name
            or "M4" in gpu_name.upper()
            or "T4" in gpu_name.upper()
            or "TITAN" in gpu_name.upper()
        ):  # A10#A100#V100#A40#P40#M40#K80#A4500
            if_gpu_ok = True  # at least one usable NVIDIA card found
            gpu_infos.append("%s\t%s" % (i, gpu_name))
            # Total VRAM in whole GiB, rounded with a +0.4 fudge factor.
            mem.append(
                int(
                    torch.cuda.get_device_properties(i).total_memory
                    / 1024
                    / 1024
                    / 1024
                    + 0.4
                )
            )
if if_gpu_ok == True and len(gpu_infos) > 0:
    gpu_info = "\n".join(gpu_infos)
    default_batch_size = min(mem) // 2
else:
    gpu_info = "很遗憾您这没有能用的显卡来支持您训练"
    default_batch_size = 1
# "0-1-2" style list of usable GPU indices (first char of each info line).
gpus = "-".join([i[0] for i in gpu_infos])
77 | from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
78 | from scipy.io import wavfile
79 | from fairseq import checkpoint_utils
80 | import gradio as gr
81 | import logging
82 | from vc_infer_pipeline import VC
83 | config = Config()
84 | from infer_uvr5 import _audio_pre_
85 | from my_utils import load_audio
86 | from train.process_ckpt import show_info, change_info, merge, extract_small_model
87 |
88 | # from trainset_preprocess_pipeline import PreProcess
89 | logging.getLogger("numba").setLevel(logging.WARNING)
90 |
91 |
92 | class ToolButton(gr.Button, gr.components.IOComponent):
93 | """Small button with single emoji as text, fits inside gradio forms"""
94 |
95 | def __init__(self, **kwargs):
96 | super().__init__(variant="tool", **kwargs)
97 |
98 | def get_block_name(self):
99 | return "button"
100 |
101 |
102 | hubert_model = None
103 |
104 |
def load_hubert():
    """Load the HuBERT feature extractor into the module-global `hubert_model`.

    Moves the model to the configured device and casts it to half precision
    when config.is_half is set, then puts it in eval mode.
    """
    global hubert_model
    models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
        ["hubert_base.pt"],
        suffix="",
    )
    model = models[0].to(config.device)
    hubert_model = model.half() if config.is_half else model.float()
    hubert_model.eval()
118 |
119 |
120 | weight_root = "weights"
121 | weight_uvr5_root = "uvr5_weights"
122 | names = []
123 | for name in os.listdir(weight_root):
124 | if name.endswith(".pth"):
125 | names.append(name)
126 |
127 | uvr5_names = []
128 | for name in os.listdir(weight_uvr5_root):
129 | if name.endswith(".pth"):
130 | uvr5_names.append(name.replace(".pth", ""))
131 |
def find_parent(search_dir, file_name):
    """Walk search_dir and return the absolute directory containing file_name, or None."""
    for dirpath, _dirnames, filenames in os.walk(search_dir):
        if file_name in filenames:
            return os.path.abspath(dirpath)
    return None
137 |
def vc_single(
    sid,
    input_audio,
    f0_up_key,
    f0_file,
    f0_method,
    file_index,
    # file_big_npy,
    index_rate,
    crepe_hop_length
):  # spk_item, input_audio0, vc_transform0,f0_file,f0method0
    """Convert one audio file with the currently loaded voice model.

    input_audio is a bare filename; its directory is located with
    find_parent(".") since the UI only stores basenames. Returns
    ("Success", (tgt_sr, samples)) or (traceback_text, (None, None)).
    Relies on module globals set up by get_vc()/load_hubert().
    """
    global tgt_sr, net_g, vc, hubert_model
    if input_audio is None:
        return "You need to upload an audio", None
    f0_up_key = int(f0_up_key)
    try:
        parent_dir = find_parent(".",input_audio)
        print(parent_dir+'/'+input_audio)
        audio = load_audio(parent_dir+'/'+input_audio, 16000)
        times = [0, 0, 0]
        # Lazy-load HuBERT on first conversion.
        if hubert_model == None:
            load_hubert()
        if_f0 = cpt.get("f0", 1)
        try:
            file_index = (
                file_index.strip(" ")
                .strip('"')
                .strip("\n")
                .strip('"')
                .strip(" ")
                .replace("trained", "added")
            )  # auto-correct a common user mistake: "trained" index -> "added"
        except:
            # file_index was not a string (e.g. None from the UI) — skip it.
            file_index=''
            print("Skipped index.")
        # file_big_npy = (
        #     file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        # )
        audio_opt = vc.pipeline(
            hubert_model,
            net_g,
            sid,
            audio,
            times,
            f0_up_key,
            f0_method,
            file_index,
            # file_big_npy,
            index_rate,
            if_f0,
            crepe_hop_length,
            f0_file=f0_file,
        )
        # times: [feature-extraction, f0, inference] wall-clock seconds.
        print(
            "npy: ", times[0], "s, f0: ", times[1], "s, infer: ", times[2], "s", sep=""
        )
        return "Success", (tgt_sr, audio_opt)
    except:
        info = traceback.format_exc()
        print(info)
        return info, (None, None)
199 |
200 |
def vc_multi(
    sid,
    dir_path,
    opt_root,
    paths,
    f0_up_key,
    f0_method,
    file_index,
    # file_big_npy,
    index_rate,
    crepe_hop_length=128,  # new, defaulted for backward compatibility
):
    """Batch-convert every audio file in dir_path (or the uploaded `paths`).

    Writes converted wavs to opt_root and yields a growing per-file status log.

    Bug fixed: this file's vc_single() requires a crepe_hop_length argument,
    but vc_multi called it without one, raising TypeError on every batch run.
    The parameter is now accepted (defaulting to the UI's value of 128) and
    forwarded.
    """
    try:
        dir_path = (
            dir_path.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        )  # strip stray quotes/whitespace users paste around paths
        opt_root = opt_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        os.makedirs(opt_root, exist_ok=True)
        try:
            # A non-empty dir_path wins over individually uploaded files.
            if dir_path != "":
                paths = [os.path.join(dir_path, name) for name in os.listdir(dir_path)]
            else:
                paths = [path.name for path in paths]
        except:
            traceback.print_exc()
            paths = [path.name for path in paths]
        infos = []
        file_index = (
            file_index.strip(" ")
            .strip('"')
            .strip("\n")
            .strip('"')
            .strip(" ")
            .replace("trained", "added")
        )  # auto-correct a common user mistake: "trained" index -> "added"
        for path in paths:
            info, opt = vc_single(
                sid,
                path,
                f0_up_key,
                None,
                f0_method,
                file_index,
                # file_big_npy,
                index_rate,
                crepe_hop_length,
            )
            if info == "Success":
                try:
                    tgt_sr, audio_opt = opt
                    wavfile.write(
                        "%s/%s" % (opt_root, os.path.basename(path)), tgt_sr, audio_opt
                    )
                except:
                    info = traceback.format_exc()
            infos.append("%s->%s" % (os.path.basename(path), info))
            yield "\n".join(infos)
        yield "\n".join(infos)
    except:
        yield traceback.format_exc()
259 |
260 | # 一个选项卡全局只能有一个音色
261 | def get_vc(sid):
262 | global n_spk, tgt_sr, net_g, vc, cpt
263 | if sid == []:
264 | global hubert_model
265 | if hubert_model != None: # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的
266 | print("clean_empty_cache")
267 | del net_g, n_spk, vc, hubert_model, tgt_sr # ,cpt
268 | hubert_model = net_g = n_spk = vc = hubert_model = tgt_sr = None
269 | if torch.cuda.is_available():
270 | torch.cuda.empty_cache()
271 | ###楼下不这么折腾清理不干净
272 | if_f0 = cpt.get("f0", 1)
273 | if if_f0 == 1:
274 | net_g = SynthesizerTrnMs256NSFsid(
275 | *cpt["config"], is_half=config.is_half
276 | )
277 | else:
278 | net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
279 | del net_g, cpt
280 | if torch.cuda.is_available():
281 | torch.cuda.empty_cache()
282 | cpt = None
283 | return {"visible": False, "__type__": "update"}
284 | person = "%s/%s" % (weight_root, sid)
285 | print("loading %s" % person)
286 | cpt = torch.load(person, map_location="cpu")
287 | tgt_sr = cpt["config"][-1]
288 | cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
289 | if_f0 = cpt.get("f0", 1)
290 | if if_f0 == 1:
291 | net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
292 | else:
293 | net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
294 | del net_g.enc_q
295 | print(net_g.load_state_dict(cpt["weight"], strict=False)) # 不加这一行清不干净, 真奇葩
296 | net_g.eval().to(config.device)
297 | if config.is_half:
298 | net_g = net_g.half()
299 | else:
300 | net_g = net_g.float()
301 | vc = VC(tgt_sr, config)
302 | n_spk = cpt["config"][-3]
303 | return {"visible": True, "maximum": n_spk, "__type__": "update"}
304 |
305 |
def change_choices():
    """Rescan the weights folder and return a gradio update for the model dropdown."""
    models = [entry for entry in os.listdir(weight_root) if entry.endswith(".pth")]
    return {"choices": sorted(models), "__type__": "update"}
312 |
def change_choices2():
    """Rescan ./audios and return a gradio update dict refreshing the audio dropdown."""
    tracks = [entry for entry in os.listdir("./audios")
              if entry.endswith(('.wav', '.mp3'))]
    return {"choices": sorted(tracks), "__type__": "update"}
319 |
def clean():
    # Gradio update payload that blanks out a component's value.
    return {"__type__": "update", "value": ""}
322 |
def change_sr2(sr2, if_f0_3):
    """Return (generator, discriminator) pretrained checkpoint paths for sample rate sr2.

    if_f0_3 == "是" ("yes") selects the f0-conditioned pretrained models.
    """
    prefix = "f0" if if_f0_3 == "是" else ""
    return ("pretrained/%sG%s.pth" % (prefix, sr2),
            "pretrained/%sD%s.pth" % (prefix, sr2))
328 |
def get_index():
    """Best-effort lookup of the first model's .index feature file (Colab layout).

    Returns the path to the first .index file found in the chosen model's log
    folder, or '' when no model is available, not running on Colab, or nothing
    is found. The original fell through and returned None when no model was
    available and crashed if the log folder did not exist; '' is now returned
    consistently so the gradio Dropdown value is always a string.
    """
    if check_for_name() == '' or not config.iscolab:
        return ''
    chosen_model = sorted(names)[0].split(".")[0]
    logs_path = "/content/Retrieval-based-Voice-Conversion-WebUI/logs/" + chosen_model
    if not os.path.isdir(logs_path):  # model may never have been trained here
        return ''
    for file in os.listdir(logs_path):
        if file.endswith(".index"):
            return os.path.join(logs_path, file)
    return ''
340 |
def get_indexes():
    """Collect every .index file under the Colab logs tree.

    Always returns a list; the original returned '' off-Colab, which only
    worked as a Dropdown choices value by accident (iterating an empty str).
    """
    if not config.iscolab:
        return []
    indexes_list = []
    for dirpath, _dirnames, filenames in os.walk(
        "/content/Retrieval-based-Voice-Conversion-WebUI/logs/"
    ):
        for filename in filenames:
            if filename.endswith(".index"):
                indexes_list.append(os.path.join(dirpath, filename))
    return indexes_list
351 |
# Initial scan of ./audios for the input-audio dropdown. The directory is
# (re)created defensively so this scan never raises FileNotFoundError, even if
# the folder made at module start-up was removed in the meantime.
os.makedirs("./audios", exist_ok=True)
audio_files = [f for f in os.listdir("./audios") if f.endswith(('.wav', '.mp3'))]
356 |
def get_name():
    """Default dropdown value: alphabetically first scanned audio file, or ''."""
    return sorted(audio_files)[0] if audio_files else ''
362 |
def save_to_wav(record_button):
    """Move a microphone recording into ./audios under a timestamped name.

    record_button is the temp-file path gradio hands over, or None when the
    recording was cleared (in which case nothing happens and None is returned).
    """
    if record_button is None:
        return None
    new_name = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + '.wav'
    shutil.move(record_button, './audios/' + new_name)
    return new_name
372 |
def save_to_wav2(dropbox):
    """Move an uploaded file (gradio File object) into ./audios.

    Returns the file's basename so the audio dropdown can select it. The move
    is overwrite-safe: the original shutil.move(file_path, './audios') raised
    shutil.Error whenever a file with the same name was already there.
    """
    file_path = dropbox.name
    target = os.path.join('./audios', os.path.basename(file_path))
    if os.path.exists(target):
        os.remove(target)  # re-upload replaces the stale copy instead of crashing
    shutil.move(file_path, target)
    return os.path.basename(file_path)
377 |
def match_index(speaker):
    """Find the .index file matching the selected model (Colab logs layout).

    Returns '' when the model's log folder does not exist or holds no .index
    file; the original raised FileNotFoundError on a missing folder and
    returned None when no index was present.
    """
    folder = speaker.split(".")[0]
    parent_dir = "/content/Retrieval-based-Voice-Conversion-WebUI/logs/" + folder
    if not os.path.isdir(parent_dir):  # model has no training logs here
        return ''
    for filename in os.listdir(parent_dir):
        if filename.endswith(".index"):
            return os.path.join(parent_dir, filename)
    return ''
385 |
def download_from_url(url, model):
    """Download a model archive from url, unpack it, and install its contents.

    Supports Google Drive (gdown), mega.nz (Mega client), and plain HTTP
    (wget). The .pth weight lands in ./weights/<model>.pth and any
    .index/.npy feature files in ./logs/<model>/. Returns a status string.

    Bug fixed: the original had the `else: return "No zipfile found."` attached
    to the per-file `if` inside the loop, so the FIRST non-.zip entry in
    ./zips (common with mega downloads) aborted the whole install even when a
    zip was present. We now unpack every zip and only report failure when none
    was found. The non-portable `mkdir -p`/`mv` subprocess calls are replaced
    with os.makedirs/shutil.move.
    """
    url = url.strip()
    if url == '':
        return "URL cannot be left empty."
    # Start from clean scratch directories so stale archives are not unpacked.
    for directory in ("zips", "unzips"):
        if os.path.exists(directory):
            shutil.rmtree(directory)
    os.makedirs("zips", exist_ok=True)
    os.makedirs("unzips", exist_ok=True)
    zipfile = model + '.zip'
    zipfile_path = './zips/' + zipfile
    MODELEPOCH = ''
    if "drive.google.com" in url:
        subprocess.run(["gdown", url, "--fuzzy", "-O", zipfile_path])
    elif "mega.nz" in url:
        m = Mega()
        m.download_url(url, './zips')  # mega keeps the remote filename
    else:
        subprocess.run(["wget", url, "-O", zipfile_path])
    found_zip = False
    for filename in os.listdir("./zips"):
        if filename.endswith(".zip"):
            found_zip = True
            shutil.unpack_archive(os.path.join("./zips", filename), "./unzips", 'zip')
    if not found_zip:
        return "No zipfile found."
    for root, dirs, files in os.walk('./unzips'):
        for file in files:
            if "G_" in file:
                MODELEPOCH = file.split("G_")[1].split(".")[0]
        if MODELEPOCH == '':
            MODELEPOCH = '404'  # sentinel kept from the original behavior
        for file in files:
            file_path = os.path.join(root, file)
            if file.endswith(".npy") or file.endswith(".index"):
                os.makedirs(f"./logs/{model}", exist_ok=True)
                shutil.move(file_path, f"./logs/{model}/")
            elif "G_" not in file and "D_" not in file and file.endswith(".pth"):
                # The standalone voice weight, renamed to the user's model name.
                shutil.move(file_path, f"./weights/{model}.pth")
    shutil.rmtree("zips")
    shutil.rmtree("unzips")
    return "Success."
428 |
def check_for_name():
    """Name of the alphabetically first available model weight, or ''."""
    return sorted(names)[0] if names else ''
434 | print(check_for_name())
#with gr.Blocks() as app
# Two-tab gradio app (crepe-enabled variant): "Inference" and "Download Model".
# Launch strategy at the bottom depends on config.iscolab / config.paperspace.
with gr.Blocks(theme=gr.themes.Base()) as app:
    with gr.Tab("Inference"):
        with gr.Row():
            sid0 = gr.Dropdown(label="1.Choose your Model.", choices=sorted(names), value=check_for_name())
            refresh_button = gr.Button("Refresh", variant="primary", size='sm')
            refresh_button.click(fn=change_choices, inputs=[], outputs=[sid0])
        # Preload the alphabetically first model so the UI works immediately.
        if check_for_name() != '':
            get_vc(sorted(names)[0])
        else:
            print("Starting without preloaded Model.")
        vc_transform0 = gr.Number(label="Optional: You can change the pitch here or leave it at 0.", value=0)
        #clean_button = gr.Button("Unload Voice to Save Memory", variant="primary")
        # Hidden speaker-id slider; get_vc() returns an update that can reveal it.
        spk_item = gr.Slider(minimum=0,maximum=2333,step=1,label="Please select speaker id",value=0,visible=False,interactive=True)
        #clean_button.click(fn=clean, inputs=[], outputs=[sid0])
        sid0.change(
            fn=get_vc,
            inputs=[sid0],
            outputs=[],
        )
        but0 = gr.Button("Convert", variant="primary")
        with gr.Row():
            with gr.Column():
                # Three ways to supply input audio: drop a file, record from
                # the mic, or pick an existing file under ./audios.
                with gr.Row():
                    dropbox = gr.File(label="Drop your audio here & hit the Reload button.")
                with gr.Row():
                    record_button=gr.Audio(source="microphone", label="OR Record audio.", type="filepath")
                with gr.Row():
                    #input_audio0 = gr.Textbox(label="Enter the Path to the Audio File to be Processed (e.g. /content/youraudio.wav)",value="/content/youraudio.wav")
                    input_audio0 = gr.Dropdown(choices=sorted(audio_files), label="2.Choose your audio.", value=get_name())
                    dropbox.upload(fn=save_to_wav2, inputs=[dropbox], outputs=[input_audio0])
                    dropbox.upload(fn=change_choices2, inputs=[], outputs=[input_audio0])
                    refresh_button2 = gr.Button("Refresh", variant="primary", size='sm')
                    refresh_button2.click(fn=change_choices2, inputs=[], outputs=[input_audio0])
                    record_button.change(fn=save_to_wav, inputs=[record_button], outputs=[input_audio0])
                    record_button.change(fn=change_choices2, inputs=[], outputs=[input_audio0])
            with gr.Column():
                with gr.Accordion(label="Feature Settings", open=False):
                    file_index1 = gr.Dropdown(
                        label="3. Path to your added.index file (if it didn't automatically find it.)",
                        value=get_index(),
                        choices=get_indexes(),
                        interactive=True,
                        visible=True
                    )
                    index_rate1 = gr.Slider(
                        minimum=0,
                        maximum=1,
                        label="Strength:",
                        value=0.69,
                        interactive=True,
                    )
                # Auto-select the matching .index whenever the model changes.
                sid0.change(fn=match_index, inputs=[sid0], outputs=[file_index1])

        with gr.Row():
            vc_output2 = gr.Audio(label="Output Audio (Click on the Three Dots in the Right Corner to Download)")
        with gr.Row():
            f0method0 = gr.Radio(
                label="Optional: Change the Pitch Extraction Algorithm. Use PM for fast results or Harvest for better low range (but it's extremely slow) or Crepe for the best of both worlds.",
                choices=["pm", "harvest","crepe"],
                value="crepe",
                interactive=True,
            )
            # Hop length for crepe f0 extraction; hidden but passed to vc_single.
            crepe_hop_length = gr.Slider(
                minimum=1,
                maximum=512,
                step=1,
                label=i18n("crepe_hop_length"),
                value=128,
                interactive=True,
                visible=False
            )
        with gr.Row():
            vc_output1 = gr.Textbox(label="")
        with gr.Row():
            instructions = gr.Markdown("""
            This is simply a modified version of the RVC GUI found here:
            https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI
            """)
            f0_file = gr.File(label="F0 Curve File (Optional, One Pitch Per Line, Replaces Default F0 and Pitch Shift)", visible=False)
        but0.click(
            vc_single,
            [
                spk_item,
                input_audio0,
                vc_transform0,
                f0_file,
                f0method0,
                file_index1,
                index_rate1,
                crepe_hop_length
            ],
            [vc_output1, vc_output2]
        )
    with gr.Tab("Download Model"):
        with gr.Row():
            url=gr.Textbox(label="Enter the URL to the Model:")
        with gr.Row():
            model = gr.Textbox(label="Name your model:")
            download_button=gr.Button(label="Download")
        with gr.Row():
            status_bar=gr.Textbox(label="")
        download_button.click(fn=download_from_url, inputs=[url, model], outputs=[status_bar])

if config.iscolab or config.paperspace: # Share gradio link for colab and paperspace (FORK FEATURE)
    # Hosted notebooks: try an ngrok tunnel first, with gradio share as backup.
    try:
        public_url = ngrok.connect(7860)
        print('Click on THIS link: '+public_url)
    except:
        print('Failed to create ngrok URL')
    try:
        app.launch(share=True)
    except KeyboardInterrupt:
        ngrok.kill()
else:
    # Local/server run: bind all interfaces on the configured port.
    app.launch(
        server_name="0.0.0.0",
        inbrowser=not config.noautoopen,
        server_port=config.listen_port,
        quiet=True,
    )
556 |
--------------------------------------------------------------------------------
/filefinder:
--------------------------------------------------------------------------------
1 | import os
def find_at(file_name, search_path='/'):
    """Recursively search search_path, returning every path whose basename is file_name."""
    matches = []
    for root, _dirs, files in os.walk(search_path):
        if file_name in files:
            matches.append(os.path.join(root, file_name))
    return matches
8 |
--------------------------------------------------------------------------------
/myinfer.py:
--------------------------------------------------------------------------------
1 | '''
2 | runtime\python.exe myinfer.py 0 "E:\codes\py39\RVC-beta\todo-songs\1111.wav" "E:\codes\py39\logs\mi-test\added_IVF677_Flat_nprobe_7.index" harvest "test.wav" "weights/mi-test.pth" 0.6 cuda:0 True
3 | '''
4 | import os,sys,pdb,torch
5 | now_dir = os.getcwd()
6 | sys.path.append(now_dir)
7 | import argparse
8 | import glob
9 | import sys
10 | import torch
11 | from multiprocessing import cpu_count
class Config:
    """Runtime device/precision configuration for standalone inference.

    Picks CUDA/MPS/CPU, decides half vs. full precision, and derives the
    padding/window parameters used by the VC pipeline. NOTE: device_config()
    has side effects — it rewrites configs/*.json and
    trainset_preprocess_pipeline_print.py on disk for weak GPUs.
    """
    def __init__(self,device,is_half):
        self.device = device          # e.g. "cuda:0", "mps", "cpu"
        self.is_half = is_half        # fp16 inference; may be forced off below
        self.n_cpu = 0                # 0 = autodetect in device_config()
        self.gpu_name = None
        self.gpu_mem = None           # total VRAM in GiB (None off-CUDA)
        self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()

    def device_config(self) -> tuple:
        """Probe the device and return (x_pad, x_query, x_center, x_max)."""
        if torch.cuda.is_available():
            i_device = int(self.device.split(":")[-1])
            self.gpu_name = torch.cuda.get_device_name(i_device)
            # 16xx/10xx cards and P40 have poor/absent fp16 — force fp32 and
            # patch the on-disk configs accordingly.
            if (
                ("16" in self.gpu_name and "V100" not in self.gpu_name.upper())
                or "P40" in self.gpu_name.upper()
                or "1060" in self.gpu_name
                or "1070" in self.gpu_name
                or "1080" in self.gpu_name
            ):
                print("16系/10系显卡和P40强制单精度")
                self.is_half = False
                for config_file in ["32k.json", "40k.json", "48k.json"]:
                    with open(f"configs/{config_file}", "r") as f:
                        strr = f.read().replace("true", "false")
                    with open(f"configs/{config_file}", "w") as f:
                        f.write(strr)
                with open("trainset_preprocess_pipeline_print.py", "r") as f:
                    strr = f.read().replace("3.7", "3.0")
                with open("trainset_preprocess_pipeline_print.py", "w") as f:
                    f.write(strr)
            else:
                self.gpu_name = None
            # Total VRAM in whole GiB, rounded with a +0.4 fudge factor.
            self.gpu_mem = int(
                torch.cuda.get_device_properties(i_device).total_memory
                / 1024
                / 1024
                / 1024
                + 0.4
            )
            if self.gpu_mem <= 4:
                # Low-VRAM cards also get the preprocess threshold lowered.
                with open("trainset_preprocess_pipeline_print.py", "r") as f:
                    strr = f.read().replace("3.7", "3.0")
                with open("trainset_preprocess_pipeline_print.py", "w") as f:
                    f.write(strr)
        elif torch.backends.mps.is_available():
            print("没有发现支持的N卡, 使用MPS进行推理")
            self.device = "mps"
        else:
            print("没有发现支持的N卡, 使用CPU进行推理")
            self.device = "cpu"
            self.is_half = True

        if self.n_cpu == 0:
            self.n_cpu = cpu_count()

        if self.is_half:
            # ~6 GB VRAM settings
            x_pad = 3
            x_query = 10
            x_center = 60
            x_max = 65
        else:
            # ~5 GB VRAM settings
            x_pad = 1
            x_query = 6
            x_center = 38
            x_max = 41

        # Tighter windows again for <=4 GB cards.
        if self.gpu_mem != None and self.gpu_mem <= 4:
            x_pad = 1
            x_query = 5
            x_center = 30
            x_max = 32

        return x_pad, x_query, x_center, x_max
88 |
# CLI arguments (see the usage string at the top of this file).
f0up_key = sys.argv[1]
input_path = sys.argv[2]
index_path = sys.argv[3]
f0method = sys.argv[4]  # harvest or pm
opt_path = sys.argv[5]
model_path = sys.argv[6]
index_rate = float(sys.argv[7])
device = sys.argv[8]
# Bug fixed: bool() on any non-empty string is True, so the original made
# is_half True even when the user passed "False". Parse the text instead.
is_half = sys.argv[9].strip().lower() in ("1", "true", "yes")
print(sys.argv)
config = Config(device, is_half)
100 | now_dir=os.getcwd()
101 | sys.path.append(now_dir)
102 | from vc_infer_pipeline import VC
103 | from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
104 | from my_utils import load_audio
105 | from fairseq import checkpoint_utils
106 | from scipy.io import wavfile
107 |
108 | hubert_model=None
def load_hubert():
    """Load HuBERT into the global `hubert_model` on `device`, honoring `is_half`."""
    global hubert_model
    models, _saved_cfg, _task = checkpoint_utils.load_model_ensemble_and_task(
        ["hubert_base.pt"], suffix="",
    )
    model = models[0].to(device)
    hubert_model = model.half() if is_half else model.float()
    hubert_model.eval()
117 |
def vc_single(sid,input_audio,f0_up_key,f0_file,f0_method,file_index,index_rate):
    """Convert one audio file with the globally loaded model; returns raw samples.

    Relies on globals set by get_vc() (cpt, net_g, vc, tgt_sr) and lazily
    loads HuBERT on first use.
    """
    global tgt_sr,net_g,vc,hubert_model
    if input_audio is None:return "You need to upload an audio", None
    f0_up_key = int(f0_up_key)
    audio=load_audio(input_audio,16000)
    times = [0, 0, 0]  # [feature, f0, infer] wall-clock seconds
    if(hubert_model==None):load_hubert()
    if_f0 = cpt.get("f0", 1)
    # audio_opt=vc.pipeline(hubert_model,net_g,sid,audio,times,f0_up_key,f0_method,file_index,file_big_npy,index_rate,if_f0,f0_file=f0_file)
    audio_opt=vc.pipeline(hubert_model,net_g,sid,audio,times,f0_up_key,f0_method,file_index,index_rate,if_f0,f0_file=f0_file)
    print(times)
    return audio_opt
130 |
131 |
def get_vc(model_path):
    """Load the RVC checkpoint at model_path into module globals for inference."""
    global n_spk,tgt_sr,net_g,vc,cpt,device,is_half
    print("loading pth %s"%model_path)
    cpt = torch.load(model_path, map_location="cpu")
    tgt_sr = cpt["config"][-1]
    cpt["config"][-3]=cpt["weight"]["emb_g.weight"].shape[0]#n_spk
    if_f0=cpt.get("f0",1)
    if(if_f0==1):
        net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=is_half)
    else:
        net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
    # enc_q is a training-only module; drop it before loading weights.
    del net_g.enc_q
    print(net_g.load_state_dict(cpt["weight"], strict=False))  # needed for a clean load, oddly
    net_g.eval().to(device)
    if (is_half):net_g = net_g.half()
    else:net_g = net_g.float()
    vc = VC(tgt_sr, config)
    n_spk=cpt["config"][-3]
    # return {"visible": True,"maximum": n_spk, "__type__": "update"}
151 |
152 |
153 | get_vc(model_path)
154 | wav_opt=vc_single(0,input_path,f0up_key,None,f0method,index_path,index_rate)
155 | wavfile.write(opt_path, tgt_sr, wav_opt)
156 |
--------------------------------------------------------------------------------
/ngrokabled.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RejektsAI/EVC/f00f1aab704a8df00df2710fc9270b788ec474b7/ngrokabled.zip
--------------------------------------------------------------------------------
/somegirl.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RejektsAI/EVC/f00f1aab704a8df00df2710fc9270b788ec474b7/somegirl.mp3
--------------------------------------------------------------------------------
/someguy.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RejektsAI/EVC/f00f1aab704a8df00df2710fc9270b788ec474b7/someguy.mp3
--------------------------------------------------------------------------------
/wav2lip-HD.tar.gz:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:66589c1796b79dd3ded0e6df5345317a3a477a378a1ac0734792e79581e47064
3 | size 405266588
4 |
--------------------------------------------------------------------------------
/wav2lip-cache.tar.gz:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:99c6e934981cded0621470a77c9d9979f319e0eb5624f0993d176846c86fd42e
3 | size 3476760
4 |
--------------------------------------------------------------------------------