├── ClipVideo ├── README.md ├── clipvideo │ ├── Dataset_generator.py │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── argparse_tools.cpython-310.pyc │ │ ├── subtitle_utils.cpython-310.pyc │ │ ├── trans_utils.cpython-310.pyc │ │ └── videoclipper.cpython-310.pyc │ ├── argparse_tools.py │ ├── gradio_service.py │ ├── imagemagick_test.py │ ├── subtitle_utils.py │ ├── test.sh │ ├── trans_utils.py │ └── videoclipper.py ├── docs │ └── images │ │ └── show.png └── requirments.txt ├── FunASR-APP-LICENSE ├── FunASR-APP-README.md ├── LICENSE ├── README.md └── TransAudio ├── README.md └── funasr-runtime-deploy.sh /ClipVideo/README.md: --------------------------------------------------------------------------------
1 | ## ClipVideo
2 | 
3 | As the first application toolkit based on FunASR-APP, ClipVideo enables users to clip ```.mp4``` video files or ```.wav``` audio files with chosen text segments out of the recognition results generated by the [Paraformer-long model](https://modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary).
4 | 
5 | With the help of ClipVideo you can get video clips easily with the following steps (in the Gradio service):
6 | - Step1: Choose your video file (or try the example videos below)
7 | - Step2: Copy the text segments you need to 'Text to Clip'
8 | - Step3: Adjust subtitle settings (if needed)
9 | - Step4: Click 'Clip' or 'Clip and Generate Subtitles'
10 | 
11 | ### Usage
12 | ```shell
13 | git clone https://github.com/alibaba-damo-academy/FunASR-APP.git
14 | cd FunASR-APP
15 | # install modelscope
16 | pip install "modelscope[audio_asr]" -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
17 | # python environments
18 | pip install -r ClipVideo/requirments.txt
19 | ```
20 | (Optional) If you want to clip video files with embedded subtitles:
21 | 
22 | 1. ffmpeg and imagemagick are required
23 | 
24 | - On Ubuntu
25 | ```shell
26 | apt-get -y update && apt-get -y install ffmpeg imagemagick
27 | sed -i 's/none/read,write/g' /etc/ImageMagick-6/policy.xml
28 | ```
29 | - On MacOS
30 | ```shell
31 | brew install imagemagick
32 | sed -i 's/none/read,write/g' /usr/local/Cellar/imagemagick/7.1.1-8_1/etc/ImageMagick-7/policy.xml
33 | ```
34 | 2. Download the font file to ClipVideo/font
35 | 
36 | ```shell
37 | wget https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ClipVideo/STHeitiMedium.ttc -O ClipVideo/font/STHeitiMedium.ttc
38 | ```
39 | 
40 | #### Experience ClipVideo in Modelscope
41 | You can try ClipVideo in the ModelScope space: [link](https://modelscope.cn/studios/damo/funasr_app_clipvideo/summary).
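Both the Gradio service and the command line below wrap the same `VideoClipper` class, so the two-stage flow can also be driven from your own Python code. A minimal sketch, assuming it is run from inside `ClipVideo/clipvideo/` (so `videoclipper` is importable) and that `../examples/my_video.mp4` is a hypothetical input file:

```python
# Minimal programmatic sketch of the recognize-then-clip flow, mirroring
# runner() in videoclipper.py; the input path here is hypothetical.
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from videoclipper import VideoClipper  # run from inside ClipVideo/clipvideo/

inference_pipeline = pipeline(
    task=Tasks.auto_speech_recognition,
    model='damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
    vad_model='damo/speech_fsmn_vad_zh-cn-16k-common-pytorch',
    punc_model='damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch',
)
clipper = VideoClipper(inference_pipeline)

# Stage 1: recognize; `state` keeps the raw text, timestamps and sentences.
text, srt, state = clipper.video_recog('../examples/my_video.mp4')

# Stage 2: clip the sentence(s) matching the chosen text
# (join several segments with '#'); offsets are in milliseconds.
dest = '把识别结果中想要的那句话粘贴到这里'  # paste a sentence copied from the stage-1 text
clip_file, message, clip_srt = clipper.video_clip(dest, start_ost=0, end_ost=100, state=state)
```

The `state` dict returned by stage 1 is exactly what stage 2 consumes; the command-line tool persists it to disk between the two stages via `write_state`/`load_state` in `trans_utils.py`.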
42 |
43 | #### Use ClipVideo by Gradio Service
44 | You can establish your own ClipVideo service, which is the same as the [Modelscope Space](https://modelscope.cn/studios/damo/funasr_app_clipvideo/summary), as follows:
45 | ```shell
46 | python clipvideo/gradio_service.py
47 | ```
48 | then visit ```localhost:7860``` and you will get a Gradio service like the one below; you can use ClipVideo following the steps:
49 | 
50 | 
51 | #### Use ClipVideo in the command line
52 | ClipVideo lets you recognize and clip with commands:
53 | ```shell
54 | # working in ClipVideo/
55 | # step1: Recognize
56 | python clipvideo/videoclipper.py --stage 1 \
57 | --file examples/2022云栖大会_片段.mp4 \
58 | --output_dir ./output
59 | # now you can find recognition results and the entire SRT file in ./output/
60 | # step2: Clip
61 | python clipvideo/videoclipper.py --stage 2 \
62 | --file examples/2022云栖大会_片段.mp4 \
63 | --output_dir ./output \
64 | --dest_text '我们把它跟乡村振兴去结合起来,利用我们的设计的能力' \
65 | --start_ost 0 \
66 | --end_ost 100 \
67 | --output_file './output/res.mp4'
68 | ```
69 | -------------------------------------------------------------------------------- /ClipVideo/clipvideo/Dataset_generator.py: --------------------------------------------------------------------------------
1 | import librosa
2 | import soundfile as sf
3 | from videoclipper import VideoClipper
4 | import os
5 | from tqdm import tqdm
6 | import subprocess
7 | from modelscope.pipelines import pipeline
8 | from modelscope.utils.constant import Tasks
9 | from shutil import copyfile, rmtree
10 | 
11 | current_directory = os.path.dirname(os.path.abspath(__file__))
12 | inference_pipeline = pipeline(
13 | task=Tasks.auto_speech_recognition,
14 | model='damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
15 | vad_model='damo/speech_fsmn_vad_zh-cn-16k-common-pytorch',
16 | punc_model='damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch',
17 | )
18 | video_tools = VideoClipper(inference_pipeline)
19 | audio_clipper = VideoClipper(None)
20 | STAGE_RECOGNIZE = 1
21 | STAGE_CLIP = 2
22 | 
23 | # Start offset (ms)
24 | START_OST = 0
25 | # End offset (ms)
26 | END_OST = 0
27 | 
28 | FONT_SIZE = 32
29 | FONT_COLOR = 'white'
30 | ADD_SUB = False
31 | OUTPUT_SRT_FILE = f"{current_directory}/output/srt/"
32 | OUTPUT_MP4_FILE = f"{current_directory}/output/mp4/"
33 | OUTPUT_WAV_FILE = f"{current_directory}/output/wav/"
34 | INPUT_FILE_PATH = f"{current_directory}/video_files/"
35 | READY_INPUT_WAV_PATH = f"{current_directory}/input/mdx_extra/"
36 | DEMUCS_TARGET_INPUT_PATH = f"{current_directory}/input/"
37 | ORG_INPUT_WAV_PATH = f"{current_directory}/org_wav/"
38 | 
39 | 
40 | # Collect all file paths under a directory
41 | def get_all_files_in_directory(directory):
42 | print(f"from :{current_directory}{directory} ")
43 | file_paths_list = []
44 | for root, dirs, files in os.walk(directory):
45 | for file in files:
46 | file_path = os.path.join(root, file)
47 | file_paths_list.append(file_path)
48 | return file_paths_list
49 | 
50 | 
51 | # Recognize every prepared wav and collect the subtitles and subtitle file paths
52 | def vidio_recognizing_to_get_srt_list():
53 | video_list = get_all_files_in_directory(READY_INPUT_WAV_PATH)
54 | srt_list = []
55 | srt_file_list = []
56 | state_json = {}
57 | for video_path in tqdm(video_list):
58 | try:
59 | wav = librosa.load(video_path, sr=16000)[0]
60 | res_text, res_srt, state = video_tools.recog((16000, wav))
61 | state_json.update({str(video_path): state})
62 | except Exception as e:
63 | print(f"Transcription error for {video_path}: {e}")
64 | continue
65 | srt_file_name = os.path.splitext(os.path.basename(video_path))[0] + '.srt'
66 |
srt_save_path = os.path.join(OUTPUT_SRT_FILE, srt_file_name)
67 | srt_list.append(res_srt)
68 | try:
69 | with open(srt_save_path, "w", encoding="utf-8") as f:
70 | f.write(res_srt)
71 | srt_file_list.append(srt_save_path)
72 | print(str(srt_save_path) + " written")
73 | except Exception as e:
74 | print(f"Error writing SRT file {srt_save_path}: {e}")
75 | return srt_list, srt_file_list, state_json
76 | 
77 | 
78 | # Convert video to audio
79 | def mp4_to_wav(input_path):
80 | output_file_name = os.path.splitext(os.path.basename(input_path))[0] + '.wav'
81 | output_path = os.path.join(ORG_INPUT_WAV_PATH, output_file_name)
82 | subprocess.run([
83 | "ffmpeg", "-i", input_path, "-acodec", "pcm_s16le", "-ar", "16000", output_path
84 | ])
85 | print(f"Conversion finished: {input_path} -> {output_path}")
86 | 
87 | 
88 | # Build the full list of subtitle texts
89 | def extract_subtitle_text_list_from_srt():
90 | srt_list, srt_file_path_list, state_json = vidio_recognizing_to_get_srt_list()
91 | all_srt_text_json = {}
92 | index = 0
93 | for srt_file_path in srt_file_path_list:
94 | single_subtitle_text_list = []
95 | with open(srt_file_path, "r", encoding="utf-8") as srt_file:
96 | lines = srt_file.readlines()
97 | for i in range(2, len(lines), 3):
98 | subtitle_text = lines[i].strip()
99 | single_subtitle_text_list.append(subtitle_text)
100 | index = index + 1
101 | wav_file_path = READY_INPUT_WAV_PATH + os.path.splitext(os.path.basename(srt_file_path))[0] + '.wav'
102 | all_srt_text_json.update({str(wav_file_path): single_subtitle_text_list})
103 | print(f"Total number of text lines: {index}")
104 | return all_srt_text_json, state_json
105 | 
106 | 
107 | # Clip the audio by text and save it
108 | def clip_audio_from_srt():
109 | all_srt_text_json, state_json = extract_subtitle_text_list_from_srt()
110 | print(str(state_json))
111 | if len(all_srt_text_json) == 0 or len(state_json) == 0:
112 | print("The subtitle files are empty; please check whether the audio files contain speech.")
113 | return None
114 | for wav_file_path in all_srt_text_json:
115 | for one_line_text in all_srt_text_json[wav_file_path]:
116 | state = state_json[str(wav_file_path)]
117 | wav_file_name = OUTPUT_WAV_FILE + str(one_line_text) + '_clip.wav'
118 | try:
119 | (sr, audio), message, srt_clip = video_tools.clip(dest_text=str(one_line_text), start_ost=START_OST,
120 | end_ost=END_OST,
121 | state=state)
122 | if "No period found in the speech" not in message:
123 | print(f"{one_line_text} in {wav_file_path} done")
124 | sf.write(wav_file_name, audio, 16000)
125 | except Exception as e:
126 | print(f"Clipping {one_line_text} in {wav_file_path} failed: {e}")
127 | 
128 | 
129 | # Clear generated files and re-initialize the folders
130 | def clean_files():
131 | rmtree(f"{current_directory}/input/mdx_extra/")
132 | os.mkdir(f"{current_directory}/input/mdx_extra/")
133 | rmtree(f"{current_directory}/org_wav/")
134 | os.mkdir(f"{current_directory}/org_wav/")
135 | rmtree(f"{current_directory}/output/mp4/")
136 | os.mkdir(f"{current_directory}/output/mp4/")
137 | rmtree(f"{current_directory}/output/srt/")
138 | os.mkdir(f"{current_directory}/output/srt/")
139 | rmtree(f"{current_directory}/output/wav/")
140 | os.mkdir(f"{current_directory}/output/wav/")
141 | print("Initialization finished")
142 | 
143 | 
144 | # Denoise (demucs vocal separation)
145 | def demucs_wav():
146 | for org_wav_file in get_all_files_in_directory(ORG_INPUT_WAV_PATH):
147 | target_ready_wav_path = os.path.splitext(os.path.basename(org_wav_file))[0] + ".wav"
148 | os.system(
149 | f"demucs -n mdx_extra -o {DEMUCS_TARGET_INPUT_PATH} --filename {target_ready_wav_path} {org_wav_file}")
150 | 
151 | 
152 | # Get everything ready as wav...
153 | def ready_all_to_wav():
154 | files_list = get_all_files_in_directory(INPUT_FILE_PATH)
155 | for file in
files_list:
156 | audio_suffixs = ['wav']
157 | video_suffixs = ['mp4']
158 | if file[-3:] in audio_suffixs:
159 | target_wav_path = os.path.join(ORG_INPUT_WAV_PATH, os.path.basename(file))
160 | copyfile(file, target_wav_path)
161 | elif file[-3:] in video_suffixs:
162 | mp4_to_wav(file)
163 | else:
164 | print("Only supports .wav and .mp4!")
165 | 
166 | 
167 | # Run!
168 | def run():
169 | clean_files()
170 | ready_all_to_wav()
171 | demucs_wav()
172 | clip_audio_from_srt()
173 | 
174 | 
175 | # Main function
176 | if __name__ == '__main__':
177 | run()
178 | -------------------------------------------------------------------------------- /ClipVideo/clipvideo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fatfish588/Dataset_Generator_For_VITS/2f012ac453a99723c29ffbf119b4ff545492ebb7/ClipVideo/clipvideo/__init__.py -------------------------------------------------------------------------------- /ClipVideo/clipvideo/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fatfish588/Dataset_Generator_For_VITS/2f012ac453a99723c29ffbf119b4ff545492ebb7/ClipVideo/clipvideo/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ClipVideo/clipvideo/__pycache__/argparse_tools.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fatfish588/Dataset_Generator_For_VITS/2f012ac453a99723c29ffbf119b4ff545492ebb7/ClipVideo/clipvideo/__pycache__/argparse_tools.cpython-310.pyc -------------------------------------------------------------------------------- /ClipVideo/clipvideo/__pycache__/subtitle_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fatfish588/Dataset_Generator_For_VITS/2f012ac453a99723c29ffbf119b4ff545492ebb7/ClipVideo/clipvideo/__pycache__/subtitle_utils.cpython-310.pyc -------------------------------------------------------------------------------- /ClipVideo/clipvideo/__pycache__/trans_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fatfish588/Dataset_Generator_For_VITS/2f012ac453a99723c29ffbf119b4ff545492ebb7/ClipVideo/clipvideo/__pycache__/trans_utils.cpython-310.pyc -------------------------------------------------------------------------------- /ClipVideo/clipvideo/__pycache__/videoclipper.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fatfish588/Dataset_Generator_For_VITS/2f012ac453a99723c29ffbf119b4ff545492ebb7/ClipVideo/clipvideo/__pycache__/videoclipper.cpython-310.pyc -------------------------------------------------------------------------------- /ClipVideo/clipvideo/argparse_tools.py: --------------------------------------------------------------------------------
1 | import argparse
2 | from pathlib import Path
3 | 
4 | import yaml
5 | import sys
6 | 
7 | 
8 | class ArgumentParser(argparse.ArgumentParser):
9 | """Simple implementation of ArgumentParser supporting config file
10 | 
11 | This class originated from https://github.com/bw2/ConfigArgParse,
12 | but it differs from that library in a few ways:
13 | 
14 | - Not supporting multiple config files
15 | - Automatically adding "--config" as an option.
16 | - Not supporting any formats other than yaml
17 | - Not checking argument type
18 | 
19 | """
20 | 
21 | def __init__(self, *args, **kwargs):
22 | super().__init__(*args, **kwargs)
23 | self.add_argument("--config", help="Give config file in yaml format")
24 | 
25 | def parse_known_args(self, args=None, namespace=None):
26 | # Parse once to pick up the "--config" setting
27 | _args, _ = super().parse_known_args(args, namespace)
28 | if _args.config is not None:
29 | if not Path(_args.config).exists():
30 | self.error(f"No such file: {_args.config}")
31 | 
32 | with open(_args.config, "r", encoding="utf-8") as f:
33 | d = yaml.safe_load(f)
34 | if not isinstance(d, dict):
35 | self.error(f"Config file has non dict value: {_args.config}")
36 | 
37 | for key in d:
38 | for action in self._actions:
39 | if key == action.dest:
40 | break
41 | else:
42 | self.error(f"unrecognized arguments: {key} (from {_args.config})")
43 | 
44 | # NOTE(kamo): Ignore "--config" from a config file
45 | # NOTE(kamo): Unlike "configargparse", this module doesn't check type.
46 | # i.e. We can set any type value regardless of argument type.
47 | self.set_defaults(**d)
48 | return super().parse_known_args(args, namespace)
49 | 
50 | 
51 | def get_commandline_args():
52 | extra_chars = [
53 | " ",
54 | ";",
55 | "&",
56 | "(",
57 | ")",
58 | "|",
59 | "^",
60 | "<",
61 | ">",
62 | "?",
63 | "*",
64 | "[",
65 | "]",
66 | "$",
67 | "`",
68 | '"',
69 | "\\",
70 | "!",
71 | "{",
72 | "}",
73 | ]
74 | 
75 | # Escape the extra characters for shell
76 | argv = [
77 | arg.replace("'", "'\\''")
78 | if all(char not in arg for char in extra_chars)
79 | else "'" + arg.replace("'", "'\\''") + "'"
80 | for arg in sys.argv
81 | ]
82 | 
83 | return sys.executable + " " + " ".join(argv) -------------------------------------------------------------------------------- /ClipVideo/clipvideo/gradio_service.py: --------------------------------------------------------------------------------
1 | import gradio as gr
2 | from modelscope.pipelines import pipeline
3 | from modelscope.utils.constant import Tasks
4 | from videoclipper import VideoClipper
5 | 
6 | 
7 | if __name__ == "__main__":
8 | inference_pipeline = pipeline(
9 | task=Tasks.auto_speech_recognition,
10 | model='damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
11 | vad_model='damo/speech_fsmn_vad_zh-cn-16k-common-pytorch',
12 | punc_model='damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch',
13 | )
14 | audio_clipper = VideoClipper(inference_pipeline)
15 | 
16 | def audio_recog(audio_input):
17 | return audio_clipper.recog(audio_input)
18 | 
19 | def audio_clip(dest_text, start_ost, end_ost, state):
20 | return audio_clipper.clip(dest_text, start_ost, end_ost, state)
21 | 
22 | def video_recog(video_input):
23 | return audio_clipper.video_recog(video_input)
24 | 
25 | def video_clip(dest_text, start_ost, end_ost, state):
26 | return audio_clipper.video_clip(dest_text, start_ost, end_ost, state)
27 | 
28 | def video_clip_addsub(dest_text, start_ost, end_ost, state, font_size, font_color):
29 | return audio_clipper.video_clip(dest_text, start_ost, end_ost, state, font_size, font_color, add_sub=True)
30 | 
31 | '''
32 | top_md_1 = ("""
33 | 基于达摩院自研Paraformer-长音频版的语音识别、端点检测、标点预测、时间戳功能
34 | 
35 | 准确识别,自由复制所需段落并一键裁剪、添加字幕
36 | 
37 | * Step1: 上传视频文件(或使用下方的用例体验),点击 **识别** 按钮
38 | * Step2: 复制识别结果中所需的文字至右上方,设置偏移与字幕配置(可选)
39 | * Step3: 点击 **裁剪** 按钮或 **裁剪并添加字幕** 按钮获得结果
40 | """)
41 | '''
42 | 
43 | top_md_2 = ("""
44 | 受到网络传输与服务资源的限制,用于体验的视频最好大小在40mb以下
45 |
过大的视频可以尝试分离音轨使用音频剪辑,或 **通过源代码将您的ClipVideo服务部署在本地(推荐)** : 46 |
47 |
48 | FunASR_APP: 49 | 🌟支持我们: 50 |
51 |
52 | """) 53 | 54 | top_md_3 = ("""访问FunASR项目与论文能够帮助您深入了解ClipVideo中所使用的语音处理相关模型: 55 |
56 |
57 | FunASR: 58 | FunASR Paper: 59 | 🌟Star FunASR: 60 |
61 |
62 | """) 63 | 64 | # gradio interface 65 | with gr.Blocks() as demo: 66 | #gr.Image("./examples/guide.png", show_label=False) 67 | # gr.Markdown(top_md_1) 68 | #gr.Markdown(top_md_2) 69 | #gr.Markdown(top_md_3) 70 | video_state = gr.State() 71 | audio_state = gr.State() 72 | with gr.Tab("🎥✂️视频裁剪 Video Clipping"): 73 | with gr.Row(): 74 | with gr.Column(): 75 | video_input = gr.Video(label="🎥视频输入 Video Input") 76 | gr.Examples(['https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ClipVideo/%E4%B8%BA%E4%BB%80%E4%B9%88%E8%A6%81%E5%A4%9A%E8%AF%BB%E4%B9%A6%EF%BC%9F%E8%BF%99%E6%98%AF%E6%88%91%E5%90%AC%E8%BF%87%E6%9C%80%E5%A5%BD%E7%9A%84%E7%AD%94%E6%A1%88-%E7%89%87%E6%AE%B5.mp4', 77 | 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ClipVideo/2022%E4%BA%91%E6%A0%96%E5%A4%A7%E4%BC%9A_%E7%89%87%E6%AE%B5.mp4', 78 | 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ClipVideo/2022%E4%BA%91%E6%A0%96%E5%A4%A7%E4%BC%9A_%E7%89%87%E6%AE%B52.mp4', 79 | 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ClipVideo/%E4%BD%BF%E7%94%A8chatgpt_%E7%89%87%E6%AE%B5.mp4'], 80 | [video_input]) 81 | recog_button2 = gr.Button("👂识别 Recognize") 82 | video_text_output = gr.Textbox(label="✏️识别结果 Recognition Result") 83 | video_srt_output = gr.Textbox(label="📖SRT字幕内容 RST Subtitles") 84 | with gr.Column(): 85 | video_text_input = gr.Textbox(label="✏️待裁剪文本 Text to Clip (多段文本使用'#'连接)") 86 | with gr.Row(): 87 | video_start_ost = gr.Slider(minimum=-500, maximum=1000, value=0, step=50, label="⏪开始位置偏移 Start Offset (ms)") 88 | video_end_ost = gr.Slider(minimum=-500, maximum=1000, value=100, step=50, label="⏩结束位置偏移 End Offset (ms)") 89 | with gr.Row(): 90 | font_size = gr.Slider(minimum=10, maximum=100, value=32, step=2, label="🔠字幕字体大小 Subtitle Font Size") 91 | font_color = gr.Radio(["black", "white", "green", "red"], label="🌈字幕颜色 Subtitle Color", value='white') 92 | # font = gr.Radio(["黑体", "Alibaba Sans"], label="字体 Font") 93 | with gr.Row(): 94 | clip_button2 = gr.Button("✂️裁剪\nClip") 95 | clip_button3 = gr.Button("✂️裁剪并添加字幕\nClip and Generate Subtitles") 96 | video_output = gr.Video(label="🎥裁剪结果 Audio Clipped") 97 | video_mess_output = gr.Textbox(label="ℹ️裁剪信息 Clipping Log") 98 | video_srt_clip_output = gr.Textbox(label="📖裁剪部分SRT字幕内容 Clipped RST Subtitles") 99 | 100 | with gr.Tab("🔊✂️音频裁剪 Audio Clipping"): 101 | with gr.Row(): 102 | with gr.Column(): 103 | audio_input = gr.Audio(label="🔊音频输入 Audio Input") 104 | gr.Examples(['https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ClipVideo/%E9%B2%81%E8%82%83%E9%87%87%E8%AE%BF%E7%89%87%E6%AE%B51.wav'], [audio_input]) 105 | recog_button1 = gr.Button("👂识别 Recognize") 106 | audio_text_output = gr.Textbox(label="✏️识别结果 Recognition Result") 107 | audio_srt_output = gr.Textbox(label="📖SRT字幕内容 RST Subtitles") 108 | with gr.Column(): 109 | audio_text_input = gr.Textbox(label="✏️待裁剪文本 Text to Clip (多段文本使用'#'连接)") 110 | with gr.Row(): 111 | audio_start_ost = gr.Slider(minimum=-500, maximum=1000, value=0, step=50, label="⏪开始位置偏移 Start Offset (ms)") 112 | audio_end_ost = gr.Slider(minimum=-500, maximum=1000, value=100, step=50, label="⏩结束位置偏移 End Offset (ms)") 113 | with gr.Row(): 114 | clip_button1 = gr.Button("✂️裁剪 Clip") 115 | audio_output = gr.Audio(label="🔊裁剪结果 Audio Clipped") 116 | audio_mess_output = gr.Textbox(label="ℹ️裁剪信息 Clipping Log") 117 | audio_srt_clip_output = gr.Textbox(label="📖裁剪部分SRT字幕内容 Clipped RST Subtitles") 118 | 119 | recog_button1.click(audio_recog, 120 | inputs=audio_input, 121 | outputs=[audio_text_output, audio_srt_output, audio_state]) 122 | 
clip_button1.click(audio_clip, 123 | inputs=[audio_text_input, audio_start_ost, audio_end_ost, audio_state], 124 | outputs=[audio_output, audio_mess_output, audio_srt_clip_output]) 125 | 126 | recog_button2.click(video_recog, 127 | inputs=video_input, 128 | outputs=[video_text_output, video_srt_output, video_state]) 129 | clip_button2.click(video_clip, 130 | inputs=[video_text_input, video_start_ost, video_end_ost, video_state], 131 | outputs=[video_output, video_mess_output, video_srt_clip_output]) 132 | clip_button3.click(video_clip_addsub, 133 | inputs=[video_text_input, video_start_ost, video_end_ost, video_state, font_size, font_color], 134 | outputs=[video_output, video_mess_output, video_srt_clip_output]) 135 | 136 | # start gradio service in local 137 | demo.queue(concurrency_count=3).launch() 138 | -------------------------------------------------------------------------------- /ClipVideo/clipvideo/imagemagick_test.py: -------------------------------------------------------------------------------- 1 | from moviepy.editor import * 2 | from moviepy.video.tools.subtitles import SubtitlesClip 3 | 4 | generator = lambda txt: TextClip(txt, font='./font/STHeitiMedium.ttc', fontsize=48, color='white') 5 | subs = [((0, 2), 'sub1中文字幕'), 6 | ((2, 4), 'subs2'), 7 | ((4, 6), 'subs3'), 8 | ((6, 8), 'subs4')] 9 | 10 | subtitles = SubtitlesClip(subs, generator) 11 | 12 | video = VideoFileClip("examples/2022云栖大会_片段.mp4.mp4") 13 | video = video.subclip(0, 8) 14 | video = CompositeVideoClip([video, subtitles.set_pos(('center','bottom'))]) 15 | 16 | video.write_videofile("test_output.mp4") -------------------------------------------------------------------------------- /ClipVideo/clipvideo/subtitle_utils.py: -------------------------------------------------------------------------------- 1 | def time_convert(ms): 2 | ms = int(ms) 3 | tail = ms % 1000 4 | s = ms // 1000 5 | mi = s // 60 6 | s = s % 60 7 | h = mi // 60 8 | mi = mi % 60 9 | h = "00" if h == 0 else str(h) 10 | mi = "00" if mi == 0 else str(mi) 11 | s = "00" if s == 0 else str(s) 12 | tail = str(tail) 13 | if len(h) == 1: h = '0' + h 14 | if len(mi) == 1: mi = '0' + mi 15 | if len(s) == 1: s = '0' + s 16 | return "{}:{}:{},{}".format(h, mi, s, tail) 17 | 18 | 19 | class Text2SRT(): 20 | def __init__(self, text_seg, ts_list, offset=0): 21 | self.token_list = [i for i in text_seg.split() if len(i)] 22 | self.ts_list = ts_list 23 | start, end = ts_list[0][0] - offset, ts_list[-1][1] - offset 24 | self.start_sec, self.end_sec = start, end 25 | self.start_time = time_convert(start) 26 | self.end_time = time_convert(end) 27 | def text(self): 28 | res = "" 29 | for word in self.token_list: 30 | if '\u4e00' <= word <= '\u9fff': 31 | res += word 32 | else: 33 | res += " " + word 34 | return res 35 | def len(self): 36 | return len(self.token_list) 37 | def srt(self, acc_ost=0.0): 38 | return "{} --> {}\n{}\n".format( 39 | time_convert(self.start_sec+acc_ost*1000), 40 | time_convert(self.end_sec+acc_ost*1000), 41 | self.text()) 42 | def time(self, acc_ost=0.0): 43 | return (self.start_sec/1000+acc_ost, self.end_sec/1000+acc_ost) 44 | 45 | 46 | def generate_srt(sentence_list): 47 | srt_total = '' 48 | for i, d in enumerate(sentence_list): 49 | t2s = Text2SRT(d['text_seg'], d['ts_list']) 50 | srt_total += "{}\n{}".format(i, t2s.srt()) 51 | return srt_total 52 | 53 | def generate_srt_clip(sentence_list, start, end, begin_index=0, time_acc_ost=0.0): 54 | start, end = int(start * 1000), int(end * 1000) 55 | srt_total = '' 56 | cc = 1 + begin_index 57 | 
subs = [] 58 | for i, d in enumerate(sentence_list): 59 | if d['ts_list'][-1][1] <= start: 60 | continue 61 | if d['ts_list'][0][0] >= end: 62 | break 63 | # parts in between 64 | if (d['ts_list'][-1][1] <= end and d['ts_list'][0][0] > start) or (d['ts_list'][-1][1] == end and d['ts_list'][0][0] == start): 65 | t2s = Text2SRT(d['text_seg'], d['ts_list'], offset=start) 66 | srt_total += "{}\n{}".format(cc, t2s.srt(time_acc_ost)) 67 | subs.append((t2s.time(time_acc_ost), t2s.text())) 68 | cc += 1 69 | continue 70 | if d['ts_list'][0][0] <= start: 71 | if not d['ts_list'][-1][1] > end: 72 | for j, ts in enumerate(d['ts_list']): 73 | if ts[1] > start: 74 | break 75 | _text = " ".join(d['text_seg'].split()[j:]) 76 | _ts = d['ts_list'][j:] 77 | else: 78 | for j, ts in enumerate(d['ts_list']): 79 | if ts[1] > start: 80 | _start = j 81 | break 82 | for j, ts in enumerate(d['ts_list']): 83 | if ts[1] > end: 84 | _end = j 85 | break 86 | _text = " ".join(d['text_seg'].split()[_start:_end]) 87 | _ts = d['ts_list'][_start:_end] 88 | if len(ts): 89 | t2s = Text2SRT(_text, _ts, offset=start) 90 | srt_total += "{}\n{}".format(cc, t2s.srt(time_acc_ost)) 91 | subs.append((t2s.time(time_acc_ost), t2s.text())) 92 | cc += 1 93 | continue 94 | if d['ts_list'][-1][1] > end: 95 | for j, ts in enumerate(d['ts_list']): 96 | if ts[1] > end: 97 | break 98 | _text = " ".join(d['text_seg'].split()[:j]) 99 | _ts = d['ts_list'][:j] 100 | if len(_ts): 101 | t2s = Text2SRT(_text, _ts, offset=start) 102 | srt_total += "{}\n{}".format(cc, t2s.srt(time_acc_ost)) 103 | subs.append( 104 | (t2s.time(time_acc_ost), t2s.text()) 105 | ) 106 | cc += 1 107 | continue 108 | return srt_total, subs, cc 109 | -------------------------------------------------------------------------------- /ClipVideo/clipvideo/test.sh: -------------------------------------------------------------------------------- 1 | # step1: Recognize 2 | python videoclipper.py --stage 1 \ 3 | --file ../examples/2022云栖大会_片段.mp4 \ 4 | --output_dir ./output 5 | # now you can find recognition results and entire SRT file in ./output/ 6 | # step2: Clip 7 | python videoclipper.py --stage 2 \ 8 | --file ../examples/2022云栖大会_片段.mp4 \ 9 | --output_dir ./output \ 10 | --dest_text '所以这个是我们办这个奖的初心啊,我们也会一届一届的办下去' \ 11 | --start_ost 0 \ 12 | --end_ost 100 \ 13 | --output_file './output/res.mp4' -------------------------------------------------------------------------------- /ClipVideo/clipvideo/trans_utils.py: -------------------------------------------------------------------------------- 1 | PUNC_LIST = [',', '。', '!', '?', '、'] 2 | 3 | 4 | def pre_proc(text): 5 | res = '' 6 | for i in range(len(text)): 7 | if text[i] in PUNC_LIST: 8 | continue 9 | if '\u4e00' <= text[i] <= '\u9fff': 10 | if len(res) and res[-1] != " ": 11 | res += ' ' + text[i]+' ' 12 | else: 13 | res += text[i]+' ' 14 | else: 15 | res += text[i] 16 | if res[-1] == ' ': 17 | res = res[:-1] 18 | return res 19 | 20 | def proc(raw_text, timestamp, dest_text): 21 | # simple matching 22 | ld = len(dest_text.split()) 23 | mi, ts = [], [] 24 | offset = 0 25 | while True: 26 | fi = raw_text.find(dest_text, offset, len(raw_text)) 27 | # import pdb; pdb.set_trace() 28 | ti = raw_text[:fi].count(' ') 29 | if fi == -1: 30 | break 31 | offset = fi + ld 32 | mi.append(fi) 33 | ts.append([timestamp[ti][0]*16, timestamp[ti+ld-1][1]*16]) 34 | # import pdb; pdb.set_trace() 35 | return ts 36 | 37 | 38 | def write_state(output_dir, state): 39 | for key in ['/recog_res_raw', '/timestamp', '/sentences']: 40 | with 
open(output_dir+key, 'w', encoding="UTF-8") as fout: 41 | fout.write(str(state[key[1:]])) 42 | 43 | 44 | def load_state(output_dir): 45 | state = {} 46 | with open(output_dir+'/recog_res_raw', encoding='UTF-8') as fin: 47 | line = fin.read() 48 | state['recog_res_raw'] = line 49 | with open(output_dir+'/timestamp', encoding='UTF-8') as fin: 50 | line = fin.read() 51 | state['timestamp'] = eval(line) 52 | with open(output_dir+'/sentences', encoding='UTF-8') as fin: 53 | line = fin.read() 54 | state['sentences'] = eval(line) 55 | return state 56 | 57 | -------------------------------------------------------------------------------- /ClipVideo/clipvideo/videoclipper.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import copy 4 | import librosa 5 | import logging 6 | import argparse 7 | import numpy as np 8 | import soundfile as sf 9 | import moviepy.editor as mpy 10 | from subtitle_utils import generate_srt, generate_srt_clip 11 | from trans_utils import pre_proc, proc, write_state, load_state 12 | from argparse_tools import ArgumentParser, get_commandline_args 13 | 14 | from moviepy.editor import * 15 | from moviepy.video.tools.subtitles import SubtitlesClip 16 | 17 | 18 | class VideoClipper(): 19 | def __init__(self, asr_pipeline): 20 | logging.warning("Initializing VideoClipper.") 21 | self.asr_pipeline = asr_pipeline 22 | 23 | def recog(self, audio_input, state=None): 24 | if state is None: 25 | state = {} 26 | sr, data = audio_input 27 | assert sr == 16000, "16kHz sample rate required, {} given.".format(sr) 28 | if len(data.shape) == 2: # multi-channel wav input 29 | # logging.warning("Input wav shape: {}, only first channel reserved.").format(data.shape) 30 | data = data[:,0] 31 | state['audio_input'] = (sr, data) 32 | data = data.astype(np.float64) 33 | rec_result = self.asr_pipeline(audio_in=data) 34 | state['recog_res_raw'] = rec_result['text_postprocessed'] 35 | state['timestamp'] = rec_result['time_stamp'] 36 | state['sentences'] = rec_result['sentences'] 37 | res_text = rec_result['text'] 38 | res_srt = generate_srt(rec_result['sentences']) 39 | return res_text, res_srt, state 40 | 41 | def clip(self, dest_text, start_ost, end_ost, state): 42 | # get from state 43 | audio_input = state['audio_input'] 44 | recog_res_raw = state['recog_res_raw'] 45 | timestamp = state['timestamp'] 46 | sentences = state['sentences'] 47 | sr, data = audio_input 48 | data = data.astype(np.float64) 49 | 50 | all_ts = [] 51 | for _dest_text in dest_text.split('#'): 52 | _dest_text = pre_proc(_dest_text) 53 | ts = proc(recog_res_raw, timestamp, _dest_text) 54 | for _ts in ts: all_ts.append(_ts) 55 | ts = all_ts 56 | srt_index = 0 57 | clip_srt = "" 58 | if len(ts): 59 | start, end = ts[0] 60 | start = min(max(0, start+start_ost*16), len(data)) 61 | end = min(max(0, end+end_ost*16), len(data)) 62 | res_audio = data[start:end] 63 | start_end_info = "from {} to {}".format(start/16000, end/16000) 64 | srt_clip, _, srt_index = generate_srt_clip(sentences, start/16000.0, end/16000.0, begin_index=srt_index) 65 | clip_srt += srt_clip 66 | for _ts in ts[1:]: # multiple sentence input or multiple output matched 67 | start, end = _ts 68 | start = min(max(0, start+start_ost*16), len(data)) 69 | end = min(max(0, end+end_ost*16), len(data)) 70 | start_end_info += ", from {} to {}".format(start, end) 71 | res_audio = np.concatenate([res_audio, data[start+start_ost*16:end+end_ost*16]], -1) 72 | srt_clip, _, srt_index = generate_srt_clip(sentences, 
start/16000.0, end/16000.0, begin_index=srt_index-1) 73 | clip_srt += srt_clip 74 | if len(ts): 75 | message = "{} periods found in the speech: ".format(len(ts)) + start_end_info 76 | else: 77 | message = "No period found in the speech, return raw speech. You may check the recognition result and try other destination text." 78 | res_audio = data 79 | return (sr, res_audio), message, clip_srt 80 | 81 | def video_recog(self, vedio_filename): 82 | vedio_filename = vedio_filename 83 | clip_video_file = vedio_filename[:-4] + '_clip.mp4' 84 | video = mpy.VideoFileClip(vedio_filename) 85 | audio_file = vedio_filename[:-3] + 'wav' 86 | video.audio.write_audiofile(audio_file) 87 | wav = librosa.load(audio_file, sr=16000)[0] 88 | state = { 89 | 'vedio_filename': vedio_filename, 90 | 'clip_video_file': clip_video_file, 91 | 'video': video, 92 | } 93 | # res_text, res_srt = self.recog((16000, wav), state) 94 | os.remove(audio_file) 95 | return self.recog((16000, wav), state) 96 | 97 | def video_clip(self, dest_text, start_ost, end_ost, state, font_size=32, font_color='white', add_sub=False): 98 | # get from state 99 | recog_res_raw = state['recog_res_raw'] 100 | timestamp = state['timestamp'] 101 | sentences = state['sentences'] 102 | video = state['video'] 103 | clip_video_file = state['clip_video_file'] 104 | vedio_filename = state['vedio_filename'] 105 | 106 | all_ts = [] 107 | srt_index = 0 108 | time_acc_ost = 0.0 109 | for _dest_text in dest_text.split('#'): 110 | _dest_text = pre_proc(_dest_text) 111 | ts = proc(recog_res_raw, timestamp, _dest_text) 112 | for _ts in ts: all_ts.append(_ts) 113 | ts = all_ts 114 | clip_srt = "" 115 | if len(ts): 116 | start, end = ts[0][0] / 16000, ts[0][1] / 16000 117 | srt_clip, subs, srt_index = generate_srt_clip(sentences, start, end, begin_index=srt_index, time_acc_ost=time_acc_ost) 118 | start, end = start+start_ost/1000.0, end+end_ost/1000.0 119 | video_clip = video.subclip(start, end) 120 | start_end_info = "from {} to {}".format(start, end) 121 | clip_srt += srt_clip 122 | if add_sub: 123 | generator = lambda txt: TextClip(txt, font='./font/STHeitiMedium.ttc', fontsize=font_size, color=font_color) 124 | subtitles = SubtitlesClip(subs, generator) 125 | video_clip = CompositeVideoClip([video_clip, subtitles.set_pos(('center','bottom'))]) 126 | concate_clip = [video_clip] 127 | time_acc_ost += end+end_ost/1000.0 - (start+start_ost/1000.0) 128 | for _ts in ts[1:]: 129 | start, end = _ts[0] / 16000, _ts[1] / 16000 130 | srt_clip, subs, srt_index = generate_srt_clip(sentences, start, end, begin_index=srt_index-1, time_acc_ost=time_acc_ost) 131 | start, end = start+start_ost/1000.0, end+end_ost/1000.0 132 | _video_clip = video.subclip(start, end) 133 | start_end_info += ", from {} to {}".format(start, end) 134 | clip_srt += srt_clip 135 | if add_sub: 136 | generator = lambda txt: TextClip(txt, font='./font/STHeitiMedium.ttc', fontsize=font_size, color=font_color) 137 | subtitles = SubtitlesClip(subs, generator) 138 | _video_clip = CompositeVideoClip([_video_clip, subtitles.set_pos(('center','bottom'))]) 139 | concate_clip.append(copy.copy(_video_clip)) 140 | time_acc_ost += end+end_ost/1000.0 - (start+start_ost/1000.0) 141 | message = "{} periods found in the audio: ".format(len(ts)) + start_end_info 142 | logging.warning("Concating...") 143 | if len(concate_clip) > 1: 144 | video_clip = concatenate_videoclips(concate_clip) 145 | video_clip.write_videofile(clip_video_file, audio_codec="aac") 146 | else: 147 | clip_video_file = vedio_filename 148 | message = "No 
period found in the audio, return raw speech. You may check the recognition result and try other destination text."
149 | srt_clip = ''
150 | return clip_video_file, message, clip_srt
151 | 
152 | 
153 | def get_parser():
154 | parser = ArgumentParser(
155 | description="ClipVideo Argument",
156 | formatter_class=argparse.ArgumentDefaultsHelpFormatter,
157 | )
158 | parser.add_argument(
159 | "--stage",
160 | type=int,
161 | choices=(1, 2),
162 | help="Stage, 1 for recognizing and 2 for clipping",
163 | required=True
164 | )
165 | parser.add_argument(
166 | "--file",
167 | type=str,
168 | default=None,
169 | help="Input file path",
170 | required=True
171 | )
172 | parser.add_argument(
173 | "--output_dir",
174 | type=str,
175 | default='./output/mp4',
176 | help="Output files path",
177 | )
178 | parser.add_argument(
179 | "--dest_text",
180 | type=str,
181 | default=None,
182 | help="Destination text string for clipping",
183 | )
184 | parser.add_argument(
185 | "--start_ost",
186 | type=int,
187 | default=0,
188 | help="Offset time in ms at beginning for clipping"
189 | )
190 | parser.add_argument(
191 | "--end_ost",
192 | type=int,
193 | default=0,
194 | help="Offset time in ms at ending for clipping"
195 | )
196 | parser.add_argument(
197 | "--output_file",
198 | type=str,
199 | default=None,
200 | help="Output file path"
201 | )
202 | return parser
203 | 
204 | 
205 | def runner(stage, file, output_dir, dest_text, start_ost, end_ost, output_file, config=None):
206 | audio_suffixs = ['wav']
207 | video_suffixs = ['mp4']
208 | if file[-3:] in audio_suffixs:
209 | mode = 'audio'
210 | elif file[-3:] in video_suffixs:
211 | mode = 'video'
212 | else:
213 | raise ValueError("Unsupported file format: {}".format(file))  # fail fast: `mode` would be undefined below
214 | while output_dir.endswith('/'):
215 | output_dir = output_dir[:-1]
216 | if stage == 1:
217 | from modelscope.pipelines import pipeline
218 | from modelscope.utils.constant import Tasks
219 | # initialize modelscope asr pipeline
220 | logging.warning("Initializing modelscope asr pipeline.")
221 | inference_pipeline = pipeline(
222 | task=Tasks.auto_speech_recognition,
223 | model='damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
224 | vad_model='damo/speech_fsmn_vad_zh-cn-16k-common-pytorch',
225 | punc_model='damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch',
226 | output_dir=output_dir,
227 | )
228 | audio_clipper = VideoClipper(inference_pipeline)
229 | if mode == 'audio':
230 | logging.warning("Recognizing audio file: {}".format(file))
231 | wav, sr = librosa.load(file, sr=16000)
232 | res_text, res_srt, state = audio_clipper.recog((sr, wav))
233 | if mode == 'video':
234 | logging.warning("Recognizing video file: {}".format(file))
235 | res_text, res_srt, state = audio_clipper.video_recog(file)
236 | total_srt_file = output_dir + '/total.srt'
237 | with open(total_srt_file, 'w') as fout:
238 | fout.write(res_srt)
239 | logging.warning("Write total subtitle to {}".format(total_srt_file))
240 | write_state(output_dir, state)
241 | logging.warning("Recognition succeeded.
You can copy the text segment from below and use stage 2.")
242 | print(res_text)
243 | if stage == 2:
244 | audio_clipper = VideoClipper(None)
245 | if mode == 'audio':
246 | state = load_state(output_dir)
247 | wav, sr = librosa.load(file, sr=16000)
248 | state['audio_input'] = (sr, wav)
249 | (sr, audio), message, srt_clip = audio_clipper.clip(dest_text, start_ost, end_ost, state)
250 | if output_file is None:
251 | output_file = output_dir + '/result.wav'
252 | clip_srt_file = output_file[:-3] + 'srt'
253 | logging.warning(message)
254 | assert output_file.endswith('.wav'), "output_file must end with '.wav'"
255 | sf.write(output_file, audio, 16000)
256 | logging.warning("Save clipped wav file to {}".format(output_file))
257 | with open(clip_srt_file, 'w') as fout:
258 | fout.write(srt_clip)
259 | logging.warning("Write clipped subtitle to {}".format(clip_srt_file))
260 | if mode == 'video':
261 | state = load_state(output_dir)
262 | state['vedio_filename'] = file
263 | if output_file is None:
264 | state['clip_video_file'] = file[:-4] + '_clip.mp4'
265 | else:
266 | state['clip_video_file'] = output_file
267 | clip_srt_file = state['clip_video_file'][:-3] + 'srt'
268 | state['video'] = mpy.VideoFileClip(file)
269 | clip_video_file, message, srt_clip = audio_clipper.video_clip(dest_text, start_ost, end_ost, state)
270 | logging.warning("Clipping Log: {}".format(message))
271 | logging.warning("Save clipped mp4 file to {}".format(clip_video_file))
272 | with open(clip_srt_file, 'w') as fout:
273 | fout.write(srt_clip)
274 | logging.warning("Write clipped subtitle to {}".format(clip_srt_file))
275 | 
276 | 
277 | def main(cmd=None):
278 | print(get_commandline_args(), file=sys.stderr)
279 | parser = get_parser()
280 | args = parser.parse_args(cmd)
281 | kwargs = vars(args)
282 | runner(**kwargs)
283 | 
284 | 
285 | if __name__ == '__main__':
286 | main() -------------------------------------------------------------------------------- /ClipVideo/docs/images/show.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fatfish588/Dataset_Generator_For_VITS/2f012ac453a99723c29ffbf119b4ff545492ebb7/ClipVideo/docs/images/show.png -------------------------------------------------------------------------------- /ClipVideo/requirments.txt: --------------------------------------------------------------------------------
1 | librosa
2 | soundfile
3 | funasr>=0.5.5
4 | moviepy
5 | numpy
6 | gradio -------------------------------------------------------------------------------- /FunASR-APP-LICENSE: --------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2023 Alibaba
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /FunASR-APP-README.md: --------------------------------------------------------------------------------
1 | # FunASR-APP
2 | 
3 | FunASR-APP is a comprehensive speech application toolkit designed to facilitate the application and integration of [FunASR](https://github.com/alibaba-damo-academy/FunASR)'s open-source speech models. Its primary goal is to package the models into convenient application packages, enabling easy application and seamless integration.
4 | 
5 | ## ClipVideo
6 | 
7 | As the first application toolkit of FunASR-APP, ClipVideo enables users to clip ```.mp4``` video files or ```.wav``` audio files with chosen text segments out of the recognition results generated by the [Paraformer-long model](https://modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary).
8 | 
9 | With the help of ClipVideo you can get video clips easily with the following steps (in the Gradio service):
10 | - Step1: Upload your video file (or try the example videos below)
11 | - Step2: Copy the text segments you need to 'Text to Clip'
12 | - Step3: Adjust subtitle settings (if needed)
13 | - Step4: Click 'Clip' or 'Clip and Generate Subtitles'
14 | 
15 | ### Usage
16 | ```shell
17 | git clone https://github.com/alibaba-damo-academy/FunASR-APP.git
18 | cd FunASR-APP
19 | # install modelscope
20 | pip install "modelscope[audio_asr]" -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
21 | # python environments
22 | pip install -r ClipVideo/requirments.txt
23 | ```
24 | (Optional) If you want to clip video files with embedded subtitles:
25 | 
26 | 1. ffmpeg and imagemagick are required
27 | 
28 | - On Ubuntu
29 | ```shell
30 | apt-get -y update && apt-get -y install ffmpeg imagemagick
31 | sed -i 's/none/read,write/g' /etc/ImageMagick-6/policy.xml
32 | ```
33 | - On MacOS
34 | ```shell
35 | brew install imagemagick
36 | sed -i 's/none/read,write/g' /usr/local/Cellar/imagemagick/7.1.1-8_1/etc/ImageMagick-7/policy.xml
37 | ```
38 | 2. Download the font file to ClipVideo/font
39 | 
40 | ```shell
41 | wget https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ClipVideo/STHeitiMedium.ttc -O ClipVideo/font/STHeitiMedium.ttc
42 | ```
43 | 
44 | #### Experience ClipVideo in Modelscope
45 | You can try ClipVideo in the ModelScope space: [link](https://modelscope.cn/studios/damo/funasr_app_clipvideo/summary).
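For plain audio, the same `VideoClipper` class exposes `recog` and `clip` directly. A minimal self-contained sketch of that path, mirroring the audio branch of `runner()` in `videoclipper.py`; `my_audio.wav` is a hypothetical input, and the 16 kHz mono requirement comes from the assert in `VideoClipper.recog`:

```python
# Minimal sketch of the audio recognize-then-clip path; run from inside
# ClipVideo/clipvideo/ so `videoclipper` is importable. 'my_audio.wav'
# is a hypothetical file; librosa resamples it to 16 kHz mono as required.
import librosa
import soundfile as sf
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from videoclipper import VideoClipper

inference_pipeline = pipeline(
    task=Tasks.auto_speech_recognition,
    model='damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
    vad_model='damo/speech_fsmn_vad_zh-cn-16k-common-pytorch',
    punc_model='damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch',
)
clipper = VideoClipper(inference_pipeline)

wav, sr = librosa.load('my_audio.wav', sr=16000)   # force 16 kHz, mono
text, srt, state = clipper.recog((sr, wav))        # stage 1: recognize
dest = '目标句子'                                   # paste text copied from the recognition result
(sr, audio), message, clip_srt = clipper.clip(     # stage 2: clip by text
    dest_text=dest, start_ost=0, end_ost=100, state=state)
sf.write('result.wav', audio, 16000)               # save the clipped audio
```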
46 |
47 | #### Use ClipVideo as Gradio Service
48 | You can establish your own ClipVideo service, which is the same as the [Modelscope Space](https://modelscope.cn/studios/damo/funasr_app_clipvideo/summary), as follows:
49 | ```shell
50 | python ClipVideo/clipvideo/gradio_service.py
51 | ```
52 | then visit ```localhost:7860``` and you will get a Gradio service like the one below; you can use ClipVideo following the steps:
53 | 
54 | 
55 | #### Use ClipVideo in the command line
56 | ClipVideo lets you recognize and clip with commands:
57 | ```shell
58 | # working in ClipVideo/
59 | # step1: Recognize
60 | python clipvideo/videoclipper.py --stage 1 \
61 | --file examples/2022云栖大会_片段.mp4 \
62 | --output_dir ./output
63 | # now you can find recognition results and the entire SRT file in ./output/
64 | # step2: Clip
65 | python clipvideo/videoclipper.py --stage 2 \
66 | --file examples/2022云栖大会_片段.mp4 \
67 | --output_dir ./output \
68 | --dest_text '我们把它跟乡村振兴去结合起来,利用我们的设计的能力' \
69 | --start_ost 0 \
70 | --end_ost 100 \
71 | --output_file './output/res.mp4'
72 | ```
73 | 
74 | ### Study Speech Related Models in FunASR
75 | 
76 | [FunASR](https://github.com/alibaba-damo-academy/FunASR) hopes to build a bridge between academic research and industrial applications of speech recognition. By supporting the training & finetuning of the industrial-grade speech recognition models released on ModelScope, researchers and developers can conduct research and production of speech recognition models more conveniently, and promote the development of the speech recognition ecosystem. ASR for Fun!
77 | 
78 | 📚FunASR Paper:
79 | 🌟Support FunASR: -------------------------------------------------------------------------------- /LICENSE: --------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2023 Fatfish588
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # Dataset_Generator_For_VITS
2 | A VITS dataset generation tool for converting video into short audio, based on DAMO Academy's video clipping technology
3 | 
4 | # Introduction
5 | >In today's VITS voice-model training workflow there are plenty of excellent projects for annotation, cleaning and training frameworks, but dataset preparation itself still seems largely unexplored. Manually preparing a dataset of hundreds or thousands of short audio clips is basically unrealistic for ordinary VITS hobbyists who just want to hear the voice of a favorite streamer or partner. Until now (2023.10) the only open-source, usable audio/video slicer was the whisper-based one embedded in VITS-fast-fine-tuning; because it is tightly coupled, it can only be used inside that project and cannot be adapted to others such as bert-vits2.
6 | 
7 | >For these reasons I made this small tool by lightly modifying ClipVideo from Alibaba DAMO Academy's FunASR-APP, hoping to fill in a missing piece for the VITS training community. All you need is audio and video of one person or character, no matter how long it is or how large the files are; a little background music is fine, and even a mix of audio and video files dropped into the designated directory is no problem. Click run and you get a series of 1~10 second short audio clips, which can be fed directly into other annotation projects for the next step of preparation.
8 | 
9 | 1. This project is a light modification of ClipVideo from Alibaba DAMO Academy's FunASR-APP. Its principle is to use ClipVideo to cut the audio/video of a specific sentence out of a video by its text (a wonderful technique), with configurable start and end offsets; for Chinese it works better than whisper and does not cut the last syllable in half.
10 | 2. All output of this project goes to ClipVideo/output/ under the root directory, including the clipped audio/video and the subtitle file for each video, which you can use if needed.
11 | 3. The project is fairly simple, so the pipeline from input video to output audio dataset runs end to end. If a parameter is not what you want, change it yourself; for example, the offsets mentioned in point 1 can be changed in the first half of Dataset_generator.py (the default is to clip with no offset).
12 | 
13 | The image below shows the results: with GPU acceleration, 600 short audio clips were generated in two minutes.
14 | ![July 10](https://github.com/Fatfish588/Dataset_Generator_For_VITS/assets/59791439/00f57562-798a-4368-921c-6a6886f65d13)
15 | 
16 | # Changelog
17 | 2023/10/12
18 | 1. Audio (.wav) and video (.mp4) can now be mixed together in video_files and run directly! The generator takes care of everything!
19 | 2. Added demucs vocal separation, so videos and audio with light BGM can now be processed. Note: do not use it on videos whose background music has Chinese vocals, because the lyrics will also be recognized and sliced.
20 | 3. Added output-directory initialization: the generator empties every directory except video_files before running, so you only need to hit run and leave the rest to the generator!
21 | 
22 | # Tutorial
23 | 1. Clone this repository (Python 3.10; anything from 3.8 onwards should work)
24 | 
25 | ```bash
26 | git clone https://github.com/Fatfish588/Dataset_Denerator_For_VITS.git
27 | ```
28 | 
29 | Create the required directories (on Windows you can simply create new folders):
30 | ```bash
31 | mkdir ClipVideo/font
32 | mkdir ClipVideo/clipvideo/video_files
33 | mkdir ClipVideo/clipvideo/output
34 | mkdir ClipVideo/clipvideo/input
35 | mkdir ClipVideo/clipvideo/input/mdx_extra
36 | mkdir ClipVideo/clipvideo/org_wav
37 | mkdir ClipVideo/clipvideo/output/mp4
38 | mkdir ClipVideo/clipvideo/output/srt
39 | mkdir ClipVideo/clipvideo/output/wav
40 | ```
41 | At this point this part of the structure should look like the image below; all following operations are performed in the Dataset_Generator_For_VITS root directory.
42 | ![image](https://github.com/Fatfish588/Dataset_Generator_For_VITS/assets/59791439/2c6ba932-4a9a-4c7f-902a-26cdfaa50c6b)
43 | 
44 | 
45 | 
46 | 2. Install dependencies
47 | ```bash
48 | # install modelscope
49 | pip install "modelscope[audio_asr]" -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
50 | # python environments
51 | pip install -r ClipVideo/requirments.txt
52 | pip install torchaudio
53 | pip install demucs~=4.0.0
54 | pip install umap
55 | pip install hdbscan
56 | # download the font (used by the webUI to embed subtitles)
57 | wget https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ClipVideo/STHeitiMedium.ttc -O ClipVideo/font/STHeitiMedium.ttc
58 | ```
59 | If you want GPU acceleration (especially for denoising) you need matching torch and CUDA versions; since every GPU is different, here is mine for reference only: RTX 4090 + CUDA 11.7
60 | ```bash
61 | pip install torch==2.0.1+cu117 torchaudio==2.0.2+cu117 torchvision==0.15.2+cu117 --extra-index-url https://download.pytorch.org/whl/cu117
62 | ```
63 | 3. Launch the webUI once. This step lets FunASR-APP automatically download the speech-to-text models. Downloading the models takes a while, and a seemingly stuck progress bar is normal, so just wait a bit. The models come from Alibaba's servers, so you may need to turn your proxy off.
64 | 
65 | ```bash
66 | python ClipVideo/clipvideo/gradio_service.py
67 | ```
68 | If the webUI opens successfully, FunASR-APP's dependencies are ready.
69 | 
70 | 4. Put all the audio or video to be processed into the video_files directory. Mixed processing of multiple video and audio files is supported, as long as they are all the same person's voice.
71 | ![image](https://github.com/Fatfish588/Dataset_Denerator_For_VITS/assets/59791439/a85784e4-b390-4c5c-b02d-c5cdf50d7e1c)
72 | 
73 | 5. Start the run
74 | 
75 | ```bash
76 |
python ClipVideo/clipvideo/Dataset_generator.py
77 | ```
78 | 6. When the run finishes, the results are saved in the ClipVideo/clipvideo/output/wav directory
79 | ![image](https://github.com/Fatfish588/Dataset_Generator_For_VITS/assets/59791439/ae24892e-7ab2-43ac-9485-46caf28b9df6)
80 | 
81 | 
82 | # Roadmap
83 | 1. Add a denoising model to support video input with background music. (Done)
84 | 2. One-click reset to the initial state, so the directories no longer have to be emptied by hand every time. (Done)
85 | 3. Clean up the code; right now there are too many convoluted steps.
86 | 4. One-click generation from video all the way to an audio dataset plus an annotated training set (most likely shelved)
87 | # Rambling notes
88 | 1. The project currently only supports Chinese audio and video with little background music or pure instrumental music, e.g. audiobooks, tutorial videos, popular-science videos, VTuber chat streams (goodness, they can chat with the live comments for a full six hours), and so on.
89 | 2. The project only turns long videos into a dataset of short, few-second audio clips to cut down manual slicing time; it does not do annotation, resampling, or training-set generation.
90 | 3. The file names it generates are transcribed by the Paraformer model and only serve to tell files apart; they are not 100% accurate, so using the file names directly as a training set is not recommended.
91 | 4. Make sure the four directories video_files, output/mp4, output/srt and output/wav exist under ClipVideo/clipvideo/. The generator now empties them on every run, but they must be created before the first run; after that you can forget about them.
92 | 5. The code is super simple and every function is commented; features you do not need, such as the denoising part, can be modified by yourself.
93 | 6. Follow 永雏塔菲, meow; follow 永雏塔菲, thank you meow.
94 | # Related links:
95 | [Paraformer automatic video slicing and subtitles (ModelScope Studio), Alibaba DAMO Academy](https://modelscope.cn/studios/damo/funasr_app_clipvideo/summary)
96 | [FunASR-APP (GitHub)](https://github.com/alibaba-damo-academy/FunASR-APP)
97 | -------------------------------------------------------------------------------- /TransAudio/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Fatfish588/Dataset_Generator_For_VITS/2f012ac453a99723c29ffbf119b4ff545492ebb7/TransAudio/README.md -------------------------------------------------------------------------------- /TransAudio/funasr-runtime-deploy.sh: --------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | scriptVersion="0.0.3"
4 | scriptDate="20230629"
5 | 
6 | clear
7 | 
8 | 
9 | # Set color
10 | RED="\033[31;1m"
11 | GREEN="\033[32;1m"
12 | YELLOW="\033[33;1m"
13 | BLUE="\033[34;1m"
14 | CYAN="\033[36;1m"
15 | PLAIN="\033[0m"
16 | 
17 | # Info messages
18 | ERROR="${RED}[ERROR]${PLAIN}"
19 | WARNING="${YELLOW}[WARNING]${PLAIN}"
20 | 
21 | # Font Format
22 | BOLD="\033[1m"
23 | UNDERLINE="\033[4m"
24 | 
25 | # Current folder
26 | cur_dir=`pwd`
27 | 
28 | 
29 | checkConfigFileAndTouch(){
30 | mkdir -p /var/funasr
31 | if [ ! -f $FUNASR_CONFIG_FILE ]; then
32 | touch $FUNASR_CONFIG_FILE
33 | fi
34 | }
35 | 
36 | SAMPLE_CLIENTS=( \
37 | "Python" \
38 | "Linux_Cpp" \
39 | )
40 | ASR_MODELS=( \
41 | "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx" \
42 | "model_name" \
43 | "model_path" \
44 | )
45 | VAD_MODELS=( \
46 | "damo/speech_fsmn_vad_zh-cn-16k-common-onnx" \
47 | "model_name" \
48 | "model_path" \
49 | )
50 | PUNC_MODELS=( \
51 | "damo/punc_ct-transformer_zh-cn-common-vocab272727-onnx" \
52 | "model_name" \
53 | "model_path" \
54 | )
55 | DOCKER_IMAGES=( \
56 | "registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-latest" \
57 | "registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.1.0" \
58 | )
59 | menuSelection(){
60 | local menu
61 | menu=($(echo "$@"))
62 | result=1
63 | show_no=1
64 | menu_no=0
65 | len=${#menu[@]}
66 | 
67 | while true
68 | do
69 | echo -e " ${BOLD}${show_no})${PLAIN} ${menu[menu_no]}"
70 | 
71 | let show_no++
72 | let menu_no++
73 | if [ ${menu_no} -ge ${len} ]; then
74 | break
75 | fi
76 | done
77 | 
78 | while true
79 | do
80 | read -p " Enter your choice: " result
81 | 
82 | expr ${result} + 0 &>/dev/null
83 | if [ $?
-eq 0 ]; then 84 | if [ ${result} -ge 1 ] && [ ${result} -le ${len} ]; then 85 | break 86 | else 87 | echo -e " ${RED}Input error, please input correct number!${PLAIN}" 88 | fi 89 | else 90 | echo -e " ${RED}Input error, please input correct number!${PLAIN}" 91 | fi 92 | done 93 | 94 | return $result 95 | } 96 | 97 | DrawProgress(){ 98 | model=$1 99 | title=$2 100 | percent_str=$3 101 | speed=$4 102 | revision=$5 103 | latest_percent=$6 104 | 105 | progress=0 106 | if [ ! -z "$percent_str" ]; then 107 | progress=`expr $percent_str + 0` 108 | latest_percent=`expr $latest_percent + 0` 109 | if [ $progress -ne 0 ] && [ $progress -lt $latest_percent ]; then 110 | progress=$latest_percent 111 | fi 112 | fi 113 | 114 | LOADING_FLAG="Loading" 115 | if [ "$title" = "$LOADING_FLAG" ]; then 116 | progress=100 117 | fi 118 | 119 | i=0 120 | str="" 121 | let max=progress/2 122 | while [ $i -lt $max ] 123 | do 124 | let i++ 125 | str+='=' 126 | done 127 | let color=36 128 | let index=max*2 129 | if [ -z "$speed" ]; then 130 | printf "\r \e[0;$color;1m[%s][%-11s][%-50s][%d%%][%s]\e[0m" "$model" "$title" "$str" "$$index" "$revision" 131 | else 132 | printf "\r \e[0;$color;1m[%s][%-11s][%-50s][%3d%%][%8s][%s]\e[0m" "$model" "$title" "$str" "$index" "$speed" "$revision" 133 | fi 134 | printf "\n" 135 | 136 | return $progress 137 | } 138 | 139 | PROGRESS_TXT="/var/funasr/progress.txt" 140 | ASR_PERCENT_INT=0 141 | VAD_PERCENT_INT=0 142 | PUNC_PERCENT_INT=0 143 | ASR_TITLE="Downloading" 144 | ASR_PERCENT="0" 145 | ASR_SPEED="0KB/s" 146 | ASR_REVISION="" 147 | VAD_TITLE="Downloading" 148 | VAD_PERCENT="0" 149 | VAD_SPEED="0KB/s" 150 | VAD_REVISION="" 151 | PUNC_TITLE="Downloading" 152 | PUNC_PERCENT="0" 153 | PUNC_SPEED="0KB/s" 154 | PUNC_REVISION="" 155 | ServerProgress(){ 156 | status_flag="STATUS:" 157 | stage=0 158 | wait=0 159 | server_status="" 160 | 161 | while true 162 | do 163 | if [ -f "$PROGRESS_TXT" ]; then 164 | break 165 | else 166 | sleep 1 167 | let wait=wait+1 168 | if [ ${wait} -ge 10 ]; then 169 | break 170 | fi 171 | fi 172 | done 173 | 174 | if [ ! 
-f "$PROGRESS_TXT" ]; then 175 | echo -e " ${RED}The note of progress does not exist.(${PROGRESS_TXT}) ${PLAIN}" 176 | return 98 177 | fi 178 | 179 | stage=1 180 | while read line 181 | do 182 | if [ $stage -eq 1 ]; then 183 | result=$(echo $line | grep "STATUS:") 184 | if [[ "$result" != "" ]] 185 | then 186 | stage=2 187 | server_status=${line#*:} 188 | status=`expr $server_status + 0` 189 | if [ $status -eq 99 ]; then 190 | stage=99 191 | fi 192 | continue 193 | fi 194 | elif [ $stage -eq 2 ]; then 195 | result=$(echo $line | grep "ASR") 196 | if [[ "$result" != "" ]] 197 | then 198 | stage=3 199 | continue 200 | fi 201 | elif [ $stage -eq 3 ]; then 202 | result=$(echo $line | grep "VAD") 203 | if [[ "$result" != "" ]] 204 | then 205 | stage=4 206 | continue 207 | fi 208 | result=$(echo $line | grep "title:") 209 | if [[ "$result" != "" ]] 210 | then 211 | ASR_TITLE=${line#*:} 212 | continue 213 | fi 214 | result=$(echo $line | grep "percent:") 215 | if [[ "$result" != "" ]] 216 | then 217 | ASR_PERCENT=${line#*:} 218 | continue 219 | fi 220 | result=$(echo $line | grep "speed:") 221 | if [[ "$result" != "" ]] 222 | then 223 | ASR_SPEED=${line#*:} 224 | continue 225 | fi 226 | result=$(echo $line | grep "revision:") 227 | if [[ "$result" != "" ]] 228 | then 229 | ASR_REVISION=${line#*:} 230 | continue 231 | fi 232 | elif [ $stage -eq 4 ]; then 233 | result=$(echo $line | grep "PUNC") 234 | if [[ "$result" != "" ]] 235 | then 236 | stage=5 237 | continue 238 | fi 239 | result=$(echo $line | grep "title:") 240 | if [[ "$result" != "" ]] 241 | then 242 | VAD_TITLE=${line#*:} 243 | continue 244 | fi 245 | result=$(echo $line | grep "percent:") 246 | if [[ "$result" != "" ]] 247 | then 248 | VAD_PERCENT=${line#*:} 249 | continue 250 | fi 251 | result=$(echo $line | grep "speed:") 252 | if [[ "$result" != "" ]] 253 | then 254 | VAD_SPEED=${line#*:} 255 | continue 256 | fi 257 | result=$(echo $line | grep "revision:") 258 | if [[ "$result" != "" ]] 259 | then 260 | VAD_REVISION=${line#*:} 261 | continue 262 | fi 263 | elif [ $stage -eq 5 ]; then 264 | result=$(echo $line | grep "DONE") 265 | if [[ "$result" != "" ]] 266 | then 267 | # Done and break. 268 | stage=6 269 | break 270 | fi 271 | result=$(echo $line | grep "title:") 272 | if [[ "$result" != "" ]] 273 | then 274 | PUNC_TITLE=${line#*:} 275 | continue 276 | fi 277 | result=$(echo $line | grep "percent:") 278 | if [[ "$result" != "" ]] 279 | then 280 | PUNC_PERCENT=${line#*:} 281 | continue 282 | fi 283 | result=$(echo $line | grep "speed:") 284 | if [[ "$result" != "" ]] 285 | then 286 | PUNC_SPEED=${line#*:} 287 | continue 288 | fi 289 | result=$(echo $line | grep "revision:") 290 | if [[ "$result" != "" ]] 291 | then 292 | PUNC_REVISION=${line#*:} 293 | continue 294 | fi 295 | elif [ $stage -eq 99 ]; then 296 | echo -e " ${RED}ERROR: $line${PLAIN}" 297 | fi 298 | done < $PROGRESS_TXT 299 | 300 | if [ $stage -ne 99 ]; then 301 | DrawProgress "ASR " $ASR_TITLE $ASR_PERCENT $ASR_SPEED $ASR_REVISION $ASR_PERCENT_INT 302 | ASR_PERCENT_INT=$? 303 | DrawProgress "VAD " $VAD_TITLE $VAD_PERCENT $VAD_SPEED $VAD_REVISION $VAD_PERCENT_INT 304 | VAD_PERCENT_INT=$? 305 | DrawProgress "PUNC" $PUNC_TITLE $PUNC_PERCENT $PUNC_SPEED $PUNC_REVISION $PUNC_PERCENT_INT 306 | PUNC_PERCENT_INT=$? 
307 | fi 308 | 309 | return $stage 310 | } 311 | 312 | # Make sure root user 313 | rootNess(){ 314 | echo -e "${UNDERLINE}${BOLD}[0/9]${PLAIN}" 315 | echo -e " ${YELLOW}Please check root access.${PLAIN}" 316 | echo 317 | 318 | # The script writes the config under /var/funasr and manages Docker, so it must run as root. 319 | if [[ $EUID -ne 0 ]]; then 320 | echo -e " ${ERROR} MUST RUN AS ${RED}ROOT${PLAIN} USER!"; exit 1 321 | fi 322 | 323 | checkConfigFileAndTouch 324 | cd ${cur_dir} 325 | echo 326 | } 327 | 328 | selectDockerImages(){ 329 | echo -e "${UNDERLINE}${BOLD}[1/9]${PLAIN}" 330 | echo -e " ${YELLOW}Please choose the Docker image.${PLAIN}" 331 | 332 | menuSelection ${DOCKER_IMAGES[*]} 333 | result=$? 334 | index=`expr $result - 1` 335 | 336 | PARAMS_DOCKER_IMAGE=${DOCKER_IMAGES[${index}]} 337 | echo -e " ${UNDERLINE}You have chosen the Docker image:${PLAIN} ${GREEN}${PARAMS_DOCKER_IMAGE}${PLAIN}" 338 | 339 | checkDockerExist 340 | result=$? 341 | result=`expr $result + 0` 342 | if [ ${result} -eq 50 ]; then 343 | return 50 344 | fi 345 | 346 | echo 347 | } 348 | 349 | setupModelType(){ 350 | echo -e "${UNDERLINE}${BOLD}[2/9]${PLAIN}" 351 | echo -e " ${YELLOW}Please input [Y/n] to choose whether to automatically download the model by model_id from ModelScope or to use a local model.${PLAIN}" 352 | echo -e " [y] Use a model from ModelScope; it will be automatically downloaded into Docker(${CYAN}/workspace/models${PLAIN})." 353 | echo -e " If you select both the local model and the model in ModelScope, select [y]." 354 | echo " [n] Use models on the localhost; the directory containing each model will be mapped into Docker." 355 | 356 | while true 357 | do 358 | read -p " Setting confirmation[Y/n]: " model_id_flag 359 | 360 | if [ -z "$model_id_flag" ]; then 361 | model_id_flag="y" 362 | fi 363 | YES="Y" 364 | yes="y" 365 | NO="N" 366 | no="n" 367 | if [ "$model_id_flag" = "$YES" ] || [ "$model_id_flag" = "$yes" ]; then 368 | # please set model_id later. 369 | PARAMS_DOWNLOAD_MODEL_DIR="/workspace/models" 370 | echo -e " ${UNDERLINE}You have chosen to use the model in ModelScope, please set the model ID in the next steps, and the model will be automatically downloaded to (${PARAMS_DOWNLOAD_MODEL_DIR}) during the run.${PLAIN}" 371 | 372 | params_local_models_dir=`sed '/^PARAMS_LOCAL_MODELS_DIR=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 373 | if [ -z "$params_local_models_dir" ]; then 374 | params_local_models_dir="${cur_dir}/models" 375 | mkdir -p ${params_local_models_dir} 376 | fi 377 | while true 378 | do 379 | echo 380 | echo -e " ${YELLOW}Please enter the local path to download models, the corresponding path in Docker is ${PARAMS_DOWNLOAD_MODEL_DIR}.${PLAIN}" 381 | read -p " Setting the local path to download models, default(${params_local_models_dir}): " PARAMS_LOCAL_MODELS_DIR 382 | if [ -z "$PARAMS_LOCAL_MODELS_DIR" ]; then 383 | if [ -z "$params_local_models_dir" ]; then 384 | echo -e " ${RED}The local path set is empty, please setup again.${PLAIN}" 385 | continue 386 | else 387 | PARAMS_LOCAL_MODELS_DIR=$params_local_models_dir 388 | fi 389 | fi 390 | if [ ! -d "$PARAMS_LOCAL_MODELS_DIR" ]; then 391 | echo -e " ${RED}The local model path(${PARAMS_LOCAL_MODELS_DIR}) set does not exist, please setup again.${PLAIN}" 392 | else 393 | echo -e " The local path(${GREEN}${PARAMS_LOCAL_MODELS_DIR}${PLAIN}) set will store models during the run." 394 | break 395 | fi 396 | done 397 | 398 | break 399 | elif [ "$model_id_flag" = "$NO" ] || [ "$model_id_flag" = "$no" ]; then 400 | # download_model_dir is empty, will use models in localhost.
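# Leaving PARAMS_DOWNLOAD_MODEL_DIR empty below is what the later setupAsrModelId/setupVadModelId/setupPuncModelId steps key on to prompt for local model paths instead of ModelScope IDs.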
401 | PARAMS_DOWNLOAD_MODEL_DIR="" 402 | PARAMS_LOCAL_MODELS_DIR="" 403 | echo -e " ${UNDERLINE}You have chosen to use models from the localhost, set the path to each model in the localhost in the next steps.${PLAIN}" 404 | echo 405 | break 406 | fi 407 | done 408 | 409 | echo 410 | } 411 | 412 | # Set asr model for FunASR server 413 | setupAsrModelId(){ 414 | echo -e " ${UNDERLINE}${BOLD}[2.1/9]${PLAIN}" 415 | 416 | if [ -z "$PARAMS_DOWNLOAD_MODEL_DIR" ]; then 417 | # download_model_dir is empty, will use models in localhost. 418 | params_local_asr_path=`sed '/^PARAMS_LOCAL_ASR_PATH=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 419 | if [ -z "$params_local_asr_path" ]; then 420 | PARAMS_LOCAL_ASR_PATH="" 421 | else 422 | PARAMS_LOCAL_ASR_PATH=${params_local_asr_path} 423 | fi 424 | 425 | echo -e " ${YELLOW}Please input the local ASR model path for the FunASR server.${PLAIN}" 426 | echo -e " Default: ${CYAN}${PARAMS_LOCAL_ASR_PATH}${PLAIN}" 427 | 428 | while true 429 | do 430 | read -p " Setting ASR model path in localhost: " PARAMS_LOCAL_ASR_PATH 431 | 432 | if [ -z "$PARAMS_LOCAL_ASR_PATH" ]; then 433 | PARAMS_LOCAL_ASR_PATH=${params_local_asr_path} 434 | fi 435 | if [ -z "$PARAMS_LOCAL_ASR_PATH" ]; then 436 | # use default asr model in Docker 437 | PARAMS_LOCAL_ASR_DIR="" 438 | PARAMS_DOCKER_ASR_DIR="" 439 | PARAMS_DOCKER_ASR_PATH="/workspace/models/asr" 440 | echo -e " ${RED}No local ASR model path was set, the ASR model(${CYAN}/workspace/models/asr${PLAIN}${RED}) in Docker will be used.${PLAIN}" 441 | 442 | echo -e " ${UNDERLINE}You have chosen the default model dir in localhost:${PLAIN} ${GREEN}${PARAMS_LOCAL_ASR_PATH}${PLAIN}" 443 | echo -e " ${UNDERLINE}The default model dir in Docker is${PLAIN} ${GREEN}${PARAMS_DOCKER_ASR_PATH}${PLAIN}" 444 | break 445 | else 446 | if [ ! -d "$PARAMS_LOCAL_ASR_PATH" ]; then 447 | echo -e " ${RED}The ASR model path set does not exist, please setup again.${PLAIN}" 448 | else 449 | # use asr model in localhost 450 | PARAMS_LOCAL_ASR_DIR=$(dirname "$PARAMS_LOCAL_ASR_PATH") 451 | asr_name=$(basename "$PARAMS_LOCAL_ASR_PATH") 452 | PARAMS_DOCKER_ASR_DIR="/workspace/user_asr" 453 | PARAMS_DOCKER_ASR_PATH=${PARAMS_DOCKER_ASR_DIR}/${asr_name} 454 | 455 | echo -e " ${UNDERLINE}You have chosen the model dir in localhost:${PLAIN} ${GREEN}${PARAMS_LOCAL_ASR_PATH}${PLAIN}" 456 | echo -e " ${UNDERLINE}The model dir in Docker is${PLAIN} ${GREEN}${PARAMS_DOCKER_ASR_PATH}${PLAIN}" 457 | break 458 | fi 459 | fi 460 | done 461 | 462 | PARAMS_ASR_ID="" 463 | else 464 | # please set model_id later. 465 | echo -e " ${YELLOW}Please select ASR model_id in ModelScope from the list below.${PLAIN}" 466 | 467 | menuSelection ${ASR_MODELS[*]} 468 | result=$?
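# menuSelection returns the 1-based choice via $?, so it is converted to a 0-based index into ASR_MODELS below.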
469 | index=`expr $result - 1` 470 | PARAMS_ASR_ID=${ASR_MODELS[${index}]} 471 | 472 | OTHERS="model_name" 473 | LOCAL_MODEL="model_path" 474 | if [ "$PARAMS_ASR_ID" = "$OTHERS" ]; then 475 | params_asr_id=`sed '/^PARAMS_ASR_ID=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 476 | if [ -z "$params_asr_id" ]; then 477 | PARAMS_ASR_ID="" 478 | else 479 | PARAMS_ASR_ID=${params_asr_id} 480 | fi 481 | 482 | echo -e " Default: ${CYAN}${PARAMS_ASR_ID}${PLAIN}" 483 | 484 | while true 485 | do 486 | read -p " Setting ASR model_id in ModelScope: " PARAMS_ASR_ID 487 | 488 | PARAMS_LOCAL_ASR_DIR="" 489 | PARAMS_LOCAL_ASR_PATH="" 490 | PARAMS_DOCKER_ASR_DIR="" 491 | if [ -z "$PARAMS_ASR_ID" ]; then 492 | echo -e " ${RED}The ASR model ID is empty, please setup again.${PLAIN}" 493 | else 494 | break 495 | fi 496 | done 497 | elif [ "$PARAMS_ASR_ID" = "$LOCAL_MODEL" ]; then 498 | PARAMS_ASR_ID="" 499 | echo -e " Please input the local ASR model path for the FunASR server." 500 | 501 | while true 502 | do 503 | read -p " Setting ASR model path in localhost: " PARAMS_LOCAL_ASR_PATH 504 | if [ -z "$PARAMS_LOCAL_ASR_PATH" ]; then 505 | # use default asr model in Docker 506 | echo -e " ${RED}Please do not set an empty path in localhost.${PLAIN}" 507 | else 508 | if [ ! -d "$PARAMS_LOCAL_ASR_PATH" ]; then 509 | echo -e " ${RED}The ASR model path set does not exist, please setup again.${PLAIN}" 510 | else 511 | # use asr model in localhost 512 | PARAMS_LOCAL_ASR_DIR=$(dirname "$PARAMS_LOCAL_ASR_PATH") 513 | asr_name=$(basename "$PARAMS_LOCAL_ASR_PATH") 514 | PARAMS_DOCKER_ASR_DIR="/workspace/user_asr" 515 | PARAMS_DOCKER_ASR_PATH=${PARAMS_DOCKER_ASR_DIR}/${asr_name} 516 | 517 | echo -e " ${UNDERLINE}You have chosen the model dir in localhost:${PLAIN} ${GREEN}${PARAMS_LOCAL_ASR_PATH}${PLAIN}" 518 | echo -e " ${UNDERLINE}The model dir in Docker is${PLAIN} ${GREEN}${PARAMS_DOCKER_ASR_PATH}${PLAIN}" 519 | echo 520 | return 0 521 | fi 522 | fi 523 | done 524 | fi 525 | 526 | PARAMS_DOCKER_ASR_DIR=$PARAMS_DOWNLOAD_MODEL_DIR 527 | PARAMS_DOCKER_ASR_PATH=${PARAMS_DOCKER_ASR_DIR}/${PARAMS_ASR_ID} 528 | 529 | echo -e " ${UNDERLINE}The model ID is${PLAIN} ${GREEN}${PARAMS_ASR_ID}${PLAIN}" 530 | echo -e " ${UNDERLINE}The model dir in Docker is${PLAIN} ${GREEN}${PARAMS_DOCKER_ASR_PATH}${PLAIN}" 531 | fi 532 | 533 | echo 534 | } 535 | 536 | # Set vad model for FunASR server 537 | setupVadModelId(){ 538 | echo -e " ${UNDERLINE}${BOLD}[2.2/9]${PLAIN}" 539 | 540 | if [ -z "$PARAMS_DOWNLOAD_MODEL_DIR" ]; then 541 | # download_model_dir is empty, will use models in localhost.
542 | params_local_vad_path=`sed '/^PARAMS_LOCAL_VAD_PATH=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 543 | if [ -z "$params_local_vad_path" ]; then 544 | PARAMS_LOCAL_VAD_PATH="" 545 | else 546 | PARAMS_LOCAL_VAD_PATH=${params_local_vad_path} 547 | fi 548 | 549 | echo -e " ${YELLOW}Please input the local VAD model path for the FunASR server.${PLAIN}" 550 | echo -e " Default: ${CYAN}${PARAMS_LOCAL_VAD_PATH}${PLAIN}" 551 | 552 | while true 553 | do 554 | read -p " Setting VAD model path in localhost: " PARAMS_LOCAL_VAD_PATH 555 | 556 | if [ -z "$PARAMS_LOCAL_VAD_PATH" ]; then 557 | PARAMS_LOCAL_VAD_PATH=${params_local_vad_path} 558 | fi 559 | if [ -z "$PARAMS_LOCAL_VAD_PATH" ]; then 560 | # use default vad model in Docker 561 | PARAMS_LOCAL_VAD_DIR="" 562 | PARAMS_DOCKER_VAD_DIR="" 563 | PARAMS_DOCKER_VAD_PATH="/workspace/models/vad" 564 | echo -e " ${RED}No local VAD model path was set, the VAD model(${CYAN}/workspace/models/vad${PLAIN}${RED}) in Docker will be used.${PLAIN}" 565 | 566 | echo -e " ${UNDERLINE}You have chosen the default model dir in localhost:${PLAIN} ${GREEN}${PARAMS_LOCAL_VAD_PATH}${PLAIN}" 567 | echo -e " ${UNDERLINE}The default model dir in Docker is${PLAIN} ${GREEN}${PARAMS_DOCKER_VAD_PATH}${PLAIN}" 568 | break 569 | else 570 | if [ ! -d "$PARAMS_LOCAL_VAD_PATH" ]; then 571 | echo -e " ${RED}The VAD model path set does not exist, please setup again.${PLAIN}" 572 | else 573 | # use vad model in localhost 574 | PARAMS_LOCAL_VAD_DIR=$(dirname "$PARAMS_LOCAL_VAD_PATH") 575 | vad_name=$(basename "$PARAMS_LOCAL_VAD_PATH") 576 | PARAMS_DOCKER_VAD_DIR="/workspace/user_vad" 577 | PARAMS_DOCKER_VAD_PATH=${PARAMS_DOCKER_VAD_DIR}/${vad_name} 578 | 579 | echo -e " ${UNDERLINE}You have chosen the model dir in localhost:${PLAIN} ${GREEN}${PARAMS_LOCAL_VAD_PATH}${PLAIN}" 580 | echo -e " ${UNDERLINE}The model dir in Docker is${PLAIN} ${GREEN}${PARAMS_DOCKER_VAD_PATH}${PLAIN}" 581 | break 582 | fi 583 | fi 584 | done 585 | 586 | PARAMS_VAD_ID="" 587 | else 588 | # please set model_id later. 589 | echo -e " ${YELLOW}Please select VAD model_id in ModelScope from the list below.${PLAIN}" 590 | 591 | menuSelection ${VAD_MODELS[*]} 592 | result=$? 593 | index=`expr $result - 1` 594 | PARAMS_VAD_ID=${VAD_MODELS[${index}]} 595 | 596 | OTHERS="model_name" 597 | LOCAL_MODEL="model_path" 598 | if [ "$PARAMS_VAD_ID" = "$OTHERS" ]; then 599 | params_vad_id=`sed '/^PARAMS_VAD_ID=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 600 | if [ -z "$params_vad_id" ]; then 601 | PARAMS_VAD_ID="" 602 | else 603 | PARAMS_VAD_ID=${params_vad_id} 604 | fi 605 | 606 | echo -e " Default: ${CYAN}${PARAMS_VAD_ID}${PLAIN}" 607 | 608 | while true 609 | do 610 | read -p " Setting VAD model_id in ModelScope: " PARAMS_VAD_ID 611 | 612 | PARAMS_LOCAL_VAD_DIR="" 613 | PARAMS_LOCAL_VAD_PATH="" 614 | PARAMS_DOCKER_VAD_DIR="" 615 | if [ -z "$PARAMS_VAD_ID" ]; then 616 | echo -e " ${RED}The VAD model ID is empty, please setup again.${PLAIN}" 617 | else 618 | break 619 | fi 620 | done 621 | elif [ "$PARAMS_VAD_ID" = "$LOCAL_MODEL" ]; then 622 | PARAMS_VAD_ID="" 623 | echo -e " Please input the local VAD model path for the FunASR server." 624 | 625 | while true 626 | do 627 | read -p " Setting VAD model path in localhost: " PARAMS_LOCAL_VAD_PATH 628 | if [ -z "$PARAMS_LOCAL_VAD_PATH" ]; then 629 | # use default vad model in Docker 630 | echo -e " ${RED}Please do not set an empty path in localhost.${PLAIN}" 631 | else 632 | if [ !
-d "$PARAMS_LOCAL_VAD_PATH" ]; then 633 | echo -e " ${RED}The VAD model path set does not exist, please setup again.${PLAIN}" 634 | else 635 | # use vad model in localhost 636 | PARAMS_LOCAL_VAD_DIR=$(dirname "$PARAMS_LOCAL_VAD_PATH") 637 | vad_name=$(basename "$PARAMS_LOCAL_VAD_PATH") 638 | PARAMS_DOCKER_VAD_DIR="/workspace/user_vad" 639 | PARAMS_DOCKER_VAD_PATH=${PARAMS_DOCKER_VAD_DIR}/${vad_name} 640 | 641 | echo -e " ${UNDERLINE}You have chosen the model dir in localhost:${PLAIN} ${GREEN}${PARAMS_LOCAL_VAD_PATH}${PLAIN}" 642 | echo -e " ${UNDERLINE}The model dir in Docker is${PLAIN} ${GREEN}${PARAMS_DOCKER_VAD_PATH}${PLAIN}" 643 | echo 644 | return 0 645 | fi 646 | fi 647 | done 648 | fi 649 | 650 | PARAMS_DOCKER_VAD_DIR=$PARAMS_DOWNLOAD_MODEL_DIR 651 | PARAMS_DOCKER_VAD_PATH=${PARAMS_DOCKER_VAD_DIR}/${PARAMS_VAD_ID} 652 | 653 | echo -e " ${UNDERLINE}The model ID is${PLAIN} ${GREEN}${PARAMS_VAD_ID}${PLAIN}" 654 | echo -e " ${UNDERLINE}The model dir in Docker is${PLAIN} ${GREEN}${PARAMS_DOCKER_VAD_PATH}${PLAIN}" 655 | fi 656 | 657 | echo 658 | } 659 | 660 | # Set punc model for FunASR server 661 | setupPuncModelId(){ 662 | echo -e " ${UNDERLINE}${BOLD}[2.3/9]${PLAIN}" 663 | 664 | if [ -z "$PARAMS_DOWNLOAD_MODEL_DIR" ]; then 665 | # download_model_dir is empty, will use models in localhost. 666 | params_local_punc_path=`sed '/^PARAMS_LOCAL_PUNC_PATH=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 667 | if [ -z "$params_local_punc_path" ]; then 668 | PARAMS_LOCAL_PUNC_PATH="" 669 | else 670 | PARAMS_LOCAL_PUNC_PATH=${params_local_punc_path} 671 | fi 672 | 673 | echo -e " ${YELLOW}Please input PUNC model path in local for FunASR server.${PLAIN}" 674 | echo -e " Default: ${CYAN}${PARAMS_LOCAL_PUNC_PATH}${PLAIN}" 675 | 676 | while true 677 | do 678 | read -p " Setting PUNC model path in localhost: " PARAMS_LOCAL_PUNC_PATH 679 | 680 | if [ -z "$PARAMS_LOCAL_PUNC_PATH" ]; then 681 | PARAMS_LOCAL_PUNC_PATH=${params_local_punc_path} 682 | fi 683 | if [ -z "$PARAMS_LOCAL_PUNC_PATH" ]; then 684 | # use default punc model in Docker 685 | PARAMS_LOCAL_PUNC_DIR="" 686 | PARAMS_DOCKER_PUNC_DIR="" 687 | PARAMS_DOCKER_PUNC_PATH="/workspace/models/punc" 688 | echo -e " ${RED}Donnot set the local PUNC model path, will use PUNC model(${CYAN}/workspace/models/punc${PLAIN}${RED}) in Docker.${PLAIN}" 689 | 690 | echo -e " ${UNDERLINE}You have chosen the default model dir in localhost: ${GREEN}${PARAMS_LOCAL_PUNC_PATH}${PLAIN}" 691 | echo -e " ${UNDERLINE}The defalut model dir in Docker is ${GREEN}${PARAMS_DOCKER_PUNC_PATH}${PLAIN}" 692 | break 693 | else 694 | if [ ! -d "$PARAMS_LOCAL_PUNC_PATH" ]; then 695 | echo -e " ${RED}The PUNC model path set does not exist, please setup again.${PLAIN}" 696 | else 697 | # use punc model in localhost 698 | PARAMS_LOCAL_PUNC_DIR=$(dirname "$PARAMS_LOCAL_PUNC_PATH") 699 | punc_name=$(basename "$PARAMS_LOCAL_PUNC_PATH") 700 | PARAMS_DOCKER_PUNC_DIR="/workspace/user_punc" 701 | PARAMS_DOCKER_PUNC_PATH=${PARAMS_DOCKER_PUNC_DIR}/${punc_name} 702 | 703 | echo -e " ${UNDERLINE}You have chosen the model dir in localhost:${PLAIN} ${GREEN}${PARAMS_LOCAL_PUNC_PATH}${PLAIN}" 704 | echo -e " ${UNDERLINE}The model dir in Docker is${PLAIN} ${GREEN}${PARAMS_DOCKER_PUNC_PATH}${PLAIN}" 705 | break 706 | fi 707 | fi 708 | done 709 | 710 | PARAMS_PUNC_ID="" 711 | else 712 | # please set model_id later. 713 | echo -e " ${YELLOW}Please select PUNC model_id in ModelScope from the list below.${PLAIN}" 714 | 715 | menuSelection ${PUNC_MODELS[*]} 716 | result=$? 
717 | index=`expr $result - 1` 718 | PARAMS_PUNC_ID=${PUNC_MODELS[${index}]} 719 | 720 | OTHERS="model_name" 721 | LOCAL_MODEL="model_path" 722 | if [ "$PARAMS_PUNC_ID" = "$OTHERS" ]; then 723 | params_punc_id=`sed '/^PARAMS_PUNC_ID=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 724 | if [ -z "$params_punc_id" ]; then 725 | PARAMS_PUNC_ID="" 726 | else 727 | PARAMS_PUNC_ID=${params_punc_id} 728 | fi 729 | 730 | echo -e " Default: ${CYAN}${PARAMS_PUNC_ID}${PLAIN}" 731 | 732 | while true 733 | do 734 | read -p " Setting PUNC model_id in ModelScope: " PARAMS_PUNC_ID 735 | 736 | PARAMS_LOCAL_PUNC_DIR="" 737 | PARAMS_LOCAL_PUNC_PATH="" 738 | PARAMS_DOCKER_PUNC_DIR="" 739 | if [ -z "$PARAMS_PUNC_ID" ]; then 740 | echo -e " ${RED}The PUNC model ID is empty, please setup again.${PLAIN}" 741 | else 742 | break 743 | fi 744 | done 745 | elif [ "$PARAMS_PUNC_ID" = "$LOCAL_MODEL" ]; then 746 | PARAMS_PUNC_ID="" 747 | echo -e " Please input the local PUNC model path for the FunASR server." 748 | 749 | while true 750 | do 751 | read -p " Setting PUNC model path in localhost: " PARAMS_LOCAL_PUNC_PATH 752 | if [ -z "$PARAMS_LOCAL_PUNC_PATH" ]; then 753 | # use default punc model in Docker 754 | echo -e " ${RED}Please do not set an empty path in localhost.${PLAIN}" 755 | else 756 | if [ ! -d "$PARAMS_LOCAL_PUNC_PATH" ]; then 757 | echo -e " ${RED}The PUNC model path set does not exist, please setup again.${PLAIN}" 758 | else 759 | # use punc model in localhost 760 | PARAMS_LOCAL_PUNC_DIR=$(dirname "$PARAMS_LOCAL_PUNC_PATH") 761 | punc_name=$(basename "$PARAMS_LOCAL_PUNC_PATH") 762 | PARAMS_DOCKER_PUNC_DIR="/workspace/user_punc" 763 | PARAMS_DOCKER_PUNC_PATH=${PARAMS_DOCKER_PUNC_DIR}/${punc_name} 764 | 765 | echo -e " ${UNDERLINE}You have chosen the model dir in localhost:${PLAIN} ${GREEN}${PARAMS_LOCAL_PUNC_PATH}${PLAIN}" 766 | echo -e " ${UNDERLINE}The model dir in Docker is${PLAIN} ${GREEN}${PARAMS_DOCKER_PUNC_PATH}${PLAIN}" 767 | echo 768 | return 0 769 | fi 770 | fi 771 | done 772 | fi 773 | 774 | PARAMS_DOCKER_PUNC_DIR=$PARAMS_DOWNLOAD_MODEL_DIR 775 | PARAMS_DOCKER_PUNC_PATH=${PARAMS_DOCKER_PUNC_DIR}/${PARAMS_PUNC_ID} 776 | 777 | echo -e " ${UNDERLINE}The model ID is${PLAIN} ${GREEN}${PARAMS_PUNC_ID}${PLAIN}" 778 | echo -e " ${UNDERLINE}The model dir in Docker is${PLAIN} ${GREEN}${PARAMS_DOCKER_PUNC_PATH}${PLAIN}" 779 | fi 780 | 781 | echo 782 | } 783 | 784 | # Set server exec for FunASR 785 | setupServerExec(){ 786 | echo -e "${UNDERLINE}${BOLD}[3/9]${PLAIN}" 787 | 788 | params_docker_exec_path=`sed '/^PARAMS_DOCKER_EXEC_PATH=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 789 | if [ -z "$params_docker_exec_path" ]; then 790 | PARAMS_DOCKER_EXEC_PATH="/workspace/FunASR/funasr/runtime/websocket/build/bin/funasr-wss-server" 791 | else 792 | PARAMS_DOCKER_EXEC_PATH=${params_docker_exec_path} 793 | fi 794 | 795 | echo -e " ${YELLOW}Please enter the path to the executor of the FunASR service on the localhost.${PLAIN}" 796 | echo -e " If not set, the default ${CYAN}/workspace/FunASR/funasr/runtime/websocket/build/bin/funasr-wss-server${PLAIN} in Docker is used." 797 | read -p " Setting the path to the executor of the FunASR service on the localhost: " PARAMS_LOCAL_EXEC_PATH 798 | 799 | if [ -z "$PARAMS_LOCAL_EXEC_PATH" ]; then 800 | PARAMS_DOCKER_EXEC_PATH="/workspace/FunASR/funasr/runtime/websocket/build/bin/funasr-wss-server" 801 | else 802 | if [ !
-d "$PARAMS_LOCAL_EXEC_PATH" ]; then 803 | echo -e " ${RED}The FunASR server path set does not exist, will use default.${PLAIN}" 804 | PARAMS_LOCAL_EXEC_PATH="" 805 | PARAMS_LOCAL_EXEC_DIR="" 806 | PARAMS_DOCKER_EXEC_PATH="/workspace/FunASR/funasr/runtime/websocket/build/bin/funasr-wss-server" 807 | PARAMS_DOCKER_EXEC_DIR="/workspace/FunASR/funasr/runtime/websocket/build/bin" 808 | else 809 | PARAMS_LOCAL_EXEC_DIR=$(dirname "$PARAMS_LOCAL_EXEC_PATH") 810 | exec=$(basename "$PARAMS_LOCAL_EXEC_PATH") 811 | PARAMS_DOCKER_EXEC_DIR="/server" 812 | PARAMS_DOCKER_EXEC_PATH=${PARAMS_DOCKER_EXEC_DIR}/${exec} 813 | echo -e " ${UNDERLINE}The path of FunASR in localhost is${PLAIN} ${GREEN}${PARAMS_LOCAL_EXEC_PATH}${PLAIN}" 814 | fi 815 | fi 816 | echo -e " ${UNDERLINE}Corresponding, the path of FunASR in Docker is${PLAIN} ${GREEN}${PARAMS_DOCKER_EXEC_PATH}${PLAIN}" 817 | 818 | echo 819 | } 820 | 821 | # Configure FunASR server host port setting 822 | setupHostPort(){ 823 | echo -e "${UNDERLINE}${BOLD}[4/9]${PLAIN}" 824 | 825 | params_host_port=`sed '/^PARAMS_HOST_PORT=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 826 | if [ -z "$params_host_port" ]; then 827 | PARAMS_HOST_PORT="10095" 828 | else 829 | PARAMS_HOST_PORT=${params_host_port} 830 | fi 831 | 832 | while true 833 | do 834 | echo -e " ${YELLOW}Please input the opened port in the host used for FunASR server.${PLAIN}" 835 | echo -e " Default: ${CYAN}${PARAMS_HOST_PORT}${PLAIN}" 836 | read -p " Setting the opened host port [1-65535]: " PARAMS_HOST_PORT 837 | 838 | if [ -z "$PARAMS_HOST_PORT" ]; then 839 | params_host_port=`sed '/^PARAMS_HOST_PORT=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 840 | if [ -z "$params_host_port" ]; then 841 | PARAMS_HOST_PORT="10095" 842 | else 843 | PARAMS_HOST_PORT=${params_host_port} 844 | fi 845 | fi 846 | expr ${PARAMS_HOST_PORT} + 0 &>/dev/null 847 | if [ $? -eq 0 ]; then 848 | if [ ${PARAMS_HOST_PORT} -ge 1 ] && [ ${PARAMS_HOST_PORT} -le 65535 ]; then 849 | echo -e " ${UNDERLINE}The port of the host is${PLAIN} ${GREEN}${PARAMS_HOST_PORT}${PLAIN}" 850 | echo -e " ${UNDERLINE}The port in Docker for FunASR server is ${PLAIN}${GREEN}${PARAMS_DOCKER_PORT}${PLAIN}" 851 | break 852 | else 853 | echo -e " ${RED}Input error, please input correct number!${PLAIN}" 854 | fi 855 | else 856 | echo -e " ${RED}Input error, please input correct number!${PLAIN}" 857 | fi 858 | done 859 | echo 860 | } 861 | 862 | setupThreadNum(){ 863 | echo -e "${UNDERLINE}${BOLD}[5/9]${PLAIN}" 864 | 865 | params_decoder_thread_num=`sed '/^PARAMS_DECODER_THREAD_NUM=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 866 | if [ -z "$params_decoder_thread_num" ]; then 867 | PARAMS_DECODER_THREAD_NUM=$CPUNUM 868 | else 869 | PARAMS_DECODER_THREAD_NUM=${params_decoder_thread_num} 870 | fi 871 | 872 | while true 873 | do 874 | echo -e " ${YELLOW}Please input thread number for FunASR decoder.${PLAIN}" 875 | echo -e " Default: ${CYAN}${PARAMS_DECODER_THREAD_NUM}${PLAIN}" 876 | read -p " Setting the number of decoder thread: " PARAMS_DECODER_THREAD_NUM 877 | 878 | if [ -z "$PARAMS_DECODER_THREAD_NUM" ]; then 879 | params_decoder_thread_num=`sed '/^PARAMS_DECODER_THREAD_NUM=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 880 | if [ -z "$params_decoder_thread_num" ]; then 881 | PARAMS_DECODER_THREAD_NUM=$CPUNUM 882 | else 883 | PARAMS_DECODER_THREAD_NUM=${params_decoder_thread_num} 884 | fi 885 | fi 886 | expr ${PARAMS_DECODER_THREAD_NUM} + 0 &>/dev/null 887 | if [ $? 
-eq 0 ]; then 888 | if [ ${PARAMS_DECODER_THREAD_NUM} -ge 1 ] && [ ${PARAMS_DECODER_THREAD_NUM} -le 65535 ]; then 889 | break 890 | else 891 | echo -e " ${RED}Input error, please input correct number!${PLAIN}" 892 | fi 893 | else 894 | echo -e " ${RED}Input error, please input correct number!${PLAIN}" 895 | fi 896 | done 897 | echo 898 | 899 | multiple_io=4 900 | PARAMS_DECODER_THREAD_NUM=`expr $PARAMS_DECODER_THREAD_NUM + 0` 901 | PARAMS_IO_THREAD_NUM=`expr $PARAMS_DECODER_THREAD_NUM / $multiple_io` 902 | if [ $PARAMS_IO_THREAD_NUM -eq 0 ]; then 903 | PARAMS_IO_THREAD_NUM=1 904 | fi 905 | 906 | echo -e " ${UNDERLINE}The number of decoder threads is${PLAIN} ${GREEN}${PARAMS_DECODER_THREAD_NUM}${PLAIN}" 907 | echo -e " ${UNDERLINE}The number of IO threads is${PLAIN} ${GREEN}${PARAMS_IO_THREAD_NUM}${PLAIN}" 908 | echo 909 | } 910 | 911 | paramsFromDefault(){ 912 | echo -e "${UNDERLINE}${BOLD}[2-5/9]${PLAIN}" 913 | echo -e " ${YELLOW}Load parameters from ${FUNASR_CONFIG_FILE}${PLAIN}" 914 | echo 915 | 916 | PARAMS_DOCKER_IMAGE=`sed '/^PARAMS_DOCKER_IMAGE=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 917 | PARAMS_LOCAL_MODELS_DIR=`sed '/^PARAMS_LOCAL_MODELS_DIR=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 918 | PARAMS_DOWNLOAD_MODEL_DIR=`sed '/^PARAMS_DOWNLOAD_MODEL_DIR=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 919 | PARAMS_LOCAL_ASR_PATH=`sed '/^PARAMS_LOCAL_ASR_PATH=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 920 | PARAMS_DOCKER_ASR_PATH=`sed '/^PARAMS_DOCKER_ASR_PATH=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 921 | PARAMS_ASR_ID=`sed '/^PARAMS_ASR_ID=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 922 | PARAMS_LOCAL_VAD_PATH=`sed '/^PARAMS_LOCAL_VAD_PATH=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 923 | PARAMS_DOCKER_VAD_PATH=`sed '/^PARAMS_DOCKER_VAD_PATH=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 924 | PARAMS_VAD_ID=`sed '/^PARAMS_VAD_ID=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 925 | PARAMS_LOCAL_PUNC_PATH=`sed '/^PARAMS_LOCAL_PUNC_PATH=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 926 | PARAMS_DOCKER_PUNC_PATH=`sed '/^PARAMS_DOCKER_PUNC_PATH=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 927 | PARAMS_PUNC_ID=`sed '/^PARAMS_PUNC_ID=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 928 | PARAMS_DOCKER_EXEC_PATH=`sed '/^PARAMS_DOCKER_EXEC_PATH=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 929 | PARAMS_HOST_PORT=`sed '/^PARAMS_HOST_PORT=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 930 | PARAMS_DOCKER_PORT=`sed '/^PARAMS_DOCKER_PORT=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 931 | PARAMS_DECODER_THREAD_NUM=`sed '/^PARAMS_DECODER_THREAD_NUM=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 932 | PARAMS_IO_THREAD_NUM=`sed '/^PARAMS_IO_THREAD_NUM=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 933 | } 934 | 935 | saveParams(){ 936 | : > $FUNASR_CONFIG_FILE 937 | echo -e " ${GREEN}Parameters are stored in the file ${FUNASR_CONFIG_FILE}${PLAIN}" 938 | 939 | echo "PARAMS_DOCKER_IMAGE=${PARAMS_DOCKER_IMAGE}" > $FUNASR_CONFIG_FILE 940 | echo "PARAMS_LOCAL_MODELS_DIR=${PARAMS_LOCAL_MODELS_DIR}" >> $FUNASR_CONFIG_FILE 941 | echo "PARAMS_DOWNLOAD_MODEL_DIR=${PARAMS_DOWNLOAD_MODEL_DIR}" >> $FUNASR_CONFIG_FILE 942 | 943 | echo "PARAMS_LOCAL_EXEC_PATH=${PARAMS_LOCAL_EXEC_PATH}" >> $FUNASR_CONFIG_FILE 944 | echo "PARAMS_LOCAL_EXEC_DIR=${PARAMS_LOCAL_EXEC_DIR}" >> $FUNASR_CONFIG_FILE 945 | echo "PARAMS_DOCKER_EXEC_PATH=${PARAMS_DOCKER_EXEC_PATH}" >> $FUNASR_CONFIG_FILE 946 | echo "PARAMS_DOCKER_EXEC_DIR=${PARAMS_DOCKER_EXEC_DIR}" >> $FUNASR_CONFIG_FILE 947 | 948 | echo "PARAMS_LOCAL_ASR_PATH=${PARAMS_LOCAL_ASR_PATH}" >> $FUNASR_CONFIG_FILE 949 | echo "PARAMS_LOCAL_ASR_DIR=${PARAMS_LOCAL_ASR_DIR}" >> $FUNASR_CONFIG_FILE 950 | echo
"PARAMS_DOCKER_ASR_PATH=${PARAMS_DOCKER_ASR_PATH}" >> $FUNASR_CONFIG_FILE 951 | echo "PARAMS_DOCKER_ASR_DIR=${PARAMS_DOCKER_ASR_DIR}" >> $FUNASR_CONFIG_FILE 952 | echo "PARAMS_ASR_ID=${PARAMS_ASR_ID}" >> $FUNASR_CONFIG_FILE 953 | 954 | echo "PARAMS_LOCAL_PUNC_PATH=${PARAMS_LOCAL_PUNC_PATH}" >> $FUNASR_CONFIG_FILE 955 | echo "PARAMS_LOCAL_PUNC_DIR=${PARAMS_LOCAL_PUNC_DIR}" >> $FUNASR_CONFIG_FILE 956 | echo "PARAMS_DOCKER_PUNC_PATH=${PARAMS_DOCKER_PUNC_PATH}" >> $FUNASR_CONFIG_FILE 957 | echo "PARAMS_DOCKER_PUNC_DIR=${PARAMS_DOCKER_PUNC_DIR}" >> $FUNASR_CONFIG_FILE 958 | echo "PARAMS_PUNC_ID=${PARAMS_PUNC_ID}" >> $FUNASR_CONFIG_FILE 959 | 960 | echo "PARAMS_LOCAL_VAD_PATH=${PARAMS_LOCAL_VAD_PATH}" >> $FUNASR_CONFIG_FILE 961 | echo "PARAMS_LOCAL_VAD_DIR=${PARAMS_LOCAL_VAD_DIR}" >> $FUNASR_CONFIG_FILE 962 | echo "PARAMS_DOCKER_VAD_PATH=${PARAMS_DOCKER_VAD_PATH}" >> $FUNASR_CONFIG_FILE 963 | echo "PARAMS_DOCKER_VAD_DIR=${PARAMS_DOCKER_VAD_DIR}" >> $FUNASR_CONFIG_FILE 964 | echo "PARAMS_VAD_ID=${PARAMS_VAD_ID}" >> $FUNASR_CONFIG_FILE 965 | 966 | echo "PARAMS_HOST_PORT=${PARAMS_HOST_PORT}" >> $FUNASR_CONFIG_FILE 967 | echo "PARAMS_DOCKER_PORT=${PARAMS_DOCKER_PORT}" >> $FUNASR_CONFIG_FILE 968 | echo "PARAMS_DECODER_THREAD_NUM=${PARAMS_DECODER_THREAD_NUM}" >> $FUNASR_CONFIG_FILE 969 | echo "PARAMS_IO_THREAD_NUM=${PARAMS_IO_THREAD_NUM}" >> $FUNASR_CONFIG_FILE 970 | } 971 | 972 | showAllParams(){ 973 | echo -e "${UNDERLINE}${BOLD}[6/9]${PLAIN}" 974 | echo -e " ${YELLOW}Show parameters of FunASR server setting and confirm to run ...${PLAIN}" 975 | echo 976 | 977 | if [ ! -z "$PARAMS_DOCKER_IMAGE" ]; then 978 | echo -e " The current Docker image is : ${GREEN}${PARAMS_DOCKER_IMAGE}${PLAIN}" 979 | fi 980 | 981 | if [ ! -z "$PARAMS_LOCAL_MODELS_DIR" ]; then 982 | echo -e " The model is downloaded or stored to this directory in local : ${GREEN}${PARAMS_LOCAL_MODELS_DIR}${PLAIN}" 983 | fi 984 | if [ ! -z "$PARAMS_DOWNLOAD_MODEL_DIR" ]; then 985 | echo -e " The model will be automatically downloaded to the directory : ${GREEN}${PARAMS_DOWNLOAD_MODEL_DIR}${PLAIN}" 986 | fi 987 | 988 | if [ ! -z "$PARAMS_ASR_ID" ]; then 989 | echo -e " The ASR model_id used : ${GREEN}${PARAMS_ASR_ID}${PLAIN}" 990 | fi 991 | if [ ! -z "$PARAMS_LOCAL_ASR_PATH" ]; then 992 | echo -e " The path to the local ASR model directory for the load : ${GREEN}${PARAMS_LOCAL_ASR_PATH}${PLAIN}" 993 | fi 994 | echo -e " The ASR model directory corresponds to the directory in Docker : ${GREEN}${PARAMS_DOCKER_ASR_PATH}${PLAIN}" 995 | 996 | if [ ! -z "$PARAMS_VAD_ID" ]; then 997 | echo -e " The VAD model_id used : ${GREEN}${PARAMS_VAD_ID}${PLAIN}" 998 | fi 999 | if [ ! -z "$PARAMS_LOCAL_VAD_PATH" ]; then 1000 | echo -e " The path to the local VAD model directory for the load : ${GREEN}${PARAMS_LOCAL_VAD_PATH}${PLAIN}" 1001 | fi 1002 | echo -e " The VAD model directory corresponds to the directory in Docker : ${GREEN}${PARAMS_DOCKER_VAD_PATH}${PLAIN}" 1003 | 1004 | if [ ! -z "$PARAMS_PUNC_ID" ]; then 1005 | echo -e " The PUNC model_id used : ${GREEN}${PARAMS_PUNC_ID}${PLAIN}" 1006 | fi 1007 | if [ ! -z "$PARAMS_LOCAL_PUNC_PATH" ]; then 1008 | echo -e " The path to the local PUNC model directory for the load : ${GREEN}${PARAMS_LOCAL_PUNC_PATH}${PLAIN}" 1009 | fi 1010 | echo -e " The PUNC model directory corresponds to the directory in Docker: ${GREEN}${PARAMS_DOCKER_PUNC_PATH}${PLAIN}" 1011 | echo 1012 | 1013 | if [ ! 
-z "$PARAMS_LOCAL_EXEC_PATH" ]; then 1014 | echo -e " The local path of the FunASR service executor : ${GREEN}${PARAMS_LOCAL_EXEC_PATH}${PLAIN}" 1015 | fi 1016 | echo -e " The path in the docker of the FunASR service executor : ${GREEN}${PARAMS_DOCKER_EXEC_PATH}${PLAIN}" 1017 | 1018 | echo -e " Set the host port used for use by the FunASR service : ${GREEN}${PARAMS_HOST_PORT}${PLAIN}" 1019 | echo -e " Set the docker port used by the FunASR service : ${GREEN}${PARAMS_DOCKER_PORT}${PLAIN}" 1020 | 1021 | echo -e " Set the number of threads used for decoding the FunASR service : ${GREEN}${PARAMS_DECODER_THREAD_NUM}${PLAIN}" 1022 | echo -e " Set the number of threads used for IO the FunASR service : ${GREEN}${PARAMS_IO_THREAD_NUM}${PLAIN}" 1023 | 1024 | echo 1025 | while true 1026 | do 1027 | params_confirm="y" 1028 | echo -e " ${YELLOW}Please input [Y/n] to confirm the parameters.${PLAIN}" 1029 | echo -e " [y] Verify that these parameters are correct and that the service will run." 1030 | echo -e " [n] The parameters set are incorrect, it will be rolled out, please rerun." 1031 | read -p " read confirmation[Y/n]: " params_confirm 1032 | 1033 | if [ -z "$params_confirm" ]; then 1034 | params_confirm="y" 1035 | fi 1036 | YES="Y" 1037 | yes="y" 1038 | NO="N" 1039 | no="n" 1040 | echo 1041 | if [ "$params_confirm" = "$YES" ] || [ "$params_confirm" = "$yes" ]; then 1042 | echo -e " ${GREEN}Will run FunASR server later ...${PLAIN}" 1043 | break 1044 | elif [ "$params_confirm" = "$NO" ] || [ "$params_confirm" = "$no" ]; then 1045 | echo -e " ${RED}The parameters set are incorrect, please rerun ...${PLAIN}" 1046 | exit 1 1047 | else 1048 | echo "again ..." 1049 | fi 1050 | done 1051 | 1052 | saveParams 1053 | echo 1054 | sleep 1 1055 | } 1056 | 1057 | # Install docker 1058 | installDocker(){ 1059 | echo -e "${UNDERLINE}${BOLD}[7/9]${PLAIN}" 1060 | 1061 | if [ $DOCKERINFOLEN -gt 30 ]; then 1062 | echo -e " ${YELLOW}Docker has installed.${PLAIN}" 1063 | else 1064 | lowercase_osid=$(echo $OSID | tr '[A-Z]' '[a-z]') 1065 | echo -e " ${YELLOW}Start install docker for $lowercase_osid ${PLAIN}" 1066 | DOCKER_INSTALL_CMD="curl -fsSL https://get.docker.com | bash -s docker --mirror Aliyun" 1067 | DOCKER_INSTALL_RUN_CMD="" 1068 | 1069 | case "$lowercase_osid" in 1070 | ubuntu) 1071 | DOCKER_INSTALL_CMD="curl -fsSL https://test.docker.com -o test-docker.sh" 1072 | DOCKER_INSTALL_RUN_CMD="sudo sh test-docker.sh" 1073 | ;; 1074 | centos) 1075 | DOCKER_INSTALL_CMD="curl -fsSL https://get.docker.com | bash -s docker --mirror Aliyun" 1076 | ;; 1077 | debian) 1078 | DOCKER_INSTALL_CMD="curl -fsSL https://get.docker.com -o get-docker.sh" 1079 | DOCKER_INSTALL_RUN_CMD="sudo sh get-docker.sh" 1080 | ;; 1081 | *) 1082 | echo "$lowercase_osid is not supported." 1083 | ;; 1084 | esac 1085 | 1086 | echo -e " Get docker installer: ${GREEN}$DOCKER_INSTALL_CMD${PLAIN}" 1087 | echo -e " Get docker run: ${GREEN}$DOCKER_INSTALL_RUN_CMD${PLAIN}" 1088 | 1089 | $DOCKER_INSTALL_CMD 1090 | if [ ! 
-z "$DOCKER_INSTALL_RUN_CMD" ]; then 1091 | $DOCKER_INSTALL_RUN_CMD 1092 | fi 1093 | 1094 | DOCKERINFO=$(sudo docker info | wc -l) 1095 | DOCKERINFOLEN=$(expr $DOCKERINFO) 1096 | if [ $DOCKERINFOLEN -gt 30 ]; then 1097 | echo -e " ${GREEN}Docker install success, start docker server.${PLAIN}" 1098 | sudo systemctl start docker 1099 | else 1100 | echo -e " ${RED}Docker install failed!${PLAIN}" 1101 | exit 1 1102 | fi 1103 | fi 1104 | 1105 | echo 1106 | sleep 1 1107 | } 1108 | 1109 | # Download docker image 1110 | downloadDockerImage(){ 1111 | echo -e "${UNDERLINE}${BOLD}[8/9]${PLAIN}" 1112 | echo -e " ${YELLOW}Pull docker image(${PARAMS_DOCKER_IMAGE})...${PLAIN}" 1113 | 1114 | sudo docker pull ${PARAMS_DOCKER_IMAGE} 1115 | 1116 | echo 1117 | sleep 1 1118 | } 1119 | 1120 | dockerRun(){ 1121 | echo -e "${UNDERLINE}${BOLD}[9/9]${PLAIN}" 1122 | echo -e " ${YELLOW}Construct command and run docker ...${PLAIN}" 1123 | 1124 | RUN_CMD="sudo docker run" 1125 | PORT_MAP=" -p ${PARAMS_HOST_PORT}:${PARAMS_DOCKER_PORT}" 1126 | DIR_PARAMS=" --privileged=true" 1127 | DIR_MAP_PARAMS="" 1128 | if [ ! -z "$PARAMS_LOCAL_ASR_DIR" ]; then 1129 | if [ -z "$DIR_MAP_PARAMS" ]; then 1130 | DIR_MAP_PARAMS="${DIR_PARAMS} -v ${PARAMS_LOCAL_ASR_DIR}:${PARAMS_DOCKER_ASR_DIR}" 1131 | else 1132 | DIR_MAP_PARAMS="${DIR_MAP_PARAMS} -v ${PARAMS_LOCAL_ASR_DIR}:${PARAMS_DOCKER_ASR_DIR}" 1133 | fi 1134 | fi 1135 | if [ ! -z "$PARAMS_LOCAL_VAD_DIR" ]; then 1136 | if [ -z "$DIR_MAP_PARAMS" ]; then 1137 | DIR_MAP_PARAMS="${DIR_PARAMS} -v ${PARAMS_LOCAL_VAD_DIR}:${PARAMS_DOCKER_VAD_DIR}" 1138 | else 1139 | DIR_MAP_PARAMS="${DIR_MAP_PARAMS} -v ${PARAMS_LOCAL_VAD_DIR}:${PARAMS_DOCKER_VAD_DIR}" 1140 | fi 1141 | fi 1142 | if [ ! -z "$PARAMS_LOCAL_PUNC_DIR" ]; then 1143 | if [ -z "$DIR_MAP_PARAMS" ]; then 1144 | DIR_MAP_PARAMS="${DIR_PARAMS} -v ${PARAMS_LOCAL_PUNC_DIR}:${PARAMS_DOCKER_PUNC_DIR}" 1145 | else 1146 | DIR_MAP_PARAMS="${DIR_MAP_PARAMS} -v ${PARAMS_LOCAL_VAD_DIR}:${PARAMS_DOCKER_VAD_DIR}" 1147 | fi 1148 | fi 1149 | if [ ! -z "$PARAMS_LOCAL_EXEC_DIR" ]; then 1150 | if [ -z "$DIR_MAP_PARAMS" ]; then 1151 | DIR_MAP_PARAMS="${DIR_PARAMS} -v ${PARAMS_LOCAL_EXEC_DIR}:${PARAMS_DOCKER_EXEC_DIR}" 1152 | else 1153 | DIR_MAP_PARAMS="${DIR_MAP_PARAMS} -v ${PARAMS_LOCAL_EXEC_DIR}:${PARAMS_DOCKER_EXEC_DIR}" 1154 | fi 1155 | fi 1156 | if [ ! -z "$PARAMS_LOCAL_MODELS_DIR" ]; then 1157 | if [ -z "$DIR_MAP_PARAMS" ]; then 1158 | DIR_MAP_PARAMS="${DIR_PARAMS} -v ${PARAMS_LOCAL_MODELS_DIR}:${PARAMS_DOWNLOAD_MODEL_DIR}" 1159 | else 1160 | DIR_MAP_PARAMS="${DIR_MAP_PARAMS} -v ${PARAMS_LOCAL_MODELS_DIR}:${PARAMS_DOWNLOAD_MODEL_DIR}" 1161 | fi 1162 | fi 1163 | 1164 | EXEC_PARAMS="\"exec\":\"${PARAMS_DOCKER_EXEC_PATH}\"" 1165 | if [ ! -z "$PARAMS_ASR_ID" ]; then 1166 | ASR_PARAMS="\"--model-dir\":\"${PARAMS_ASR_ID}\"" 1167 | else 1168 | ASR_PARAMS="\"--model-dir\":\"${PARAMS_DOCKER_ASR_PATH}\"" 1169 | fi 1170 | if [ ! -z "$PARAMS_VAD_ID" ]; then 1171 | VAD_PARAMS="\"--vad-dir\":\"${PARAMS_VAD_ID}\"" 1172 | else 1173 | VAD_PARAMS="\"--vad-dir\":\"${PARAMS_DOCKER_VAD_PATH}\"" 1174 | fi 1175 | if [ ! 
-z "$PARAMS_PUNC_ID" ]; then 1176 | PUNC_PARAMS="\"--punc-dir\":\"${PARAMS_PUNC_ID}\"" 1177 | else 1178 | PUNC_PARAMS="\"--punc-dir\":\"${PARAMS_DOCKER_PUNC_PATH}\"" 1179 | fi 1180 | DOWNLOAD_PARARMS="\"--download-model-dir\":\"${PARAMS_DOWNLOAD_MODEL_DIR}\"" 1181 | if [ -z "$PARAMS_DOWNLOAD_MODEL_DIR" ]; then 1182 | MODEL_PARAMS="${ASR_PARAMS},${VAD_PARAMS},${PUNC_PARAMS}" 1183 | else 1184 | MODEL_PARAMS="${ASR_PARAMS},${VAD_PARAMS},${PUNC_PARAMS},${DOWNLOAD_PARARMS}" 1185 | fi 1186 | 1187 | DECODER_PARAMS="\"--decoder-thread-num\":\"${PARAMS_DECODER_THREAD_NUM}\"" 1188 | IO_PARAMS="\"--io-thread-num\":\"${PARAMS_IO_THREAD_NUM}\"" 1189 | THREAD_PARAMS=${DECODER_PARAMS},${IO_PARAMS} 1190 | PORT_PARAMS="\"--port\":\"${PARAMS_DOCKER_PORT}\"" 1191 | CRT_PATH="\"--certfile\":\"/workspace/FunASR/funasr/runtime/ssl_key/server.crt\"" 1192 | KEY_PATH="\"--keyfile\":\"/workspace/FunASR/funasr/runtime/ssl_key/server.key\"" 1193 | 1194 | ENV_PARAMS=" -v /var/funasr:/workspace/.config" 1195 | ENV_PARAMS=" ${ENV_PARAMS} --env DAEMON_SERVER_CONFIG={\"server\":[{${EXEC_PARAMS},${MODEL_PARAMS},${THREAD_PARAMS},${PORT_PARAMS},${CRT_PATH},${KEY_PATH}}]}" 1196 | 1197 | RUN_CMD="${RUN_CMD}${PORT_MAP}${DIR_MAP_PARAMS}${ENV_PARAMS}" 1198 | RUN_CMD="${RUN_CMD} -it -d ${PARAMS_DOCKER_IMAGE}" 1199 | 1200 | # check Docker 1201 | checkDockerExist 1202 | result=$? 1203 | result=`expr $result + 0` 1204 | if [ ${result} -eq 50 ]; then 1205 | return 50 1206 | fi 1207 | 1208 | server_log="/var/funasr/server_console.log" 1209 | rm -f ${PROGRESS_TXT} 1210 | rm -f ${server_log} 1211 | 1212 | ${RUN_CMD} 1213 | 1214 | echo 1215 | echo -e " ${YELLOW}Loading models:${PLAIN}" 1216 | 1217 | # Hide the cursor, start draw progress. 1218 | printf "\e[?25l" 1219 | while true 1220 | do 1221 | ServerProgress 1222 | result=$? 1223 | stage=`expr $result + 0` 1224 | if [ ${stage} -eq 0 ]; then 1225 | break 1226 | elif [ ${stage} -gt 0 ] && [ ${stage} -lt 6 ]; then 1227 | sleep 0.1 1228 | # clear 3 lines 1229 | printf "\033[3A" 1230 | elif [ ${stage} -eq 6 ]; then 1231 | break 1232 | elif [ ${stage} -eq 98 ]; then 1233 | return 98 1234 | else 1235 | echo -e " ${RED}Starting FunASR server failed.${PLAIN}" 1236 | echo 1237 | # Display the cursor 1238 | printf "\e[?25h" 1239 | return 99 1240 | fi 1241 | done 1242 | # Display the cursor 1243 | printf "\e[?25h" 1244 | 1245 | echo -e " ${GREEN}The service has been started.${PLAIN}" 1246 | echo 1247 | echo -e " ${BOLD}If you want to see an example of how to use the client, you can run ${PLAIN}${GREEN}sudo bash funasr-runtime-deploy.sh -c${PLAIN} ." 
1248 | echo 1249 | } 1250 | 1251 | checkDockerExist(){ 1252 | result=$(sudo docker ps | grep ${PARAMS_DOCKER_IMAGE} | wc -l) 1253 | result=`expr $result + 0` 1254 | if [ ${result} -ne 0 ]; then 1255 | echo 1256 | echo -e " ${RED}Docker: ${PARAMS_DOCKER_IMAGE} has been launched, please run (${PLAIN}${GREEN}sudo bash funasr-runtime-deploy.sh -p${PLAIN}${RED}) to stop Docker first.${PLAIN}" 1257 | return 50 1258 | fi 1259 | } 1260 | 1261 | dockerExit(){ 1262 | echo -e " ${YELLOW}Stop docker(${PARAMS_DOCKER_IMAGE}) server ...${PLAIN}" 1263 | sudo docker stop `sudo docker ps -a| grep ${PARAMS_DOCKER_IMAGE} | awk '{print $1}' ` 1264 | echo 1265 | sleep 1 1266 | } 1267 | 1268 | modelChange(){ 1269 | model_id=$1 1270 | 1271 | result=$(echo $1 | grep "asr") 1272 | if [[ "$result" != "" ]] 1273 | then 1274 | PARAMS_ASR_ID=$1 1275 | PARAMS_DOCKER_ASR_PATH=${PARAMS_DOCKER_ASR_DIR}/${PARAMS_ASR_ID} 1276 | return 0 1277 | fi 1278 | result=$(echo $1 | grep "vad") 1279 | if [[ "$result" != "" ]] 1280 | then 1281 | PARAMS_VAD_ID=$1 1282 | PARAMS_DOCKER_VAD_PATH=${PARAMS_DOCKER_VAD_DIR}/${PARAMS_VAD_ID} 1283 | return 0 1284 | fi 1285 | result=$(echo $1 | grep "punc") 1286 | if [[ "$result" != "" ]] 1287 | then 1288 | PARAMS_PUNC_ID=$1 1289 | PARAMS_DOCKER_PUNC_PATH=${PARAMS_DOCKER_PUNC_DIR}/${PARAMS_PUNC_ID} 1290 | return 0 1291 | fi 1292 | } 1293 | 1294 | sampleClientRun(){ 1295 | echo -e "${YELLOW}Will download sample tools for the client to show how speech recognition works.${PLAIN}" 1296 | 1297 | sample_name="funasr_samples" 1298 | sample_tar="funasr_samples.tar.gz" 1299 | sample_url="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/sample/${sample_tar}" 1300 | DOWNLOAD_SAMPLE="curl -O ${sample_url}" 1301 | UNTAR_CMD="tar -zxf ${sample_tar}" 1302 | 1303 | if [ ! -f "${sample_tar}" ]; then 1304 | ${DOWNLOAD_SAMPLE} 1305 | fi 1306 | if [ -f "${sample_tar}" ]; then 1307 | ${UNTAR_CMD} 1308 | fi 1309 | if [ -d "${sample_name}" ]; then 1310 | 1311 | echo -e " Please select the client you want to run." 1312 | menuSelection ${SAMPLE_CLIENTS[*]} 1313 | result=$?
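# Map the menu choice to a client implementation in SAMPLE_CLIENTS (the Linux_Cpp and Python cases are handled below).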
1314 | index=`expr $result - 1` 1315 | lang=${SAMPLE_CLIENTS[${index}]} 1316 | echo 1317 | 1318 | SERVER_IP="127.0.0.1" 1319 | read -p " Please enter the IP of server, default(${SERVER_IP}): " SERVER_IP 1320 | if [ -z "$SERVER_IP" ]; then 1321 | SERVER_IP="127.0.0.1" 1322 | fi 1323 | 1324 | HOST_PORT=`sed '/^PARAMS_HOST_PORT=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 1325 | if [ -z "$HOST_PORT" ]; then 1326 | HOST_PORT="10095" 1327 | fi 1328 | read -p " Please enter the port of server, default(${HOST_PORT}): " HOST_PORT 1329 | if [ -z "$HOST_PORT" ]; then 1330 | HOST_PORT=`sed '/^PARAMS_HOST_PORT=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}` 1331 | if [ -z "$HOST_PORT" ]; then 1332 | HOST_PORT="10095" 1333 | fi 1334 | fi 1335 | 1336 | WAV_PATH="${cur_dir}/funasr_samples/audio/asr_example.wav" 1337 | read -p " Please enter the audio path, default(${WAV_PATH}): " WAV_PATH 1338 | if [ -z "$WAV_PATH" ]; then 1339 | WAV_PATH="${cur_dir}/funasr_samples/audio/asr_example.wav" 1340 | fi 1341 | 1342 | echo 1343 | PRE_CMD="" 1344 | case "$lang" in 1345 | Linux_Cpp) 1346 | PRE_CMD="export LD_LIBRARY_PATH=${cur_dir}/funasr_samples/cpp/libs:\$LD_LIBRARY_PATH" 1347 | CLIENT_EXEC="${cur_dir}/funasr_samples/cpp/funasr-wss-client" 1348 | RUN_CMD="${CLIENT_EXEC} --server-ip ${SERVER_IP} --port ${HOST_PORT} --wav-path ${WAV_PATH}" 1349 | echo -e " Run ${BLUE}${PRE_CMD}${PLAIN}" 1350 | ${PRE_CMD} 1351 | echo 1352 | ;; 1353 | Python) 1354 | CLIENT_EXEC="${cur_dir}/funasr_samples/python/wss_client_asr.py" 1355 | RUN_CMD="python3 ${CLIENT_EXEC} --host ${SERVER_IP} --port ${HOST_PORT} --mode offline --audio_in ${WAV_PATH} --send_without_sleep --output_dir ./funasr_samples/python" 1356 | PRE_CMD="pip3 install click>=8.0.4" 1357 | echo -e " Run ${BLUE}${PRE_CMD}${PLAIN}" 1358 | ${PRE_CMD} 1359 | echo 1360 | PRE_CMD="pip3 install -r ${cur_dir}/funasr_samples/python/requirements_client.txt" 1361 | echo -e " Run ${BLUE}${PRE_CMD}${PLAIN}" 1362 | ${PRE_CMD} 1363 | echo 1364 | ;; 1365 | *) 1366 | echo "$lang is not supported." 1367 | ;; 1368 | esac 1369 | 1370 | echo -e " Run ${BLUE}${RUN_CMD}${PLAIN}" 1371 | ${RUN_CMD} 1372 | echo 1373 | echo -e " If it failed, you can try (${GREEN}${RUN_CMD}${PLAIN}) in your Shell." 1374 | echo 1375 | fi 1376 | } 1377 | 1378 | # Install main function 1379 | installFunasrDocker(){ 1380 | installDocker 1381 | downloadDockerImage 1382 | } 1383 | 1384 | modelsConfigure(){ 1385 | setupModelType 1386 | setupAsrModelId 1387 | setupVadModelId 1388 | setupPuncModelId 1389 | } 1390 | 1391 | paramsConfigure(){ 1392 | selectDockerImages 1393 | result=$? 1394 | result=`expr $result + 0` 1395 | if [ ${result} -eq 50 ]; then 1396 | return 50 1397 | fi 1398 | 1399 | setupModelType 1400 | setupAsrModelId 1401 | setupVadModelId 1402 | setupPuncModelId 1403 | setupServerExec 1404 | setupHostPort 1405 | setupThreadNum 1406 | } 1407 | 1408 | # Display Help info 1409 | displayHelp(){ 1410 | echo -e "${UNDERLINE}Usage${PLAIN}:" 1411 | echo -e " $0 [OPTIONAL FLAGS]" 1412 | echo 1413 | echo -e "funasr-runtime-deploy.sh - a Bash script to install & run FunASR docker." 1414 | echo 1415 | echo -e "${UNDERLINE}Options${PLAIN}:" 1416 | echo -e " ${BOLD}-i, --install${PLAIN} Install and run FunASR docker." 1417 | echo -e " ${BOLD}-s, --start${PLAIN} Run FunASR docker with configuration that has already been set." 1418 | echo -e " ${BOLD}-p, --stop${PLAIN} Stop FunASR docker." 1419 | echo -e " ${BOLD}-r, --restart${PLAIN} Restart FunASR docker."
1420 | echo -e " ${BOLD}-u, --update${PLAIN} Update the model ID that has already been set, e.g: --update model XXXX." 1421 | echo -e " ${BOLD}-c, --client${PLAIN} Get a client example to show how to initiate speech recognition." 1422 | echo -e " ${BOLD}-v, --version${PLAIN} Display current script version." 1423 | echo -e " ${BOLD}-h, --help${PLAIN} Display this help." 1424 | echo 1425 | echo -e "${UNDERLINE}funasr-runtime-deploy.sh${PLAIN} - Version ${scriptVersion} " 1426 | echo -e "Modify Date ${scriptDate}" 1427 | } 1428 | 1429 | # OS 1430 | OSID=$(grep ^ID= /etc/os-release | cut -d= -f2) 1431 | OSVER=$(lsb_release -cs) 1432 | OSNUM=$(grep -oE "[0-9.]+" /etc/issue) 1433 | CPUNUM=$(cat /proc/cpuinfo |grep "processor"|wc -l) 1434 | DOCKERINFO=$(sudo docker info | wc -l) 1435 | DOCKERINFOLEN=$(expr $DOCKERINFO) 1436 | 1437 | # PARAMS 1438 | FUNASR_CONFIG_FILE="/var/funasr/config" 1439 | # The path of the server executor in local 1440 | PARAMS_LOCAL_EXEC_PATH="" 1441 | # The dir storing the server executor in local 1442 | PARAMS_LOCAL_EXEC_DIR="" 1443 | # The path of the server executor in docker 1444 | PARAMS_DOCKER_EXEC_PATH="/workspace/FunASR/funasr/runtime/websocket/build/bin/funasr-wss-server" 1445 | # The dir storing the server executor in docker 1446 | PARAMS_DOCKER_EXEC_DIR="/workspace/FunASR/funasr/runtime/websocket/build/bin" 1447 | 1448 | # The dir of models in local 1449 | PARAMS_LOCAL_MODELS_DIR="" 1450 | # The dir for downloading models in docker 1451 | PARAMS_DOWNLOAD_MODEL_DIR="" 1452 | # The Docker image name 1453 | PARAMS_DOCKER_IMAGE="" 1454 | 1455 | # The dir storing the punc model in local 1456 | PARAMS_LOCAL_PUNC_DIR="" 1457 | # The path of punc model in local 1458 | PARAMS_LOCAL_PUNC_PATH="" 1459 | # The dir storing the punc model in docker 1460 | PARAMS_DOCKER_PUNC_DIR="" 1461 | # The path of punc model in docker 1462 | PARAMS_DOCKER_PUNC_PATH="" 1463 | # The punc model ID in ModelScope 1464 | PARAMS_PUNC_ID="" 1465 | 1466 | # The dir storing the vad model in local 1467 | PARAMS_LOCAL_VAD_DIR="" 1468 | # The path of vad model in local 1469 | PARAMS_LOCAL_VAD_PATH="" 1470 | # The dir storing the vad model in docker 1471 | PARAMS_DOCKER_VAD_DIR="" 1472 | # The path of vad model in docker 1473 | PARAMS_DOCKER_VAD_PATH="" 1474 | # The vad model ID in ModelScope 1475 | PARAMS_VAD_ID="" 1476 | 1477 | # The dir storing the asr model in local 1478 | PARAMS_LOCAL_ASR_DIR="" 1479 | # The path of asr model in local 1480 | PARAMS_LOCAL_ASR_PATH="" 1481 | # The dir storing the asr model in docker 1482 | PARAMS_DOCKER_ASR_DIR="" 1483 | # The path of asr model in docker 1484 | PARAMS_DOCKER_ASR_PATH="" 1485 | # The asr model ID in ModelScope 1486 | PARAMS_ASR_ID="" 1487 | 1488 | PARAMS_HOST_PORT="10095" 1489 | PARAMS_DOCKER_PORT="10095" 1490 | PARAMS_DECODER_THREAD_NUM="32" 1491 | PARAMS_IO_THREAD_NUM="8" 1492 | 1493 | 1494 | echo -e "#############################################################" 1495 | echo -e "# ${RED}OS${PLAIN}: $OSID $OSNUM $OSVER " 1496 | echo -e "# ${RED}Kernel${PLAIN}: $(uname -m) Linux $(uname -r)" 1497 | echo -e "# ${RED}CPU${PLAIN}: $(grep 'model name' /proc/cpuinfo | uniq | awk -F : '{print $2}' | sed 's/^[ \t]*//g' | sed 's/ \+/ /g') " 1498 | echo -e "# ${RED}CPU NUM${PLAIN}: $CPUNUM" 1499 | echo -e "# ${RED}RAM${PLAIN}: $(cat /proc/meminfo | grep 'MemTotal' | awk -F : '{print $2}' | sed 's/^[ \t]*//g') " 1500 | echo -e "#############################################################" 1501 | echo 1502 | 1503 | # Initialization step: dispatch on the first CLI argument. A 98 return from dockerRun means the progress file was missing, so each branch stops the container and retries once. 1504 | case "$1" in 1505 | install|-i|--install) 1506 | rootNess 1507 |
paramsConfigure 1508 | result=$? 1509 | result=`expr $result + 0` 1510 | if [ ${result} -ne 50 ]; then 1511 | showAllParams 1512 | installFunasrDocker 1513 | dockerRun 1514 | result=$? 1515 | stage=`expr $result + 0` 1516 | if [ ${stage} -eq 98 ]; then 1517 | dockerExit 1518 | dockerRun 1519 | fi 1520 | fi 1521 | ;; 1522 | start|-s|--start) 1523 | rootNess 1524 | paramsFromDefault 1525 | showAllParams 1526 | dockerRun 1527 | result=$? 1528 | stage=`expr $result + 0` 1529 | if [ ${stage} -eq 98 ]; then 1530 | dockerExit 1531 | dockerRun 1532 | fi 1533 | ;; 1534 | restart|-r|--restart) 1535 | rootNess 1536 | paramsFromDefault 1537 | showAllParams 1538 | dockerExit 1539 | dockerRun 1540 | result=$? 1541 | stage=`expr $result + 0` 1542 | if [ ${stage} -eq 98 ]; then 1543 | dockerExit 1544 | dockerRun 1545 | fi 1546 | ;; 1547 | stop|-p|--stop) 1548 | rootNess 1549 | paramsFromDefault 1550 | dockerExit 1551 | ;; 1552 | update|-u|--update) 1553 | rootNess 1554 | paramsFromDefault 1555 | 1556 | if [ $# -eq 1 ]; 1557 | then 1558 | modelsConfigure 1559 | elif [ $# -eq 3 ]; 1560 | then 1561 | type=$2 1562 | id=$3 1563 | MODEL="model" 1564 | if [ "$type" = "$MODEL" ]; then 1565 | modelChange $id 1566 | else 1567 | modelsConfigure 1568 | fi 1569 | else 1570 | modelsConfigure 1571 | fi 1572 | 1573 | saveParams 1574 | dockerExit 1575 | dockerRun 1576 | result=$? 1577 | stage=`expr $result + 0` 1578 | if [ ${stage} -eq 98 ]; then 1579 | dockerExit 1580 | dockerRun 1581 | fi 1582 | ;; 1583 | client|-c|--client) 1584 | rootNess 1585 | sampleClientRun 1586 | ;; 1587 | *) 1588 | clear 1589 | displayHelp 1590 | exit 0 1591 | ;; 1592 | esac 1593 | --------------------------------------------------------------------------------