├── ClipVideo
│   ├── README.md
│   ├── clipvideo
│   │   ├── Dataset_generator.py
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-310.pyc
│   │   │   ├── argparse_tools.cpython-310.pyc
│   │   │   ├── subtitle_utils.cpython-310.pyc
│   │   │   ├── trans_utils.cpython-310.pyc
│   │   │   └── videoclipper.cpython-310.pyc
│   │   ├── argparse_tools.py
│   │   ├── gradio_service.py
│   │   ├── imagemagick_test.py
│   │   ├── subtitle_utils.py
│   │   ├── test.sh
│   │   ├── trans_utils.py
│   │   └── videoclipper.py
│   ├── docs
│   │   └── images
│   │       └── show.png
│   └── requirments.txt
├── FunASR-APP-LICENSE
├── FunASR-APP-README.md
├── LICENSE
├── README.md
└── TransAudio
    ├── README.md
    └── funasr-runtime-deploy.sh
/ClipVideo/README.md:
--------------------------------------------------------------------------------
1 | ## ClipVideo
2 |
3 | As the first application toolkit of FunASR-APP, ClipVideo enables users to clip ```.mp4``` video files or ```.wav``` audio files with chosen text segments out of the recognition results generated by the [Paraformer-long model](https://modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary).
4 |
5 | With the help of ClipVideo you can get video clips easily with the following steps (in the Gradio service):
6 | - Step1: Choose your video file (or try the example videos below)
7 | - Step2: Copy the text segments you need to 'Text to Clip'
8 | - Step3: Adjust subtitle settings (if needed)
9 | - Step4: Click 'Clip' or 'Clip and Generate Subtitles'
10 |
11 | ### Usage
12 | ```shell
13 | git clone https://github.com/alibaba-damo-academy/FunASR-APP.git
14 | cd FunASR-APP
15 | # install modelscope
16 | pip install "modelscope[audio_asr]" -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
17 | # python environments
18 | pip install -r ClipVideo/requirments.txt
19 | ```
20 | (Optional) If you want to clip video files with embedded subtitles:
21 |
22 | 1. ffmpeg and imagemagick are required
23 |
24 | - On Ubuntu
25 | ```shell
26 | apt-get -y update && apt-get -y install ffmpeg imagemagick
27 | sed -i 's/none/read,write/g' /etc/ImageMagick-6/policy.xml
28 | ```
29 | - On macOS
30 | ```shell
31 | brew install imagemagick
32 | sed -i '' 's/none/read,write/g' /usr/local/Cellar/imagemagick/7.1.1-8_1/etc/ImageMagick-7/policy.xml
33 | ```
34 | 2. Download font file to ClipVideo/font
35 |
36 | ```shell
37 | wget https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ClipVideo/STHeitiMedium.ttc -O ClipVideo/font/STHeitiMedium.ttc
38 | ```
39 |
40 | #### Experience ClipVideo in Modelscope
41 | You can try ClipVideo in modelscope space: [link](https://modelscope.cn/studios/damo/funasr_app_clipvideo/summary).
42 |
43 | #### Use ClipVideo by Gradio Service
44 | You can launch your own ClipVideo service, identical to the [Modelscope Space](https://modelscope.cn/studios/damo/funasr_app_clipvideo/summary), as follows:
45 | ```shell
46 | python clipvideo/gradio_service.py
47 | ```
48 | then visit ```localhost:7860``` and you will get a Gradio service like the one below, where you can use ClipVideo by following the steps above:
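
By default the service listens on Gradio's standard port 7860. If you need a different bind address or port, the last line of ```clipvideo/gradio_service.py``` can be adjusted, a sketch using Gradio's standard ```server_name```/```server_port``` parameters (the values shown are illustrative):

```python
# bind on all interfaces instead of localhost only
demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7860)
```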
49 |
50 |
51 | #### Use ClipVideo from the command line
52 | ClipVideo also lets you recognize and clip from the command line:
53 | ```shell
54 | # working in ClipVideo/
55 | # step1: Recognize
56 | python clipvideo/videoclipper.py --stage 1 \
57 | --file examples/2022云栖大会_片段.mp4 \
58 | --output_dir ./output
59 | # now you can find recognition results and entire SRT file in ./output/
60 | # step2: Clip
61 | python clipvideo/videoclipper.py --stage 2 \
62 | --file examples/2022云栖大会_片段.mp4 \
63 | --output_dir ./output \
64 | --dest_text '我们把它跟乡村振兴去结合起来,利用我们的设计的能力' \
65 | --start_ost 0 \
66 | --end_ost 100 \
67 | --output_file './output/res.mp4'
68 | ```
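
The same two stages can also be driven from Python through the ```runner``` helper in ```clipvideo/videoclipper.py```. A minimal sketch, assuming stage 1 has already filled ```./output``` and using an illustrative file name:

```python
# run from within ClipVideo/clipvideo so that videoclipper is importable
from videoclipper import runner

runner(stage=2, file='../examples/example.mp4', output_dir='./output',
       dest_text='我们把它跟乡村振兴去结合起来', start_ost=0, end_ost=100,
       output_file='./output/res.mp4')
```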
69 |
--------------------------------------------------------------------------------
/ClipVideo/clipvideo/Dataset_generator.py:
--------------------------------------------------------------------------------
1 | import librosa
2 | import soundfile as sf
3 | from videoclipper import VideoClipper
4 | import os
5 | from tqdm import tqdm
6 | import subprocess
7 | from modelscope.pipelines import pipeline
8 | from modelscope.utils.constant import Tasks
9 | from shutil import copyfile, rmtree
10 |
11 | current_directory = os.path.dirname(os.path.abspath(__file__))
12 | inference_pipeline = pipeline(
13 | task=Tasks.auto_speech_recognition,
14 | model='damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
15 | vad_model='damo/speech_fsmn_vad_zh-cn-16k-common-pytorch',
16 | punc_model='damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch',
17 | )
18 | video_tools = VideoClipper(inference_pipeline)
19 | audio_clipper = VideoClipper(None)
20 | STAGE_RECOGNIZE = 1
21 | STAGE_CLIP = 2
22 |
23 | # start offset in ms, applied to each clip's start
24 | START_OST = 0
25 | # end offset in ms, applied to each clip's end
26 | END_OST = 0
27 |
28 | FONT_SIZE = 32
29 | FONT_COLOR = 'white'
30 | ADD_SUB = False
31 | OUTPUT_SRT_FILE = f"{current_directory}/output/srt/"
32 | OUTPUT_MP4_FILE = f"{current_directory}/output/mp4/"
33 | OUTPUT_WAV_FILE = f"{current_directory}/output/wav/"
34 | INPUT_FILE_PATH = f"{current_directory}/video_files/"
35 | READY_INPUT_WAV_PATH = f"{current_directory}/input/mdx_extra/"
36 | DEMUCS_TARGET_INPUT_PATH = f"{current_directory}/input/"
37 | ORG_INPUT_WAV_PATH = f"{current_directory}/org_wav/"
38 |
39 |
40 | # Recursively collect all file paths under a directory
41 | def get_all_files_in_directory(directory):
42 |     print(f"from: {directory}")
43 | file_paths_list = []
44 | for root, dirs, files in os.walk(directory):
45 | for file in files:
46 | file_path = os.path.join(root, file)
47 | file_paths_list.append(file_path)
48 | return file_paths_list
49 |
50 |
51 | # Recognize each prepared wav and collect subtitles, SRT file paths, and states
52 | def vidio_recognizing_to_get_srt_list():
53 | video_list = get_all_files_in_directory(READY_INPUT_WAV_PATH)
54 | srt_list = []
55 | srt_file_list = []
56 | state_json = {}
57 | for video_path in tqdm(video_list):
58 | try:
59 | wav = librosa.load(video_path, sr=16000)[0]
60 | res_text, res_srt, state = video_tools.recog((16000, wav))
61 | state_json.update({str(video_path): state})
62 | except Exception as e:
63 |             print(f"Transcription failed for {video_path}: {e}")
64 | continue
65 | srt_file_name = os.path.splitext(os.path.basename(video_path))[0] + '.srt'
66 | srt_save_path = os.path.join(OUTPUT_SRT_FILE, srt_file_name)
67 | srt_list.append(res_srt)
68 | try:
69 | with open(srt_save_path, "w", encoding="utf-8") as f:
70 | f.write(res_srt)
71 | srt_file_list.append(srt_save_path)
72 |             print(f"Wrote {srt_save_path}")
73 | except Exception as e:
74 | print(f"Error writing SRT file {srt_save_path}: {e}")
75 | return srt_list, srt_file_list, state_json
76 |
77 |
78 | # Extract audio from a video as a 16 kHz wav
79 | def mp4_to_wav(input_path):
80 | output_file_name = os.path.splitext(os.path.basename(input_path))[0] + '.wav'
81 | output_path = os.path.join(ORG_INPUT_WAV_PATH, output_file_name)
82 | subprocess.run([
83 | "ffmpeg", "-i", input_path, "-acodec", "pcm_s16le", "-ar", "16000", output_path
84 | ])
85 |     print(f"Converted: {input_path} -> {output_path}")
86 |
87 |
88 | # Build the full list of subtitle texts from the generated SRT files
89 | def extract_subtitle_text_list_from_srt():
90 | srt_list, srt_file_path_list, state_json = vidio_recognizing_to_get_srt_list()
91 | all_srt_text_json = {}
92 | index = 0
93 | for srt_file_path in srt_file_path_list:
94 | single_subtitle_text_list = []
95 | with open(srt_file_path, "r", encoding="utf-8") as srt_file:
96 | lines = srt_file.readlines()
97 |             for i in range(2, len(lines), 3):  # generate_srt() writes 3-line blocks (index, time, text) with no blank separator
98 | subtitle_text = lines[i].strip()
99 | single_subtitle_text_list.append(subtitle_text)
100 | index = index + 1
101 | wav_file_path = READY_INPUT_WAV_PATH + os.path.splitext(os.path.basename(srt_file_path))[0] + '.wav'
102 | all_srt_text_json.update({str(wav_file_path): single_subtitle_text_list})
103 |     print(f"Total subtitle lines: {index}")
104 | return all_srt_text_json, state_json
105 |
106 |
107 | # Clip the audio segment for each subtitle line and save it
108 | def clip_audio_from_srt():
109 | all_srt_text_json, state_json = extract_subtitle_text_list_from_srt()
110 | print(str(state_json))
111 | if len(all_srt_text_json) == 0 or len(state_json) == 0:
112 |         print("Subtitle files are empty; check whether the audio files contain any speech.")
113 | return None
114 | for wav_file_path in all_srt_text_json:
115 | for one_line_text in all_srt_text_json[wav_file_path]:
116 | state = state_json[str(wav_file_path)]
117 | wav_file_name = OUTPUT_WAV_FILE + str(one_line_text) + '_clip.wav'
118 | try:
119 | (sr, audio), message, srt_clip = video_tools.clip(dest_text=str(one_line_text), start_ost=START_OST,
120 | end_ost=END_OST,
121 | state=state)
122 | if "No period found in the speech" not in message:
123 |                     print(f"{one_line_text} in {wav_file_path} done")
124 | sf.write(wav_file_name, audio, 16000)
125 | except Exception as e:
126 |                 print(f"Failed to clip '{one_line_text}' in {wav_file_path}: {e}")
127 |
128 |
129 | # Clear generated artifacts and re-create the working directories
130 | def clean_files():
131 | rmtree(f"{current_directory}/input/mdx_extra/")
132 | os.mkdir(f"{current_directory}/input/mdx_extra/")
133 | rmtree(f"{current_directory}/org_wav/")
134 | os.mkdir(f"{current_directory}/org_wav/")
135 | rmtree(f"{current_directory}/output/mp4/")
136 | os.mkdir(f"{current_directory}/output/mp4/")
137 | rmtree(f"{current_directory}/output/srt/")
138 | os.mkdir(f"{current_directory}/output/srt/")
139 | rmtree(f"{current_directory}/output/wav/")
140 | os.mkdir(f"{current_directory}/output/wav/")
141 |     print("Initialization complete")
142 |
143 |
144 | # Vocal separation (denoising) with demucs
145 | def demucs_wav():
146 | for org_wav_file in get_all_files_in_directory(ORG_INPUT_WAV_PATH):
147 | target_ready_wav_path = os.path.splitext(os.path.basename(org_wav_file))[0] + ".wav"
148 | os.system(
149 | f"demucs -n mdx_extra -o {DEMUCS_TARGET_INPUT_PATH} --filename {target_ready_wav_path} {org_wav_file}")
150 |
151 |
152 | # Prepare: convert or copy every input to wav
153 | def ready_all_to_wav():
154 | files_list = get_all_files_in_directory(INPUT_FILE_PATH)
155 | for file in files_list:
156 | audio_suffixs = ['wav']
157 | video_suffixs = ['mp4']
158 | if file[-3:] in audio_suffixs:
159 | target_wav_path = os.path.join(ORG_INPUT_WAV_PATH, os.path.basename(file))
160 | copyfile(file, target_wav_path)
161 | elif file[-3:] in video_suffixs:
162 | mp4_to_wav(file)
163 | else:
164 |             print("Only .wav and .mp4 are supported!")
165 |
166 |
167 | # Run the whole pipeline
168 | def run():
169 | clean_files()
170 | ready_all_to_wav()
171 | demucs_wav()
172 | clip_audio_from_srt()
173 |
174 |
175 | # main entry point
176 | if __name__ == '__main__':
177 | run()
178 |
--------------------------------------------------------------------------------
/ClipVideo/clipvideo/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fatfish588/Dataset_Generator_For_VITS/2f012ac453a99723c29ffbf119b4ff545492ebb7/ClipVideo/clipvideo/__init__.py
--------------------------------------------------------------------------------
/ClipVideo/clipvideo/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fatfish588/Dataset_Generator_For_VITS/2f012ac453a99723c29ffbf119b4ff545492ebb7/ClipVideo/clipvideo/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/ClipVideo/clipvideo/__pycache__/argparse_tools.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fatfish588/Dataset_Generator_For_VITS/2f012ac453a99723c29ffbf119b4ff545492ebb7/ClipVideo/clipvideo/__pycache__/argparse_tools.cpython-310.pyc
--------------------------------------------------------------------------------
/ClipVideo/clipvideo/__pycache__/subtitle_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fatfish588/Dataset_Generator_For_VITS/2f012ac453a99723c29ffbf119b4ff545492ebb7/ClipVideo/clipvideo/__pycache__/subtitle_utils.cpython-310.pyc
--------------------------------------------------------------------------------
/ClipVideo/clipvideo/__pycache__/trans_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fatfish588/Dataset_Generator_For_VITS/2f012ac453a99723c29ffbf119b4ff545492ebb7/ClipVideo/clipvideo/__pycache__/trans_utils.cpython-310.pyc
--------------------------------------------------------------------------------
/ClipVideo/clipvideo/__pycache__/videoclipper.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fatfish588/Dataset_Generator_For_VITS/2f012ac453a99723c29ffbf119b4ff545492ebb7/ClipVideo/clipvideo/__pycache__/videoclipper.cpython-310.pyc
--------------------------------------------------------------------------------
/ClipVideo/clipvideo/argparse_tools.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from pathlib import Path
3 |
4 | import yaml
5 | import sys
6 |
7 |
8 | class ArgumentParser(argparse.ArgumentParser):
9 |     """Simple implementation of ArgumentParser supporting config files.
10 |
11 |     This class originates from https://github.com/bw2/ConfigArgParse,
12 |     but differs from it in several ways:
13 |
14 |     - No support for multiple config files
15 |     - "--config" is always added as an option automatically
16 |     - No formats other than yaml are supported
17 |     - Argument types are not checked
18 |
19 |     """
20 |
21 | def __init__(self, *args, **kwargs):
22 | super().__init__(*args, **kwargs)
23 | self.add_argument("--config", help="Give config file in yaml format")
24 |
25 | def parse_known_args(self, args=None, namespace=None):
26 | # Once parsing for setting from "--config"
27 | _args, _ = super().parse_known_args(args, namespace)
28 | if _args.config is not None:
29 | if not Path(_args.config).exists():
30 | self.error(f"No such file: {_args.config}")
31 |
32 | with open(_args.config, "r", encoding="utf-8") as f:
33 | d = yaml.safe_load(f)
34 | if not isinstance(d, dict):
35 |                     self.error(f"Config file has non-dict value: {_args.config}")
36 |
37 | for key in d:
38 | for action in self._actions:
39 | if key == action.dest:
40 | break
41 | else:
42 | self.error(f"unrecognized arguments: {key} (from {_args.config})")
43 |
44 | # NOTE(kamo): Ignore "--config" from a config file
45 | # NOTE(kamo): Unlike "configargparse", this module doesn't check type.
46 | # i.e. We can set any type value regardless of argument type.
47 | self.set_defaults(**d)
48 | return super().parse_known_args(args, namespace)
49 |
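# Usage sketch (illustrative; not part of the original module): given a YAML
# file conf.yaml containing "stage: 2", its values become argument defaults,
# while keys that match no defined argument raise an error.
#
#     parser = ArgumentParser(description="demo")
#     parser.add_argument("--stage", type=int, default=1)
#     args = parser.parse_args(["--config", "conf.yaml"])
#     assert args.stage == 2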
50 |
51 | def get_commandline_args():
52 | extra_chars = [
53 | " ",
54 | ";",
55 | "&",
56 | "(",
57 | ")",
58 | "|",
59 | "^",
60 | "<",
61 | ">",
62 | "?",
63 | "*",
64 | "[",
65 | "]",
66 | "$",
67 | "`",
68 | '"',
69 | "\\",
70 | "!",
71 | "{",
72 | "}",
73 | ]
74 |
75 | # Escape the extra characters for shell
76 | argv = [
77 | arg.replace("'", "'\\''")
78 | if all(char not in arg for char in extra_chars)
79 | else "'" + arg.replace("'", "'\\''") + "'"
80 | for arg in sys.argv
81 | ]
82 |
83 | return sys.executable + " " + " ".join(argv)
--------------------------------------------------------------------------------
/ClipVideo/clipvideo/gradio_service.py:
--------------------------------------------------------------------------------
1 | import gradio as gr
2 | from modelscope.pipelines import pipeline
3 | from modelscope.utils.constant import Tasks
4 | from videoclipper import VideoClipper
5 |
6 |
7 | if __name__ == "__main__":
8 | inference_pipeline = pipeline(
9 | task=Tasks.auto_speech_recognition,
10 | model='damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
11 | vad_model='damo/speech_fsmn_vad_zh-cn-16k-common-pytorch',
12 | punc_model='damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch',
13 | )
14 | audio_clipper = VideoClipper(inference_pipeline)
15 |
16 | def audio_recog(audio_input):
17 | return audio_clipper.recog(audio_input)
18 |
19 | def audio_clip(dest_text, start_ost, end_ost, state):
20 | return audio_clipper.clip(dest_text, start_ost, end_ost, state)
21 |
22 | def video_recog(video_input):
23 | return audio_clipper.video_recog(video_input)
24 |
25 | def video_clip(dest_text, start_ost, end_ost, state):
26 | return audio_clipper.video_clip(dest_text, start_ost, end_ost, state)
27 |
28 | def video_clip_addsub(dest_text, start_ost, end_ost, state, font_size, font_color):
29 | return audio_clipper.video_clip(dest_text, start_ost, end_ost, state, font_size, font_color, add_sub=True)
30 |
31 |     '''
32 |     top_md_1 = ("""
33 |     Speech recognition, VAD, punctuation restoration, and timestamping powered by DAMO Academy's Paraformer-long model
34 |
35 |     Recognize accurately, copy the text segments you need, and clip or add subtitles in one click
36 |
37 |     * Step1: Upload a video file (or try the examples below) and click **Recognize**
38 |     * Step2: Copy the text you need to the upper right and set the offsets and subtitle options (optional)
39 |     * Step3: Click **Clip** or **Clip and Generate Subtitles** to get the result
40 |     """)
41 |     '''
42 |
43 |     top_md_2 = ("""
44 |     Due to network transmission and server resource limits, videos for this demo should ideally be under 40 MB.
45 |     For larger videos, try separating the audio track and clipping the audio instead, or **deploy your own ClipVideo service locally from the source code (recommended)**:
46 |
47 |     FunASR_APP: https://github.com/alibaba-damo-academy/FunASR-APP
48 |
49 |     🌟Support us: https://github.com/alibaba-damo-academy/FunASR-APP
50 |
51 |
52 |     """)
53 |
54 |     top_md_3 = ("""Visiting the FunASR project and paper will help you learn more about the speech-processing models used in ClipVideo:
55 |
56 |     FunASR: https://github.com/alibaba-damo-academy/FunASR
57 |
58 |     FunASR Paper:
59 |
60 |     🌟Star FunASR: https://github.com/alibaba-damo-academy/FunASR
61 |
62 |     """)
63 |
64 | # gradio interface
65 | with gr.Blocks() as demo:
66 | #gr.Image("./examples/guide.png", show_label=False)
67 | # gr.Markdown(top_md_1)
68 | #gr.Markdown(top_md_2)
69 | #gr.Markdown(top_md_3)
70 | video_state = gr.State()
71 | audio_state = gr.State()
72 | with gr.Tab("🎥✂️视频裁剪 Video Clipping"):
73 | with gr.Row():
74 | with gr.Column():
75 | video_input = gr.Video(label="🎥视频输入 Video Input")
76 | gr.Examples(['https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ClipVideo/%E4%B8%BA%E4%BB%80%E4%B9%88%E8%A6%81%E5%A4%9A%E8%AF%BB%E4%B9%A6%EF%BC%9F%E8%BF%99%E6%98%AF%E6%88%91%E5%90%AC%E8%BF%87%E6%9C%80%E5%A5%BD%E7%9A%84%E7%AD%94%E6%A1%88-%E7%89%87%E6%AE%B5.mp4',
77 | 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ClipVideo/2022%E4%BA%91%E6%A0%96%E5%A4%A7%E4%BC%9A_%E7%89%87%E6%AE%B5.mp4',
78 | 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ClipVideo/2022%E4%BA%91%E6%A0%96%E5%A4%A7%E4%BC%9A_%E7%89%87%E6%AE%B52.mp4',
79 | 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ClipVideo/%E4%BD%BF%E7%94%A8chatgpt_%E7%89%87%E6%AE%B5.mp4'],
80 | [video_input])
81 | recog_button2 = gr.Button("👂识别 Recognize")
82 | video_text_output = gr.Textbox(label="✏️识别结果 Recognition Result")
83 |                     video_srt_output = gr.Textbox(label="📖SRT字幕内容 SRT Subtitles")
84 | with gr.Column():
85 |                     video_text_input = gr.Textbox(label="✏️待裁剪文本 Text to Clip (join multiple segments with '#')")
86 | with gr.Row():
87 | video_start_ost = gr.Slider(minimum=-500, maximum=1000, value=0, step=50, label="⏪开始位置偏移 Start Offset (ms)")
88 | video_end_ost = gr.Slider(minimum=-500, maximum=1000, value=100, step=50, label="⏩结束位置偏移 End Offset (ms)")
89 | with gr.Row():
90 | font_size = gr.Slider(minimum=10, maximum=100, value=32, step=2, label="🔠字幕字体大小 Subtitle Font Size")
91 | font_color = gr.Radio(["black", "white", "green", "red"], label="🌈字幕颜色 Subtitle Color", value='white')
92 | # font = gr.Radio(["黑体", "Alibaba Sans"], label="字体 Font")
93 | with gr.Row():
94 | clip_button2 = gr.Button("✂️裁剪\nClip")
95 | clip_button3 = gr.Button("✂️裁剪并添加字幕\nClip and Generate Subtitles")
96 |                     video_output = gr.Video(label="🎥裁剪结果 Video Clipped")
97 |                     video_mess_output = gr.Textbox(label="ℹ️裁剪信息 Clipping Log")
98 |                     video_srt_clip_output = gr.Textbox(label="📖裁剪部分SRT字幕内容 Clipped SRT Subtitles")
99 |
100 | with gr.Tab("🔊✂️音频裁剪 Audio Clipping"):
101 | with gr.Row():
102 | with gr.Column():
103 | audio_input = gr.Audio(label="🔊音频输入 Audio Input")
104 | gr.Examples(['https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ClipVideo/%E9%B2%81%E8%82%83%E9%87%87%E8%AE%BF%E7%89%87%E6%AE%B51.wav'], [audio_input])
105 | recog_button1 = gr.Button("👂识别 Recognize")
106 | audio_text_output = gr.Textbox(label="✏️识别结果 Recognition Result")
107 |                     audio_srt_output = gr.Textbox(label="📖SRT字幕内容 SRT Subtitles")
108 | with gr.Column():
109 |                     audio_text_input = gr.Textbox(label="✏️待裁剪文本 Text to Clip (join multiple segments with '#')")
110 | with gr.Row():
111 | audio_start_ost = gr.Slider(minimum=-500, maximum=1000, value=0, step=50, label="⏪开始位置偏移 Start Offset (ms)")
112 | audio_end_ost = gr.Slider(minimum=-500, maximum=1000, value=100, step=50, label="⏩结束位置偏移 End Offset (ms)")
113 | with gr.Row():
114 | clip_button1 = gr.Button("✂️裁剪 Clip")
115 | audio_output = gr.Audio(label="🔊裁剪结果 Audio Clipped")
116 | audio_mess_output = gr.Textbox(label="ℹ️裁剪信息 Clipping Log")
117 |                     audio_srt_clip_output = gr.Textbox(label="📖裁剪部分SRT字幕内容 Clipped SRT Subtitles")
118 |
119 | recog_button1.click(audio_recog,
120 | inputs=audio_input,
121 | outputs=[audio_text_output, audio_srt_output, audio_state])
122 | clip_button1.click(audio_clip,
123 | inputs=[audio_text_input, audio_start_ost, audio_end_ost, audio_state],
124 | outputs=[audio_output, audio_mess_output, audio_srt_clip_output])
125 |
126 | recog_button2.click(video_recog,
127 | inputs=video_input,
128 | outputs=[video_text_output, video_srt_output, video_state])
129 | clip_button2.click(video_clip,
130 | inputs=[video_text_input, video_start_ost, video_end_ost, video_state],
131 | outputs=[video_output, video_mess_output, video_srt_clip_output])
132 | clip_button3.click(video_clip_addsub,
133 | inputs=[video_text_input, video_start_ost, video_end_ost, video_state, font_size, font_color],
134 | outputs=[video_output, video_mess_output, video_srt_clip_output])
135 |
136 | # start gradio service in local
137 | demo.queue(concurrency_count=3).launch()
138 |
--------------------------------------------------------------------------------
/ClipVideo/clipvideo/imagemagick_test.py:
--------------------------------------------------------------------------------
1 | from moviepy.editor import *
2 | from moviepy.video.tools.subtitles import SubtitlesClip
3 |
4 | generator = lambda txt: TextClip(txt, font='./font/STHeitiMedium.ttc', fontsize=48, color='white')
5 | subs = [((0, 2), 'sub1中文字幕'),
6 | ((2, 4), 'subs2'),
7 | ((4, 6), 'subs3'),
8 | ((6, 8), 'subs4')]
9 |
10 | subtitles = SubtitlesClip(subs, generator)
11 |
12 | video = VideoFileClip("examples/2022云栖大会_片段.mp4")
13 | video = video.subclip(0, 8)
14 | video = CompositeVideoClip([video, subtitles.set_pos(('center','bottom'))])
15 |
16 | video.write_videofile("test_output.mp4")
--------------------------------------------------------------------------------
/ClipVideo/clipvideo/subtitle_utils.py:
--------------------------------------------------------------------------------
1 | def time_convert(ms):
2 | ms = int(ms)
3 | tail = ms % 1000
4 | s = ms // 1000
5 | mi = s // 60
6 | s = s % 60
7 | h = mi // 60
8 | mi = mi % 60
9 | h = "00" if h == 0 else str(h)
10 | mi = "00" if mi == 0 else str(mi)
11 | s = "00" if s == 0 else str(s)
12 |     tail = str(tail).zfill(3)  # SRT requires three-digit milliseconds
13 | if len(h) == 1: h = '0' + h
14 | if len(mi) == 1: mi = '0' + mi
15 | if len(s) == 1: s = '0' + s
16 | return "{}:{}:{},{}".format(h, mi, s, tail)
17 |
18 |
19 | class Text2SRT():
20 | def __init__(self, text_seg, ts_list, offset=0):
21 | self.token_list = [i for i in text_seg.split() if len(i)]
22 | self.ts_list = ts_list
23 | start, end = ts_list[0][0] - offset, ts_list[-1][1] - offset
24 | self.start_sec, self.end_sec = start, end
25 | self.start_time = time_convert(start)
26 | self.end_time = time_convert(end)
27 | def text(self):
28 | res = ""
29 | for word in self.token_list:
30 | if '\u4e00' <= word <= '\u9fff':
31 | res += word
32 | else:
33 | res += " " + word
34 | return res
35 | def len(self):
36 | return len(self.token_list)
37 | def srt(self, acc_ost=0.0):
38 | return "{} --> {}\n{}\n".format(
39 | time_convert(self.start_sec+acc_ost*1000),
40 | time_convert(self.end_sec+acc_ost*1000),
41 | self.text())
42 | def time(self, acc_ost=0.0):
43 | return (self.start_sec/1000+acc_ost, self.end_sec/1000+acc_ost)
44 |
45 |
46 | def generate_srt(sentence_list):
47 | srt_total = ''
48 | for i, d in enumerate(sentence_list):
49 | t2s = Text2SRT(d['text_seg'], d['ts_list'])
50 |         srt_total += "{}\n{}".format(i + 1, t2s.srt())  # SRT indices start at 1
51 | return srt_total
52 |
53 | def generate_srt_clip(sentence_list, start, end, begin_index=0, time_acc_ost=0.0):
54 | start, end = int(start * 1000), int(end * 1000)
55 | srt_total = ''
56 | cc = 1 + begin_index
57 | subs = []
58 | for i, d in enumerate(sentence_list):
59 | if d['ts_list'][-1][1] <= start:
60 | continue
61 | if d['ts_list'][0][0] >= end:
62 | break
63 | # parts in between
64 | if (d['ts_list'][-1][1] <= end and d['ts_list'][0][0] > start) or (d['ts_list'][-1][1] == end and d['ts_list'][0][0] == start):
65 | t2s = Text2SRT(d['text_seg'], d['ts_list'], offset=start)
66 | srt_total += "{}\n{}".format(cc, t2s.srt(time_acc_ost))
67 | subs.append((t2s.time(time_acc_ost), t2s.text()))
68 | cc += 1
69 | continue
70 | if d['ts_list'][0][0] <= start:
71 | if not d['ts_list'][-1][1] > end:
72 | for j, ts in enumerate(d['ts_list']):
73 | if ts[1] > start:
74 | break
75 | _text = " ".join(d['text_seg'].split()[j:])
76 | _ts = d['ts_list'][j:]
77 | else:
78 | for j, ts in enumerate(d['ts_list']):
79 | if ts[1] > start:
80 | _start = j
81 | break
82 | for j, ts in enumerate(d['ts_list']):
83 | if ts[1] > end:
84 | _end = j
85 | break
86 | _text = " ".join(d['text_seg'].split()[_start:_end])
87 | _ts = d['ts_list'][_start:_end]
88 |             if len(_ts):
89 | t2s = Text2SRT(_text, _ts, offset=start)
90 | srt_total += "{}\n{}".format(cc, t2s.srt(time_acc_ost))
91 | subs.append((t2s.time(time_acc_ost), t2s.text()))
92 | cc += 1
93 | continue
94 | if d['ts_list'][-1][1] > end:
95 | for j, ts in enumerate(d['ts_list']):
96 | if ts[1] > end:
97 | break
98 | _text = " ".join(d['text_seg'].split()[:j])
99 | _ts = d['ts_list'][:j]
100 | if len(_ts):
101 | t2s = Text2SRT(_text, _ts, offset=start)
102 | srt_total += "{}\n{}".format(cc, t2s.srt(time_acc_ost))
103 | subs.append(
104 | (t2s.time(time_acc_ost), t2s.text())
105 | )
106 | cc += 1
107 | continue
108 | return srt_total, subs, cc
109 |
--------------------------------------------------------------------------------
/ClipVideo/clipvideo/test.sh:
--------------------------------------------------------------------------------
1 | # step1: Recognize
2 | python videoclipper.py --stage 1 \
3 | --file ../examples/2022云栖大会_片段.mp4 \
4 | --output_dir ./output
5 | # now you can find recognition results and entire SRT file in ./output/
6 | # step2: Clip
7 | python videoclipper.py --stage 2 \
8 | --file ../examples/2022云栖大会_片段.mp4 \
9 | --output_dir ./output \
10 | --dest_text '所以这个是我们办这个奖的初心啊,我们也会一届一届的办下去' \
11 | --start_ost 0 \
12 | --end_ost 100 \
13 | --output_file './output/res.mp4'
--------------------------------------------------------------------------------
/ClipVideo/clipvideo/trans_utils.py:
--------------------------------------------------------------------------------
1 | PUNC_LIST = [',', '。', '!', '?', '、']
2 |
3 |
4 | def pre_proc(text):
5 | res = ''
6 | for i in range(len(text)):
7 | if text[i] in PUNC_LIST:
8 | continue
9 | if '\u4e00' <= text[i] <= '\u9fff':
10 | if len(res) and res[-1] != " ":
11 | res += ' ' + text[i]+' '
12 | else:
13 | res += text[i]+' '
14 | else:
15 | res += text[i]
16 |     if len(res) and res[-1] == ' ':
17 |         res = res[:-1]
18 | return res
19 |
20 | def proc(raw_text, timestamp, dest_text):
21 | # simple matching
22 | ld = len(dest_text.split())
23 | mi, ts = [], []
24 | offset = 0
25 |     while True:
26 |         fi = raw_text.find(dest_text, offset, len(raw_text))
27 |         if fi == -1:
28 |             break
29 |         ti = raw_text[:fi].count(' ')  # token index of the match
30 |         offset = fi + ld
31 |         mi.append(fi)
32 |         ts.append([timestamp[ti][0]*16, timestamp[ti+ld-1][1]*16])  # ms -> 16 kHz sample index
33 |     return ts
36 |
37 |
38 | def write_state(output_dir, state):
39 | for key in ['/recog_res_raw', '/timestamp', '/sentences']:
40 | with open(output_dir+key, 'w', encoding="UTF-8") as fout:
41 | fout.write(str(state[key[1:]]))
42 |
43 |
44 | def load_state(output_dir):
45 |     import ast  # literal_eval is safer than eval for parsing repr'd data
46 |     state = {}
47 |     with open(output_dir+'/recog_res_raw', encoding='UTF-8') as fin:
48 |         line = fin.read()
49 |         state['recog_res_raw'] = line
50 |     with open(output_dir+'/timestamp', encoding='UTF-8') as fin:
51 |         line = fin.read()
52 |         state['timestamp'] = ast.literal_eval(line)
53 |     with open(output_dir+'/sentences', encoding='UTF-8') as fin:
54 |         line = fin.read()
55 |         state['sentences'] = ast.literal_eval(line)
56 |     return state
56 |
57 |
--------------------------------------------------------------------------------
/ClipVideo/clipvideo/videoclipper.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import copy
4 | import librosa
5 | import logging
6 | import argparse
7 | import numpy as np
8 | import soundfile as sf
9 | import moviepy.editor as mpy
10 | from subtitle_utils import generate_srt, generate_srt_clip
11 | from trans_utils import pre_proc, proc, write_state, load_state
12 | from argparse_tools import ArgumentParser, get_commandline_args
13 |
14 | from moviepy.editor import *
15 | from moviepy.video.tools.subtitles import SubtitlesClip
16 |
17 |
18 | class VideoClipper():
19 | def __init__(self, asr_pipeline):
20 | logging.warning("Initializing VideoClipper.")
21 | self.asr_pipeline = asr_pipeline
22 |
23 | def recog(self, audio_input, state=None):
24 | if state is None:
25 | state = {}
26 | sr, data = audio_input
27 | assert sr == 16000, "16kHz sample rate required, {} given.".format(sr)
28 | if len(data.shape) == 2: # multi-channel wav input
29 | # logging.warning("Input wav shape: {}, only first channel reserved.").format(data.shape)
30 | data = data[:,0]
31 | state['audio_input'] = (sr, data)
32 | data = data.astype(np.float64)
33 | rec_result = self.asr_pipeline(audio_in=data)
34 | state['recog_res_raw'] = rec_result['text_postprocessed']
35 | state['timestamp'] = rec_result['time_stamp']
36 | state['sentences'] = rec_result['sentences']
37 | res_text = rec_result['text']
38 | res_srt = generate_srt(rec_result['sentences'])
39 | return res_text, res_srt, state
40 |
41 | def clip(self, dest_text, start_ost, end_ost, state):
42 | # get from state
43 | audio_input = state['audio_input']
44 | recog_res_raw = state['recog_res_raw']
45 | timestamp = state['timestamp']
46 | sentences = state['sentences']
47 | sr, data = audio_input
48 | data = data.astype(np.float64)
49 |
50 | all_ts = []
51 | for _dest_text in dest_text.split('#'):
52 | _dest_text = pre_proc(_dest_text)
53 | ts = proc(recog_res_raw, timestamp, _dest_text)
54 | for _ts in ts: all_ts.append(_ts)
55 | ts = all_ts
56 | srt_index = 0
57 | clip_srt = ""
58 | if len(ts):
59 | start, end = ts[0]
60 | start = min(max(0, start+start_ost*16), len(data))
61 | end = min(max(0, end+end_ost*16), len(data))
62 | res_audio = data[start:end]
63 | start_end_info = "from {} to {}".format(start/16000, end/16000)
64 | srt_clip, _, srt_index = generate_srt_clip(sentences, start/16000.0, end/16000.0, begin_index=srt_index)
65 | clip_srt += srt_clip
66 | for _ts in ts[1:]: # multiple sentence input or multiple output matched
67 | start, end = _ts
68 | start = min(max(0, start+start_ost*16), len(data))
69 | end = min(max(0, end+end_ost*16), len(data))
70 |                 start_end_info += ", from {} to {}".format(start/16000, end/16000)
71 |                 res_audio = np.concatenate([res_audio, data[start:end]], -1)  # offsets already applied above
72 | srt_clip, _, srt_index = generate_srt_clip(sentences, start/16000.0, end/16000.0, begin_index=srt_index-1)
73 | clip_srt += srt_clip
74 | if len(ts):
75 | message = "{} periods found in the speech: ".format(len(ts)) + start_end_info
76 | else:
77 | message = "No period found in the speech, return raw speech. You may check the recognition result and try other destination text."
78 | res_audio = data
79 | return (sr, res_audio), message, clip_srt
80 |
81 | def video_recog(self, vedio_filename):
83 | clip_video_file = vedio_filename[:-4] + '_clip.mp4'
84 | video = mpy.VideoFileClip(vedio_filename)
85 | audio_file = vedio_filename[:-3] + 'wav'
86 | video.audio.write_audiofile(audio_file)
87 | wav = librosa.load(audio_file, sr=16000)[0]
88 | state = {
89 | 'vedio_filename': vedio_filename,
90 | 'clip_video_file': clip_video_file,
91 | 'video': video,
92 | }
93 | # res_text, res_srt = self.recog((16000, wav), state)
94 | os.remove(audio_file)
95 | return self.recog((16000, wav), state)
96 |
97 | def video_clip(self, dest_text, start_ost, end_ost, state, font_size=32, font_color='white', add_sub=False):
98 | # get from state
99 | recog_res_raw = state['recog_res_raw']
100 | timestamp = state['timestamp']
101 | sentences = state['sentences']
102 | video = state['video']
103 | clip_video_file = state['clip_video_file']
104 | vedio_filename = state['vedio_filename']
105 |
106 | all_ts = []
107 | srt_index = 0
108 | time_acc_ost = 0.0
109 | for _dest_text in dest_text.split('#'):
110 | _dest_text = pre_proc(_dest_text)
111 | ts = proc(recog_res_raw, timestamp, _dest_text)
112 | for _ts in ts: all_ts.append(_ts)
113 | ts = all_ts
114 | clip_srt = ""
115 | if len(ts):
116 | start, end = ts[0][0] / 16000, ts[0][1] / 16000
117 | srt_clip, subs, srt_index = generate_srt_clip(sentences, start, end, begin_index=srt_index, time_acc_ost=time_acc_ost)
118 | start, end = start+start_ost/1000.0, end+end_ost/1000.0
119 | video_clip = video.subclip(start, end)
120 | start_end_info = "from {} to {}".format(start, end)
121 | clip_srt += srt_clip
122 | if add_sub:
123 | generator = lambda txt: TextClip(txt, font='./font/STHeitiMedium.ttc', fontsize=font_size, color=font_color)
124 | subtitles = SubtitlesClip(subs, generator)
125 | video_clip = CompositeVideoClip([video_clip, subtitles.set_pos(('center','bottom'))])
126 | concate_clip = [video_clip]
127 |             time_acc_ost += end - start  # start/end already include the offsets
128 | for _ts in ts[1:]:
129 | start, end = _ts[0] / 16000, _ts[1] / 16000
130 | srt_clip, subs, srt_index = generate_srt_clip(sentences, start, end, begin_index=srt_index-1, time_acc_ost=time_acc_ost)
131 | start, end = start+start_ost/1000.0, end+end_ost/1000.0
132 | _video_clip = video.subclip(start, end)
133 | start_end_info += ", from {} to {}".format(start, end)
134 | clip_srt += srt_clip
135 | if add_sub:
136 | generator = lambda txt: TextClip(txt, font='./font/STHeitiMedium.ttc', fontsize=font_size, color=font_color)
137 | subtitles = SubtitlesClip(subs, generator)
138 | _video_clip = CompositeVideoClip([_video_clip, subtitles.set_pos(('center','bottom'))])
139 | concate_clip.append(copy.copy(_video_clip))
140 |                 time_acc_ost += end - start  # offsets already applied above
141 | message = "{} periods found in the audio: ".format(len(ts)) + start_end_info
142 | logging.warning("Concating...")
143 | if len(concate_clip) > 1:
144 | video_clip = concatenate_videoclips(concate_clip)
145 | video_clip.write_videofile(clip_video_file, audio_codec="aac")
146 | else:
147 | clip_video_file = vedio_filename
148 | message = "No period found in the audio, return raw speech. You may check the recognition result and try other destination text."
149 | srt_clip = ''
150 | return clip_video_file, message, clip_srt
151 |
152 |
153 | def get_parser():
154 | parser = ArgumentParser(
155 | description="ClipVideo Argument",
156 | formatter_class=argparse.ArgumentDefaultsHelpFormatter,
157 | )
158 | parser.add_argument(
159 | "--stage",
160 | type=int,
161 | choices=(1, 2),
162 |         help="Stage, 1 for recognizing and 2 for clipping",
163 | required=True
164 | )
165 | parser.add_argument(
166 | "--file",
167 | type=str,
168 | default=None,
169 | help="Input file path",
170 | required=True
171 | )
172 | parser.add_argument(
173 | "--output_dir",
174 | type=str,
175 | default='./output/mp4',
176 | help="Output files path",
177 | )
178 | parser.add_argument(
179 | "--dest_text",
180 | type=str,
181 | default=None,
182 | help="Destination text string for clipping",
183 | )
184 | parser.add_argument(
185 | "--start_ost",
186 | type=int,
187 | default=0,
188 | help="Offset time in ms at beginning for clipping"
189 | )
190 | parser.add_argument(
191 | "--end_ost",
192 | type=int,
193 | default=0,
194 | help="Offset time in ms at ending for clipping"
195 | )
196 | parser.add_argument(
197 | "--output_file",
198 | type=str,
199 | default=None,
200 | help="Output file path"
201 | )
202 | return parser
203 |
204 |
205 | def runner(stage, file, output_dir, dest_text, start_ost, end_ost, output_file, config=None):
206 | audio_suffixs = ['wav']
207 | video_suffixs = ['mp4']
208 | if file[-3:] in audio_suffixs:
209 | mode = 'audio'
210 | elif file[-3:] in video_suffixs:
211 | mode = 'video'
212 | else:
213 |         logging.error("Unsupported file format: {}".format(file))
214 |         sys.exit(1)  # avoid falling through with mode undefined
214 | while output_dir.endswith('/'):
215 | output_dir = output_dir[:-1]
216 | if stage == 1:
217 | from modelscope.pipelines import pipeline
218 | from modelscope.utils.constant import Tasks
219 | # initialize modelscope asr pipeline
220 | logging.warning("Initializing modelscope asr pipeline.")
221 | inference_pipeline = pipeline(
222 | task=Tasks.auto_speech_recognition,
223 | model='damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
224 | vad_model='damo/speech_fsmn_vad_zh-cn-16k-common-pytorch',
225 | punc_model='damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch',
226 | output_dir=output_dir,
227 | )
228 | audio_clipper = VideoClipper(inference_pipeline)
229 | if mode == 'audio':
230 | logging.warning("Recognizing audio file: {}".format(file))
231 | wav, sr = librosa.load(file, sr=16000)
232 | res_text, res_srt, state = audio_clipper.recog((sr, wav))
233 | if mode == 'video':
234 | logging.warning("Recognizing video file: {}".format(file))
235 | res_text, res_srt, state = audio_clipper.video_recog(file)
236 | total_srt_file = output_dir + '/total.srt'
237 | with open(total_srt_file, 'w') as fout:
238 | fout.write(res_srt)
239 |         logging.warning("Write total subtitle to {}".format(total_srt_file))
240 | write_state(output_dir, state)
241 |         logging.warning("Recognition succeeded. You can copy the text segments below and use stage 2.")
242 | print(res_text)
243 | if stage == 2:
244 | audio_clipper = VideoClipper(None)
245 | if mode == 'audio':
246 | state = load_state(output_dir)
247 | wav, sr = librosa.load(file, sr=16000)
248 | state['audio_input'] = (sr, wav)
249 | (sr, audio), message, srt_clip = audio_clipper.clip(dest_text, start_ost, end_ost, state)
250 | if output_file is None:
251 | output_file = output_dir + '/result.wav'
252 | clip_srt_file = output_file[:-3] + 'srt'
253 | logging.warning(message)
254 |             assert output_file.endswith('.wav'), "output_file must end with '.wav'"
255 |             sf.write(output_file, audio, 16000)
256 | logging.warning("Save clipped wav file to {}".format(output_file))
257 | with open(clip_srt_file, 'w') as fout:
258 | fout.write(srt_clip)
259 |             logging.warning("Write clipped subtitle to {}".format(clip_srt_file))
260 | if mode == 'video':
261 | state = load_state(output_dir)
262 | state['vedio_filename'] = file
263 | if output_file is None:
264 | state['clip_video_file'] = file[:-4] + '_clip.mp4'
265 | else:
266 | state['clip_video_file'] = output_file
267 | clip_srt_file = state['clip_video_file'][:-3] + 'srt'
268 | state['video'] = mpy.VideoFileClip(file)
269 | clip_video_file, message, srt_clip = audio_clipper.video_clip(dest_text, start_ost, end_ost, state)
270 | logging.warning("Clipping Log: {}".format(message))
271 | logging.warning("Save clipped mp4 file to {}".format(clip_video_file))
272 | with open(clip_srt_file, 'w') as fout:
273 | fout.write(srt_clip)
274 |             logging.warning("Write clipped subtitle to {}".format(clip_srt_file))
275 |
276 |
277 | def main(cmd=None):
278 | print(get_commandline_args(), file=sys.stderr)
279 | parser = get_parser()
280 | args = parser.parse_args(cmd)
281 | kwargs = vars(args)
282 | runner(**kwargs)
283 |
284 |
285 | if __name__ == '__main__':
286 | main()
--------------------------------------------------------------------------------
/ClipVideo/docs/images/show.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fatfish588/Dataset_Generator_For_VITS/2f012ac453a99723c29ffbf119b4ff545492ebb7/ClipVideo/docs/images/show.png
--------------------------------------------------------------------------------
/ClipVideo/requirments.txt:
--------------------------------------------------------------------------------
1 | librosa
2 | soundfile
3 | funasr>=0.5.5
4 | moviepy
5 | numpy
6 | gradio
--------------------------------------------------------------------------------
/FunASR-APP-LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Alibaba
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/FunASR-APP-README.md:
--------------------------------------------------------------------------------
1 | # FunASR-APP
2 |
3 | FunASR-APP is a comprehensive speech application toolkit designed to facilitate the application and integration of [FunASR](https://github.com/alibaba-damo-academy/FunASR)'s open-source speech models. Its primary goal is to package the models into convenient application packages, enabling easy application and seamless integration.
4 |
5 | ## ClipVideo
6 |
7 | As the first application toolkit of FunASR-APP, ClipVideo enables users to clip ```.mp4``` video files or ```.wav``` audio files with chosen text segments out of the recognition results generated by [Paraformer-long model](https://modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary).
8 |
9 | With the help of ClipVideo you can get video clips easily with the following steps (in the Gradio service):
10 | - Step1: Upload your video file (or try the example videos below)
11 | - Step2: Copy the text segments you need to 'Text to Clip'
12 | - Step3: Adjust subtitle settings (if needed)
13 | - Step4: Click 'Clip' or 'Clip and Generate Subtitles'
14 |
15 | ### Usage
16 | ```shell
17 | git clone https://github.com/alibaba-damo-academy/FunASR-APP.git
18 | cd FunASR-APP
19 | # install modelscope
20 | pip install "modelscope[audio_asr]" -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
21 | # python environments
22 | pip install -r ClipVideo/requirments.txt
23 | ```
24 | (Optional) If you want to clip video files with embedded subtitles:
25 |
26 | 1. ffmpeg and imagemagick are required
27 |
28 | - On Ubuntu
29 | ```shell
30 | apt-get -y update && apt-get -y install ffmpeg imagemagick
31 | sed -i 's/none/read,write/g' /etc/ImageMagick-6/policy.xml
32 | ```
33 | - On macOS
34 | ```shell
35 | brew install imagemagick
36 | sed -i '' 's/none/read,write/g' /usr/local/Cellar/imagemagick/7.1.1-8_1/etc/ImageMagick-7/policy.xml
37 | ```
38 | 2. Download font file to ClipVideo/font
39 |
40 | ```shell
41 | wget https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ClipVideo/STHeitiMedium.ttc -O ClipVideo/font/STHeitiMedium.ttc
42 | ```
43 |
44 | #### Experience ClipVideo in Modelscope
45 | You can try ClipVideo in modelscope space: [link](https://modelscope.cn/studios/damo/funasr_app_clipvideo/summary).
46 |
47 | #### Use ClipVideo as Gradio Service
48 | You can launch your own ClipVideo service, identical to the [Modelscope Space](https://modelscope.cn/studios/damo/funasr_app_clipvideo/summary), as follows:
49 | ```shell
50 | python ClipVideo/clipvideo/gradio_service.py
51 | ```
52 | then visit ```localhost:7860``` and you will get a Gradio service like the one below, where you can use ClipVideo by following the steps above:
53 |
54 |
55 | #### Use ClipVideo from the command line
56 | ClipVideo also lets you recognize and clip from the command line:
57 | ```shell
58 | # working in ClipVideo/
59 | # step1: Recognize
60 | python clipvideo/videoclipper.py --stage 1 \
61 | --file examples/2022云栖大会_片段.mp4 \
62 | --output_dir ./output
63 | # now you can find recognition results and entire SRT file in ./output/
64 | # step2: Clip
65 | python clipvideo/videoclipper.py --stage 2 \
66 | --file examples/2022云栖大会_片段.mp4 \
67 | --output_dir ./output \
68 | --dest_text '我们把它跟乡村振兴去结合起来,利用我们的设计的能力' \
69 | --start_ost 0 \
70 | --end_ost 100 \
71 | --output_file './output/res.mp4'
72 | ```
73 |
74 | ### Study Speech Related Models in FunASR
75 |
76 | [FunASR](https://github.com/alibaba-damo-academy/FunASR) hopes to build a bridge between academic research and industrial applications on speech recognition. By supporting the training & finetuning of the industrial-grade speech recognition model released on ModelScope, researchers and developers can conduct research and production of speech recognition models more conveniently, and promote the development of speech recognition ecology. ASR for Fun!
77 |
78 | 📚FunASR Paper:
79 | 🌟Support FunASR: https://github.com/alibaba-damo-academy/FunASR
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Fatfish588
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Dataset_Generator_For_VITS
2 | A VITS dataset-generation tool for converting video into short audio clips, based on Alibaba DAMO Academy's video clipping technology
4 |
5 | # Introduction
6 | > In today's VITS voice-model training workflow there are plenty of excellent projects for annotation, cleaning, and training frameworks, but dataset preparation itself seems to have drawn little attention. Manually preparing a dataset of hundreds or thousands of short audio clips is simply unrealistic for ordinary VITS hobbyists who just want to hear the voice of a favorite streamer or loved one. Until now (2023.10), the only open-source, usable audio/video slicer was the whisper-based one embedded in VITS-fast-fine-tuning, and because it is tightly coupled to that project, it cannot be adapted to others such as bert-vits2.
7 |
8 | > For the reasons above, I made this small tool by lightly modifying ClipVideo from Alibaba DAMO Academy's FunASR-APP, hoping to fill in a missing piece for the VITS training community. All you need is audio or video of one person or character, no matter how long or how large; a little background music is fine, and even a mix of audio and video dropped into the target directory works. Click run and you get a series of 1-10 second short audio clips that can go straight into other annotation projects for the next step of preparation.
9 |
10 | 1. This project is a light modification of ClipVideo from Alibaba DAMO Academy's FunASR-APP. It works by using ClipVideo to cut the audio/video of a given sentence out of a video by its text (a marvelous technique), with configurable start and end offsets. For Chinese it works better than whisper and does not chop the final syllable in half.
11 | 2. All output goes to ClipVideo/output/ under the project root, including the clipped audio/video and the subtitle file for each video, which you can use if needed.
12 | 3. This project is fairly simple, so the pipeline from input video to output audio dataset runs straight through. If a parameter is not what you want, change it yourself; for example, the offsets mentioned in point 1 can be edited in the first half of Dataset_generator.py (by default no offset is applied), as in the sketch below.
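
For example, to extend every clip by 100 ms at the end, edit the offset constants near the top of Dataset_generator.py (a minimal sketch; the values are illustrative):

```python
# In ClipVideo/clipvideo/Dataset_generator.py: clip offsets in milliseconds
START_OST = 0    # offset applied to each clip's start
END_OST = 100    # offset applied to each clip's end
```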
13 |
14 | The image below shows the result: with GPU acceleration, about 600 short clips are generated in two minutes.
15 | 
16 |
17 | # Changelog
18 | 2023/10/12
19 | 1. Audio (.wav) and video (.mp4) can now be mixed together in video_files and run directly. The generator takes care of everything!
20 | 2. Added demucs vocal separation, so audio and video with light BGM can now be processed. Note: do not use videos whose background music has Chinese vocals, since the lyrics would also be recognized and sliced. The separation step is sketched below.
21 | 3. Added output-directory initialization: the generator empties every directory except video_files before running. Just hit run and leave everything to the generator!
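
For reference, the vocal-separation step the generator runs boils down to this call (a sketch matching Dataset_generator.py; the file names are illustrative):

```python
import os

# separate vocals with the demucs mdx_extra model; the generator then
# recognizes speech from the separated track under input/mdx_extra/
input_wav = "org_wav/example.wav"  # hypothetical input file
os.system(f"demucs -n mdx_extra -o input/ --filename example.wav {input_wav}")
```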
22 |
23 | # Tutorial
24 | 1. Clone this repository (Python 3.10; anything from 3.8 onward should work)
25 |
26 | ```bash
27 | git clone https://github.com/Fatfish588/Dataset_Generator_For_VITS.git
28 | ```
29 |
30 | Create the required directories (on Windows, just create the folders):
31 | ```bash
32 | mkdir ClipVideo/font
33 | mkdir ClipVideo/clipvideo/video_files
34 | mkdir ClipVideo/clipvideo/output
35 | mkdir ClipVideo/clipvideo/input
36 | mkdir ClipVideo/clipvideo/input/mdx_extra
37 | mkdir ClipVideo/clipvideo/org_wav
38 | mkdir ClipVideo/clipvideo/output/mp4
39 | mkdir ClipVideo/clipvideo/output/srt
40 | mkdir ClipVideo/clipvideo/output/wav
41 | ```
42 | At this point this part of the tree should look like the image below. All the following steps are performed from the Dataset_Generator_For_VITS root directory.
43 | 
44 |
45 |
46 |
47 | 2. Install the dependencies
48 | ```bash
49 | # install modelscope
50 | pip install "modelscope[audio_asr]" -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
51 | # python environments
52 | pip install -r ClipVideo/requirments.txt
53 | pip install torchaudio
54 | pip install demucs~=4.0.0
55 | pip install umap
56 | pip install hdbscan
57 | # download the font (used by the webUI for subtitle embedding)
58 | wget https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ClipVideo/STHeitiMedium.ttc -O ClipVideo/font/STHeitiMedium.ttc
59 | ```
60 | If you want GPU acceleration (especially for the vocal separation) you need matching torch and CUDA builds. Since everyone's GPU differs, here is mine for reference only: RTX 4090 + CUDA 11.7
61 | ```bash
62 | pip install torch==2.0.1+cu117 torchaudio==2.0.2+cu117 torchvision==0.15.2+cu117 --extra-index-url https://download.pytorch.org/whl/cu117
63 | ```
64 | 3. Launch the webUI once, so that FunASR-APP automatically downloads the speech-to-text models. This download step is slow and the progress bar may look stuck, which is normal; just wait. The models come from Alibaba's servers, so you may need to turn your proxy off.
65 |
66 | ```bash
67 | python ClipVideo/clipvideo/gradio_service.py
68 | ```
69 | If the webUI opens successfully, the FunASR-APP dependencies are ready.
70 |
71 | 4. Put all the audio or video files to process into the video_files directory. Mixing multiple audio and video files is supported, as long as they are all the same person's voice.
72 | 
73 |
74 | 5. Start the run
75 |
76 | ```bash
77 | python ClipVideo/clipvideo/Dataset_generator.py
78 | ```
79 | 6. When the run finishes, the results are saved under ClipVideo/clipvideo/output/wav
80 | 
81 |
82 |
83 | # Roadmap
84 | 1. Add a denoising model to support videos with background music. (Done)
85 | 2. One-click reset to the initial state, to avoid emptying directories by hand every time. (Done)
86 | 3. Clean up the code; there are currently too many scattered steps.
87 | 4. One-click pipeline from video to audio dataset plus annotated training set (most likely abandoned)
88 | # Notes
89 | 1. This project currently only supports Chinese audio and video with light or purely instrumental background music, such as audiobooks, tutorial videos, popular-science videos, VTuber chat streams (goodness, they can chat with the live comments for a full six hours), and so on.
90 | 2. This project only turns long videos into datasets of few-second audio clips, saving manual slicing time; it does not annotate, resample, or build training sets.
91 | 3. The generated file names are the Paraformer model's transcriptions and only serve to tell files apart; they are not 100% accurate, so do not use them directly as training labels.
92 | 4. Make sure the four directories video_files, output/mp4, output/srt, and output/wav exist under ClipVideo/clipvideo/. The generator now empties them on every run, but they must be created before the first run; after that you never need to touch them.
93 | 5. The code is very simple and every function is commented; parts you do not need, such as the vocal separation, can be modified or removed.
94 | 6. Follow Yongchu Tafei meow, follow Yongchu Tafei thank you meow.
95 | # Links
96 | [Paraformer automatic video slicing and subtitles (ModelScope Studio), Alibaba DAMO Academy](https://modelscope.cn/studios/damo/funasr_app_clipvideo/summary)
97 | [FunASR-APP (GitHub)](https://github.com/alibaba-damo-academy/FunASR-APP)
98 |
--------------------------------------------------------------------------------
/TransAudio/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Fatfish588/Dataset_Generator_For_VITS/2f012ac453a99723c29ffbf119b4ff545492ebb7/TransAudio/README.md
--------------------------------------------------------------------------------
/TransAudio/funasr-runtime-deploy.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | scriptVersion="0.0.3"
4 | scriptDate="20230629"
5 |
6 | clear
7 |
8 |
9 | # Set color
10 | RED="\033[31;1m"
11 | GREEN="\033[32;1m"
12 | YELLOW="\033[33;1m"
13 | BLUE="\033[34;1m"
14 | CYAN="\033[36;1m"
15 | PLAIN="\033[0m"
16 |
17 | # Info messages
18 | ERROR="${RED}[ERROR]${PLAIN}"
19 | WARNING="${YELLOW}[WARNING]${PLAIN}"
20 |
21 | # Font Format
22 | BOLD="\033[1m"
23 | UNDERLINE="\033[4m"
24 |
25 | # Current folder
26 | cur_dir=`pwd`
27 |
28 |
29 | checkConfigFileAndTouch(){
30 | mkdir -p /var/funasr
31 | if [ ! -f $FUNASR_CONFIG_FILE ]; then
32 | touch $FUNASR_CONFIG_FILE
33 | fi
34 | }
35 |
36 | SAMPLE_CLIENTS=( \
37 | "Python" \
38 | "Linux_Cpp" \
39 | )
40 | ASR_MODELS=( \
41 | "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx" \
42 | "model_name" \
43 | "model_path" \
44 | )
45 | VAD_MODELS=( \
46 | "damo/speech_fsmn_vad_zh-cn-16k-common-onnx" \
47 | "model_name" \
48 | "model_path" \
49 | )
50 | PUNC_MODELS=( \
51 | "damo/punc_ct-transformer_zh-cn-common-vocab272727-onnx" \
52 | "model_name" \
53 | "model_path" \
54 | )
55 | DOCKER_IMAGES=( \
56 | "registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-latest" \
57 | "registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.1.0" \
58 | )
59 | menuSelection(){
60 | local menu
61 | menu=($(echo "$@"))
62 | result=1
63 | show_no=1
64 | menu_no=0
65 | len=${#menu[@]}
66 |
67 | while true
68 | do
69 | echo -e " ${BOLD}${show_no})${PLAIN} ${menu[menu_no]}"
70 |
71 | let show_no++
72 | let menu_no++
73 | if [ ${menu_no} -ge ${len} ]; then
74 | break
75 | fi
76 | done
77 |
78 | while true
79 | do
80 | read -p " Enter your choice: " result
81 |
82 | expr ${result} + 0 &>/dev/null
83 | if [ $? -eq 0 ]; then
84 | if [ ${result} -ge 1 ] && [ ${result} -le ${len} ]; then
85 | break
86 | else
87 | echo -e " ${RED}Input error, please input correct number!${PLAIN}"
88 | fi
89 | else
90 | echo -e " ${RED}Input error, please input correct number!${PLAIN}"
91 | fi
92 | done
93 |
94 | return $result
95 | }
96 |
97 | DrawProgress(){
98 | model=$1
99 | title=$2
100 | percent_str=$3
101 | speed=$4
102 | revision=$5
103 | latest_percent=$6
104 |
105 | progress=0
106 | if [ ! -z "$percent_str" ]; then
107 | progress=`expr $percent_str + 0`
108 | latest_percent=`expr $latest_percent + 0`
109 | if [ $progress -ne 0 ] && [ $progress -lt $latest_percent ]; then
110 | progress=$latest_percent
111 | fi
112 | fi
113 |
114 | LOADING_FLAG="Loading"
115 | if [ "$title" = "$LOADING_FLAG" ]; then
116 | progress=100
117 | fi
118 |
119 | i=0
120 | str=""
121 | let max=progress/2
122 | while [ $i -lt $max ]
123 | do
124 | let i++
125 | str+='='
126 | done
127 | let color=36
128 | let index=max*2
129 | if [ -z "$speed" ]; then
130 |         printf "\r \e[0;$color;1m[%s][%-11s][%-50s][%3d%%][%s]\e[0m" "$model" "$title" "$str" "$index" "$revision"
131 | else
132 | printf "\r \e[0;$color;1m[%s][%-11s][%-50s][%3d%%][%8s][%s]\e[0m" "$model" "$title" "$str" "$index" "$speed" "$revision"
133 | fi
134 | printf "\n"
135 |
136 | return $progress
137 | }
138 |
139 | PROGRESS_TXT="/var/funasr/progress.txt"
140 | ASR_PERCENT_INT=0
141 | VAD_PERCENT_INT=0
142 | PUNC_PERCENT_INT=0
143 | ASR_TITLE="Downloading"
144 | ASR_PERCENT="0"
145 | ASR_SPEED="0KB/s"
146 | ASR_REVISION=""
147 | VAD_TITLE="Downloading"
148 | VAD_PERCENT="0"
149 | VAD_SPEED="0KB/s"
150 | VAD_REVISION=""
151 | PUNC_TITLE="Downloading"
152 | PUNC_PERCENT="0"
153 | PUNC_SPEED="0KB/s"
154 | PUNC_REVISION=""
155 | ServerProgress(){
156 | status_flag="STATUS:"
157 | stage=0
158 | wait=0
159 | server_status=""
160 |
161 | while true
162 | do
163 | if [ -f "$PROGRESS_TXT" ]; then
164 | break
165 | else
166 | sleep 1
167 | let wait=wait+1
168 | if [ ${wait} -ge 10 ]; then
169 | break
170 | fi
171 | fi
172 | done
173 |
174 | if [ ! -f "$PROGRESS_TXT" ]; then
175 | echo -e " ${RED}The note of progress does not exist.(${PROGRESS_TXT}) ${PLAIN}"
176 | return 98
177 | fi
178 |
179 | stage=1
180 | while read line
181 | do
182 | if [ $stage -eq 1 ]; then
183 | result=$(echo $line | grep "STATUS:")
184 | if [[ "$result" != "" ]]
185 | then
186 | stage=2
187 | server_status=${line#*:}
188 | status=`expr $server_status + 0`
189 | if [ $status -eq 99 ]; then
190 | stage=99
191 | fi
192 | continue
193 | fi
194 | elif [ $stage -eq 2 ]; then
195 | result=$(echo $line | grep "ASR")
196 | if [[ "$result" != "" ]]
197 | then
198 | stage=3
199 | continue
200 | fi
201 | elif [ $stage -eq 3 ]; then
202 | result=$(echo $line | grep "VAD")
203 | if [[ "$result" != "" ]]
204 | then
205 | stage=4
206 | continue
207 | fi
208 | result=$(echo $line | grep "title:")
209 | if [[ "$result" != "" ]]
210 | then
211 | ASR_TITLE=${line#*:}
212 | continue
213 | fi
214 | result=$(echo $line | grep "percent:")
215 | if [[ "$result" != "" ]]
216 | then
217 | ASR_PERCENT=${line#*:}
218 | continue
219 | fi
220 | result=$(echo $line | grep "speed:")
221 | if [[ "$result" != "" ]]
222 | then
223 | ASR_SPEED=${line#*:}
224 | continue
225 | fi
226 | result=$(echo $line | grep "revision:")
227 | if [[ "$result" != "" ]]
228 | then
229 | ASR_REVISION=${line#*:}
230 | continue
231 | fi
232 | elif [ $stage -eq 4 ]; then
233 | result=$(echo $line | grep "PUNC")
234 | if [[ "$result" != "" ]]
235 | then
236 | stage=5
237 | continue
238 | fi
239 | result=$(echo $line | grep "title:")
240 | if [[ "$result" != "" ]]
241 | then
242 | VAD_TITLE=${line#*:}
243 | continue
244 | fi
245 | result=$(echo $line | grep "percent:")
246 | if [[ "$result" != "" ]]
247 | then
248 | VAD_PERCENT=${line#*:}
249 | continue
250 | fi
251 | result=$(echo $line | grep "speed:")
252 | if [[ "$result" != "" ]]
253 | then
254 | VAD_SPEED=${line#*:}
255 | continue
256 | fi
257 | result=$(echo $line | grep "revision:")
258 | if [[ "$result" != "" ]]
259 | then
260 | VAD_REVISION=${line#*:}
261 | continue
262 | fi
263 | elif [ $stage -eq 5 ]; then
264 | result=$(echo $line | grep "DONE")
265 | if [[ "$result" != "" ]]
266 | then
267 | # Done and break.
268 | stage=6
269 | break
270 | fi
271 | result=$(echo $line | grep "title:")
272 | if [[ "$result" != "" ]]
273 | then
274 | PUNC_TITLE=${line#*:}
275 | continue
276 | fi
277 | result=$(echo $line | grep "percent:")
278 | if [[ "$result" != "" ]]
279 | then
280 | PUNC_PERCENT=${line#*:}
281 | continue
282 | fi
283 | result=$(echo $line | grep "speed:")
284 | if [[ "$result" != "" ]]
285 | then
286 | PUNC_SPEED=${line#*:}
287 | continue
288 | fi
289 | result=$(echo $line | grep "revision:")
290 | if [[ "$result" != "" ]]
291 | then
292 | PUNC_REVISION=${line#*:}
293 | continue
294 | fi
295 | elif [ $stage -eq 99 ]; then
296 | echo -e " ${RED}ERROR: $line${PLAIN}"
297 | fi
298 | done < $PROGRESS_TXT
299 |
300 | if [ $stage -ne 99 ]; then
301 | DrawProgress "ASR " $ASR_TITLE $ASR_PERCENT $ASR_SPEED $ASR_REVISION $ASR_PERCENT_INT
302 | ASR_PERCENT_INT=$?
303 | DrawProgress "VAD " $VAD_TITLE $VAD_PERCENT $VAD_SPEED $VAD_REVISION $VAD_PERCENT_INT
304 | VAD_PERCENT_INT=$?
305 | DrawProgress "PUNC" $PUNC_TITLE $PUNC_PERCENT $PUNC_SPEED $PUNC_REVISION $PUNC_PERCENT_INT
306 | PUNC_PERCENT_INT=$?
307 | fi
308 |
309 | return $stage
310 | }
311 |
312 | # Make sure root user
313 | rootNess(){
314 | echo -e "${UNDERLINE}${BOLD}[0/9]${PLAIN}"
315 | echo -e " ${YELLOW}Please check root access.${PLAIN}"
316 | echo
317 |
318 |     if [[ $EUID -ne 0 ]]; then
319 |         echo -e "  ${ERROR} MUST RUN AS ${RED}ROOT${PLAIN} USER!"
320 |     fi
322 |
323 | checkConfigFileAndTouch
324 | cd ${cur_dir}
325 | echo
326 | }
327 |
328 | selectDockerImages(){
329 | echo -e "${UNDERLINE}${BOLD}[1/9]${PLAIN}"
330 | echo -e " ${YELLOW}Please choose the Docker image.${PLAIN}"
331 |
332 | menuSelection ${DOCKER_IMAGES[*]}
333 | result=$?
334 | index=`expr $result - 1`
335 |
336 | PARAMS_DOCKER_IMAGE=${DOCKER_IMAGES[${index}]}
337 | echo -e " ${UNDERLINE}You have chosen the Docker image:${PLAIN} ${GREEN}${PARAMS_DOCKER_IMAGE}${PLAIN}"
338 |
339 | checkDockerExist
340 | result=$?
341 | result=`expr $result + 0`
342 | if [ ${result} -eq 50 ]; then
343 | return 50
344 | fi
345 |
346 | echo
347 | }
348 |
349 | setupModelType(){
350 | echo -e "${UNDERLINE}${BOLD}[2/9]${PLAIN}"
351 |     echo -e "  ${YELLOW}Please input [Y/n] to choose between automatically downloading models from ModelScope and using local models.${PLAIN}"
352 |     echo -e "  [y] Use models from ModelScope; they will be automatically downloaded into Docker (${CYAN}/workspace/models${PLAIN})."
353 |     echo -e "      If you want to mix local models and ModelScope models, select [y]."
354 |     echo    "  [n] Use models on the localhost; the directory where each model is located will be mapped into Docker."
355 |
356 | while true
357 | do
358 | read -p " Setting confirmation[Y/n]: " model_id_flag
359 |
360 | if [ -z "$model_id_flag" ]; then
361 | model_id_flag="y"
362 | fi
363 | YES="Y"
364 | yes="y"
365 | NO="N"
366 | no="n"
367 | if [ "$model_id_flag" = "$YES" ] || [ "$model_id_flag" = "$yes" ]; then
368 | # please set model_id later.
369 | PARAMS_DOWNLOAD_MODEL_DIR="/workspace/models"
370 |             echo -e "  ${UNDERLINE}You have chosen to use models from ModelScope; set the model IDs in the next steps, and the models will be automatically downloaded to (${PARAMS_DOWNLOAD_MODEL_DIR}) at runtime.${PLAIN}"
371 |
372 | params_local_models_dir=`sed '/^PARAMS_LOCAL_MODELS_DIR=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
373 | if [ -z "$params_local_models_dir" ]; then
374 | params_local_models_dir="${cur_dir}/models"
375 | mkdir -p ${params_local_models_dir}
376 | fi
377 | while true
378 | do
379 | echo
380 | echo -e " ${YELLOW}Please enter the local path to download models, the corresponding path in Docker is ${PARAMS_DOWNLOAD_MODEL_DIR}.${PLAIN}"
381 | read -p " Setting the local path to download models, default(${params_local_models_dir}): " PARAMS_LOCAL_MODELS_DIR
382 | if [ -z "$PARAMS_LOCAL_MODELS_DIR" ]; then
383 | if [ -z "$params_local_models_dir" ]; then
384 | echo -e " ${RED}The local path set is empty, please setup again.${PLAIN}"
385 | continue
386 | else
387 | PARAMS_LOCAL_MODELS_DIR=$params_local_models_dir
388 | fi
389 | fi
390 | if [ ! -d "$PARAMS_LOCAL_MODELS_DIR" ]; then
391 | echo -e " ${RED}The local model path(${PARAMS_LOCAL_MODELS_DIR}) set does not exist, please setup again.${PLAIN}"
392 | else
393 | echo -e " The local path(${GREEN}${PARAMS_LOCAL_MODELS_DIR}${PLAIN}) set will store models during the run."
394 | break
395 | fi
396 | done
397 |
398 | break
399 | elif [ "$model_id_flag" = "$NO" ] || [ "$model_id_flag" = "$no" ]; then
400 | # download_model_dir is empty, will use models in localhost.
401 | PARAMS_DOWNLOAD_MODEL_DIR=""
402 | PARAMS_LOCAL_MODELS_DIR=""
403 |             echo -e "  ${UNDERLINE}You have chosen to use models from the localhost; set the path to each local model in the next steps.${PLAIN}"
404 | echo
405 | break
406 | fi
407 | done
408 |
409 | echo
410 | }
411 |
412 | # Set asr model for FunASR server
413 | setupAsrModelId(){
414 | echo -e " ${UNDERLINE}${BOLD}[2.1/9]${PLAIN}"
415 |
416 | if [ -z "$PARAMS_DOWNLOAD_MODEL_DIR" ]; then
417 | # download_model_dir is empty, will use models in localhost.
418 | params_local_asr_path=`sed '/^PARAMS_LOCAL_ASR_PATH=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
419 | if [ -z "$params_local_asr_path" ]; then
420 | PARAMS_LOCAL_ASR_PATH=""
421 | else
422 | PARAMS_LOCAL_ASR_PATH=${params_local_asr_path}
423 | fi
424 |
425 | echo -e " ${YELLOW}Please input ASR model path in local for FunASR server.${PLAIN}"
426 | echo -e " Default: ${CYAN}${PARAMS_LOCAL_ASR_PATH}${PLAIN}"
427 |
428 | while true
429 | do
430 | read -p " Setting ASR model path in localhost: " PARAMS_LOCAL_ASR_PATH
431 |
432 | if [ -z "$PARAMS_LOCAL_ASR_PATH" ]; then
433 | PARAMS_LOCAL_ASR_PATH=${params_local_asr_path}
434 | fi
435 | if [ -z "$PARAMS_LOCAL_ASR_PATH" ]; then
436 | # use default asr model in Docker
437 | PARAMS_LOCAL_ASR_DIR=""
438 | PARAMS_DOCKER_ASR_DIR=""
439 | PARAMS_DOCKER_ASR_PATH="/workspace/models/asr"
440 |                 echo -e "  ${RED}No local ASR model path set, the default ASR model (${CYAN}/workspace/models/asr${PLAIN}${RED}) in Docker will be used.${PLAIN}"
441 | 
442 |                 echo -e "  ${UNDERLINE}You have chosen the default model dir in localhost:${PLAIN} ${GREEN}${PARAMS_LOCAL_ASR_PATH}${PLAIN}"
443 |                 echo -e "  ${UNDERLINE}The default model dir in Docker is${PLAIN} ${GREEN}${PARAMS_DOCKER_ASR_PATH}${PLAIN}"
444 | break
445 | else
446 | if [ ! -d "$PARAMS_LOCAL_ASR_PATH" ]; then
447 | echo -e " ${RED}The ASR model path set does not exist, please setup again.${PLAIN}"
448 | else
449 | # use asr model in localhost
450 | PARAMS_LOCAL_ASR_DIR=$(dirname "$PARAMS_LOCAL_ASR_PATH")
451 | asr_name=$(basename "$PARAMS_LOCAL_ASR_PATH")
452 | PARAMS_DOCKER_ASR_DIR="/workspace/user_asr"
453 | PARAMS_DOCKER_ASR_PATH=${PARAMS_DOCKER_ASR_DIR}/${asr_name}
454 |
455 | echo -e " ${UNDERLINE}You have chosen the model dir in localhost:${PLAIN} ${GREEN}${PARAMS_LOCAL_ASR_PATH}${PLAIN}"
456 | echo -e " ${UNDERLINE}The model dir in Docker is${PLAIN} ${GREEN}${PARAMS_DOCKER_ASR_PATH}${PLAIN}"
457 | break
458 | fi
459 | fi
460 | done
461 |
462 | PARAMS_ASR_ID=""
463 | else
464 | # please set model_id later.
465 | echo -e " ${YELLOW}Please select ASR model_id in ModelScope from the list below.${PLAIN}"
466 |
467 | menuSelection ${ASR_MODELS[*]}
468 | result=$?
469 | index=`expr $result - 1`
470 | PARAMS_ASR_ID=${ASR_MODELS[${index}]}
471 |
472 | OTHERS="model_name"
473 | LOCAL_MODEL="model_path"
474 | if [ "$PARAMS_ASR_ID" = "$OTHERS" ]; then
475 | params_asr_id=`sed '/^PARAMS_ASR_ID=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
476 | if [ -z "$params_asr_id" ]; then
477 | PARAMS_ASR_ID=""
478 | else
479 | PARAMS_ASR_ID=${params_asr_id}
480 | fi
481 |
482 | echo -e " Default: ${CYAN}${PARAMS_ASR_ID}${PLAIN}"
483 |
484 | while true
485 | do
486 | read -p " Setting ASR model_id in ModelScope: " PARAMS_ASR_ID
487 |
488 | PARAMS_LOCAL_ASR_DIR=""
489 | PARAMS_LOCAL_ASR_PATH=""
490 | PARAMS_DOCKER_ASR_DIR=""
491 | if [ -z "$PARAMS_ASR_ID" ]; then
492 | echo -e " ${RED}The ASR model ID is empty, please setup again.${PLAIN}"
493 | else
494 | break
495 | fi
496 | done
497 | elif [ "$PARAMS_ASR_ID" = "$LOCAL_MODEL" ]; then
498 | PARAMS_ASR_ID=""
499 | echo -e " Please input ASR model path in local for FunASR server."
500 |
501 | while true
502 | do
503 | read -p " Setting ASR model path in localhost: " PARAMS_LOCAL_ASR_PATH
504 | if [ -z "$PARAMS_LOCAL_ASR_PATH" ]; then
505 | # use default asr model in Docker
506 | echo -e " ${RED}Please do not set an empty path in localhost.${PLAIN}"
507 | else
508 | if [ ! -d "$PARAMS_LOCAL_ASR_PATH" ]; then
509 | echo -e " ${RED}The ASR model path set does not exist, please setup again.${PLAIN}"
510 | else
511 | # use asr model in localhost
512 | PARAMS_LOCAL_ASR_DIR=$(dirname "$PARAMS_LOCAL_ASR_PATH")
513 | asr_name=$(basename "$PARAMS_LOCAL_ASR_PATH")
514 | PARAMS_DOCKER_ASR_DIR="/workspace/user_asr"
515 | PARAMS_DOCKER_ASR_PATH=${PARAMS_DOCKER_ASR_DIR}/${asr_name}
516 |
517 | echo -e " ${UNDERLINE}You have chosen the model dir in localhost:${PLAIN} ${GREEN}${PARAMS_LOCAL_ASR_PATH}${PLAIN}"
518 | echo -e " ${UNDERLINE}The model dir in Docker is${PLAIN} ${GREEN}${PARAMS_DOCKER_ASR_PATH}${PLAIN}"
519 | echo
520 | return 0
521 | fi
522 | fi
523 | done
524 | fi
525 |
526 | PARAMS_DOCKER_ASR_DIR=$PARAMS_DOWNLOAD_MODEL_DIR
527 | PARAMS_DOCKER_ASR_PATH=${PARAMS_DOCKER_ASR_DIR}/${PARAMS_ASR_ID}
528 |
529 | echo -e " ${UNDERLINE}The model ID is${PLAIN} ${GREEN}${PARAMS_ASR_ID}${PLAIN}"
530 | echo -e " ${UNDERLINE}The model dir in Docker is${PLAIN} ${GREEN}${PARAMS_DOCKER_ASR_PATH}${PLAIN}"
531 | fi
532 |
533 | echo
534 | }
535 |
536 | # Set vad model for FunASR server
537 | setupVadModelId(){
538 | echo -e " ${UNDERLINE}${BOLD}[2.2/9]${PLAIN}"
539 |
540 | if [ -z "$PARAMS_DOWNLOAD_MODEL_DIR" ]; then
541 | # download_model_dir is empty, will use models in localhost.
542 | params_local_vad_path=`sed '/^PARAMS_LOCAL_VAD_PATH=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
543 | if [ -z "$params_local_vad_path" ]; then
544 | PARAMS_LOCAL_VAD_PATH=""
545 | else
546 | PARAMS_LOCAL_VAD_PATH=${params_local_vad_path}
547 | fi
548 |
549 | echo -e " ${YELLOW}Please input VAD model path in local for FunASR server.${PLAIN}"
550 | echo -e " Default: ${CYAN}${PARAMS_LOCAL_VAD_PATH}${PLAIN}"
551 |
552 | while true
553 | do
554 | read -p " Setting VAD model path in localhost: " PARAMS_LOCAL_VAD_PATH
555 |
556 | if [ -z "$PARAMS_LOCAL_VAD_PATH" ]; then
557 | PARAMS_LOCAL_VAD_PATH=${params_local_vad_path}
558 | fi
559 | if [ -z "$PARAMS_LOCAL_VAD_PATH" ]; then
560 | # use default vad model in Docker
561 | PARAMS_LOCAL_VAD_DIR=""
562 | PARAMS_DOCKER_VAD_DIR=""
563 | PARAMS_DOCKER_VAD_PATH="/workspace/models/vad"
564 |                 echo -e "  ${RED}No local VAD model path set, the default VAD model (${CYAN}/workspace/models/vad${PLAIN}${RED}) in Docker will be used.${PLAIN}"
565 | 
566 |                 echo -e "  ${UNDERLINE}You have chosen the default model dir in localhost:${PLAIN} ${GREEN}${PARAMS_LOCAL_VAD_PATH}${PLAIN}"
567 |                 echo -e "  ${UNDERLINE}The default model dir in Docker is${PLAIN} ${GREEN}${PARAMS_DOCKER_VAD_PATH}${PLAIN}"
568 | break
569 | else
570 | if [ ! -d "$PARAMS_LOCAL_VAD_PATH" ]; then
571 | echo -e " ${RED}The VAD model path set does not exist, please setup again.${PLAIN}"
572 | else
573 | # use vad model in localhost
574 | PARAMS_LOCAL_VAD_DIR=$(dirname "$PARAMS_LOCAL_VAD_PATH")
575 | vad_name=$(basename "$PARAMS_LOCAL_VAD_PATH")
576 | PARAMS_DOCKER_VAD_DIR="/workspace/user_vad"
577 | PARAMS_DOCKER_VAD_PATH=${PARAMS_DOCKER_VAD_DIR}/${vad_name}
578 |
579 | echo -e " ${UNDERLINE}You have chosen the model dir in localhost:${PLAIN} ${GREEN}${PARAMS_LOCAL_VAD_PATH}${PLAIN}"
580 | echo -e " ${UNDERLINE}The model dir in Docker is${PLAIN} ${GREEN}${PARAMS_DOCKER_VAD_PATH}${PLAIN}"
581 | break
582 | fi
583 | fi
584 | done
585 |
586 | PARAMS_VAD_ID=""
587 | else
588 | # please set model_id later.
589 | echo -e " ${YELLOW}Please select VAD model_id in ModelScope from the list below.${PLAIN}"
590 |
591 | menuSelection ${VAD_MODELS[*]}
592 | result=$?
593 | index=`expr $result - 1`
594 | PARAMS_VAD_ID=${VAD_MODELS[${index}]}
595 |
596 | OTHERS="model_name"
597 | LOCAL_MODEL="model_path"
598 | if [ "$PARAMS_VAD_ID" = "$OTHERS" ]; then
599 | params_vad_id=`sed '/^PARAMS_VAD_ID=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
600 | if [ -z "$params_vad_id" ]; then
601 | PARAMS_VAD_ID=""
602 | else
603 | PARAMS_VAD_ID=${params_vad_id}
604 | fi
605 |
606 | echo -e " Default: ${CYAN}${PARAMS_VAD_ID}${PLAIN}"
607 |
608 | while true
609 | do
610 | read -p " Setting VAD model_id in ModelScope: " PARAMS_VAD_ID
611 |
612 | PARAMS_LOCAL_VAD_DIR=""
613 | PARAMS_LOCAL_VAD_PATH=""
614 | PARAMS_DOCKER_VAD_DIR=""
615 | if [ -z "$PARAMS_VAD_ID" ]; then
616 | echo -e " ${RED}The VAD model ID is empty, please setup again.${PLAIN}"
617 | else
618 | break
619 | fi
620 | done
621 | elif [ "$PARAMS_VAD_ID" = "$LOCAL_MODEL" ]; then
622 | PARAMS_VAD_ID=""
623 | echo -e " Please input VAD model path in local for FunASR server."
624 |
625 | while true
626 | do
627 | read -p " Setting VAD model path in localhost: " PARAMS_LOCAL_VAD_PATH
628 | if [ -z "$PARAMS_LOCAL_VAD_PATH" ]; then
629 | # use default vad model in Docker
630 | echo -e " ${RED}Please do not set an empty path in localhost.${PLAIN}"
631 | else
632 | if [ ! -d "$PARAMS_LOCAL_VAD_PATH" ]; then
633 | echo -e " ${RED}The VAD model path set does not exist, please setup again.${PLAIN}"
634 | else
635 | # use vad model in localhost
636 | PARAMS_LOCAL_VAD_DIR=$(dirname "$PARAMS_LOCAL_VAD_PATH")
637 | vad_name=$(basename "$PARAMS_LOCAL_VAD_PATH")
638 | PARAMS_DOCKER_VAD_DIR="/workspace/user_vad"
639 | PARAMS_DOCKER_VAD_PATH=${PARAMS_DOCKER_VAD_DIR}/${vad_name}
640 |
641 | echo -e " ${UNDERLINE}You have chosen the model dir in localhost:${PLAIN} ${GREEN}${PARAMS_LOCAL_VAD_PATH}${PLAIN}"
642 | echo -e " ${UNDERLINE}The model dir in Docker is${PLAIN} ${GREEN}${PARAMS_DOCKER_VAD_PATH}${PLAIN}"
643 | echo
644 | return 0
645 | fi
646 | fi
647 | done
648 | fi
649 |
650 | PARAMS_DOCKER_VAD_DIR=$PARAMS_DOWNLOAD_MODEL_DIR
651 | PARAMS_DOCKER_VAD_PATH=${PARAMS_DOCKER_VAD_DIR}/${PARAMS_VAD_ID}
652 |
653 | echo -e " ${UNDERLINE}The model ID is${PLAIN} ${GREEN}${PARAMS_VAD_ID}${PLAIN}"
654 | echo -e " ${UNDERLINE}The model dir in Docker is${PLAIN} ${GREEN}${PARAMS_DOCKER_VAD_PATH}${PLAIN}"
655 | fi
656 |
657 | echo
658 | }
659 |
660 | # Set punc model for FunASR server
661 | setupPuncModelId(){
662 | echo -e " ${UNDERLINE}${BOLD}[2.3/9]${PLAIN}"
663 |
664 | if [ -z "$PARAMS_DOWNLOAD_MODEL_DIR" ]; then
665 | # download_model_dir is empty, will use models in localhost.
666 | params_local_punc_path=`sed '/^PARAMS_LOCAL_PUNC_PATH=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
667 | if [ -z "$params_local_punc_path" ]; then
668 | PARAMS_LOCAL_PUNC_PATH=""
669 | else
670 | PARAMS_LOCAL_PUNC_PATH=${params_local_punc_path}
671 | fi
672 |
673 | echo -e " ${YELLOW}Please input PUNC model path in local for FunASR server.${PLAIN}"
674 | echo -e " Default: ${CYAN}${PARAMS_LOCAL_PUNC_PATH}${PLAIN}"
675 |
676 | while true
677 | do
678 | read -p " Setting PUNC model path in localhost: " PARAMS_LOCAL_PUNC_PATH
679 |
680 | if [ -z "$PARAMS_LOCAL_PUNC_PATH" ]; then
681 | PARAMS_LOCAL_PUNC_PATH=${params_local_punc_path}
682 | fi
683 | if [ -z "$PARAMS_LOCAL_PUNC_PATH" ]; then
684 | # use default punc model in Docker
685 | PARAMS_LOCAL_PUNC_DIR=""
686 | PARAMS_DOCKER_PUNC_DIR=""
687 | PARAMS_DOCKER_PUNC_PATH="/workspace/models/punc"
688 |                 echo -e "  ${RED}No local PUNC model path set, the default PUNC model (${CYAN}/workspace/models/punc${PLAIN}${RED}) in Docker will be used.${PLAIN}"
689 | 
690 |                 echo -e "  ${UNDERLINE}You have chosen the default model dir in localhost:${PLAIN} ${GREEN}${PARAMS_LOCAL_PUNC_PATH}${PLAIN}"
691 |                 echo -e "  ${UNDERLINE}The default model dir in Docker is${PLAIN} ${GREEN}${PARAMS_DOCKER_PUNC_PATH}${PLAIN}"
692 | break
693 | else
694 | if [ ! -d "$PARAMS_LOCAL_PUNC_PATH" ]; then
695 | echo -e " ${RED}The PUNC model path set does not exist, please setup again.${PLAIN}"
696 | else
697 | # use punc model in localhost
698 | PARAMS_LOCAL_PUNC_DIR=$(dirname "$PARAMS_LOCAL_PUNC_PATH")
699 | punc_name=$(basename "$PARAMS_LOCAL_PUNC_PATH")
700 | PARAMS_DOCKER_PUNC_DIR="/workspace/user_punc"
701 | PARAMS_DOCKER_PUNC_PATH=${PARAMS_DOCKER_PUNC_DIR}/${punc_name}
702 |
703 | echo -e " ${UNDERLINE}You have chosen the model dir in localhost:${PLAIN} ${GREEN}${PARAMS_LOCAL_PUNC_PATH}${PLAIN}"
704 | echo -e " ${UNDERLINE}The model dir in Docker is${PLAIN} ${GREEN}${PARAMS_DOCKER_PUNC_PATH}${PLAIN}"
705 | break
706 | fi
707 | fi
708 | done
709 |
710 | PARAMS_PUNC_ID=""
711 | else
712 | # please set model_id later.
713 | echo -e " ${YELLOW}Please select PUNC model_id in ModelScope from the list below.${PLAIN}"
714 |
715 | menuSelection ${PUNC_MODELS[*]}
716 | result=$?
717 | index=`expr $result - 1`
718 | PARAMS_PUNC_ID=${PUNC_MODELS[${index}]}
719 |
720 | OTHERS="model_name"
721 | LOCAL_MODEL="model_path"
722 | if [ "$PARAMS_PUNC_ID" = "$OTHERS" ]; then
723 | params_punc_id=`sed '/^PARAMS_PUNC_ID=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
724 | if [ -z "$params_punc_id" ]; then
725 | PARAMS_PUNC_ID=""
726 | else
727 | PARAMS_PUNC_ID=${params_punc_id}
728 | fi
729 |
730 | echo -e " Default: ${CYAN}${PARAMS_PUNC_ID}${PLAIN}"
731 |
732 | while true
733 | do
734 | read -p " Setting PUNC model_id in ModelScope: " PARAMS_PUNC_ID
735 |
736 | PARAMS_LOCAL_PUNC_DIR=""
737 | PARAMS_LOCAL_PUNC_PATH=""
738 | PARAMS_DOCKER_PUNC_DIR=""
739 | if [ -z "$PARAMS_PUNC_ID" ]; then
740 | echo -e " ${RED}The PUNC model ID is empty, please setup again.${PLAIN}"
741 | else
742 | break
743 | fi
744 | done
745 | elif [ "$PARAMS_PUNC_ID" = "$LOCAL_MODEL" ]; then
746 | PARAMS_PUNC_ID=""
747 | echo -e " Please input PUNC model path in local for FunASR server."
748 |
749 | while true
750 | do
751 | read -p " Setting PUNC model path in localhost: " PARAMS_LOCAL_PUNC_PATH
752 | if [ -z "$PARAMS_LOCAL_PUNC_PATH" ]; then
753 | # use default punc model in Docker
754 | echo -e " ${RED}Please do not set an empty path in localhost.${PLAIN}"
755 | else
756 | if [ ! -d "$PARAMS_LOCAL_PUNC_PATH" ]; then
757 | echo -e " ${RED}The PUNC model path set does not exist, please setup again.${PLAIN}"
758 | else
759 | # use punc model in localhost
760 | PARAMS_LOCAL_PUNC_DIR=$(dirname "$PARAMS_LOCAL_PUNC_PATH")
761 | punc_name=$(basename "$PARAMS_LOCAL_PUNC_PATH")
762 | PARAMS_DOCKER_PUNC_DIR="/workspace/user_punc"
763 | PARAMS_DOCKER_PUNC_PATH=${PARAMS_DOCKER_PUNC_DIR}/${punc_name}
764 |
765 | echo -e " ${UNDERLINE}You have chosen the model dir in localhost:${PLAIN} ${GREEN}${PARAMS_LOCAL_PUNC_PATH}${PLAIN}"
766 | echo -e " ${UNDERLINE}The model dir in Docker is${PLAIN} ${GREEN}${PARAMS_DOCKER_PUNC_PATH}${PLAIN}"
767 | echo
768 | return 0
769 | fi
770 | fi
771 | done
772 | fi
773 |
774 | PARAMS_DOCKER_PUNC_DIR=$PARAMS_DOWNLOAD_MODEL_DIR
775 | PARAMS_DOCKER_PUNC_PATH=${PARAMS_DOCKER_PUNC_DIR}/${PARAMS_PUNC_ID}
776 |
777 | echo -e " ${UNDERLINE}The model ID is${PLAIN} ${GREEN}${PARAMS_PUNC_ID}${PLAIN}"
778 | echo -e " ${UNDERLINE}The model dir in Docker is${PLAIN} ${GREEN}${PARAMS_DOCKER_PUNC_PATH}${PLAIN}"
779 | fi
780 |
781 | echo
782 | }
783 |
784 | # Set server exec for FunASR
785 | setupServerExec(){
786 | echo -e "${UNDERLINE}${BOLD}[3/9]${PLAIN}"
787 |
788 | params_docker_exec_path=`sed '/^PARAMS_DOCKER_EXEC_PATH=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
789 | if [ -z "$params_docker_exec_path" ]; then
790 | PARAMS_DOCKER_EXEC_PATH="/workspace/FunASR/funasr/runtime/websocket/build/bin/funasr-wss-server"
791 | else
792 | PARAMS_DOCKER_EXEC_PATH=${params_docker_exec_path}
793 | fi
794 |
795 |     echo -e "  ${YELLOW}Please enter the path to the executor of the FunASR service on the localhost.${PLAIN}"
796 |     echo -e "  If not set, the default ${CYAN}/workspace/FunASR/funasr/runtime/websocket/build/bin/funasr-wss-server${PLAIN} in Docker is used."
797 |     read -p  "  Setting the path to the executor of the FunASR service on the localhost: " PARAMS_LOCAL_EXEC_PATH
798 |
799 | if [ -z "$PARAMS_LOCAL_EXEC_PATH" ]; then
800 | PARAMS_DOCKER_EXEC_PATH="/workspace/FunASR/funasr/runtime/websocket/build/bin/funasr-wss-server"
801 | else
802 |         if [ ! -f "$PARAMS_LOCAL_EXEC_PATH" ]; then
803 | echo -e " ${RED}The FunASR server path set does not exist, will use default.${PLAIN}"
804 | PARAMS_LOCAL_EXEC_PATH=""
805 | PARAMS_LOCAL_EXEC_DIR=""
806 | PARAMS_DOCKER_EXEC_PATH="/workspace/FunASR/funasr/runtime/websocket/build/bin/funasr-wss-server"
807 | PARAMS_DOCKER_EXEC_DIR="/workspace/FunASR/funasr/runtime/websocket/build/bin"
808 | else
809 | PARAMS_LOCAL_EXEC_DIR=$(dirname "$PARAMS_LOCAL_EXEC_PATH")
810 | exec=$(basename "$PARAMS_LOCAL_EXEC_PATH")
811 | PARAMS_DOCKER_EXEC_DIR="/server"
812 | PARAMS_DOCKER_EXEC_PATH=${PARAMS_DOCKER_EXEC_DIR}/${exec}
813 | echo -e " ${UNDERLINE}The path of FunASR in localhost is${PLAIN} ${GREEN}${PARAMS_LOCAL_EXEC_PATH}${PLAIN}"
814 | fi
815 | fi
816 |     echo -e "  ${UNDERLINE}Correspondingly, the path of FunASR in Docker is${PLAIN} ${GREEN}${PARAMS_DOCKER_EXEC_PATH}${PLAIN}"
817 |
818 | echo
819 | }
820 |
821 | # Configure FunASR server host port setting
822 | setupHostPort(){
823 | echo -e "${UNDERLINE}${BOLD}[4/9]${PLAIN}"
824 |
825 | params_host_port=`sed '/^PARAMS_HOST_PORT=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
826 | if [ -z "$params_host_port" ]; then
827 | PARAMS_HOST_PORT="10095"
828 | else
829 | PARAMS_HOST_PORT=${params_host_port}
830 | fi
831 |
832 | while true
833 | do
834 |         echo -e "  ${YELLOW}Please input the open host port used by the FunASR server.${PLAIN}"
835 |         echo -e "  Default: ${CYAN}${PARAMS_HOST_PORT}${PLAIN}"
836 |         read -p "  Setting the open host port [1-65535]: " PARAMS_HOST_PORT
837 |
838 | if [ -z "$PARAMS_HOST_PORT" ]; then
839 | params_host_port=`sed '/^PARAMS_HOST_PORT=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
840 | if [ -z "$params_host_port" ]; then
841 | PARAMS_HOST_PORT="10095"
842 | else
843 | PARAMS_HOST_PORT=${params_host_port}
844 | fi
845 | fi
846 | expr ${PARAMS_HOST_PORT} + 0 &>/dev/null
847 | if [ $? -eq 0 ]; then
848 | if [ ${PARAMS_HOST_PORT} -ge 1 ] && [ ${PARAMS_HOST_PORT} -le 65535 ]; then
849 | echo -e " ${UNDERLINE}The port of the host is${PLAIN} ${GREEN}${PARAMS_HOST_PORT}${PLAIN}"
850 | echo -e " ${UNDERLINE}The port in Docker for FunASR server is ${PLAIN}${GREEN}${PARAMS_DOCKER_PORT}${PLAIN}"
851 | break
852 | else
853 | echo -e " ${RED}Input error, please input correct number!${PLAIN}"
854 | fi
855 | else
856 | echo -e " ${RED}Input error, please input correct number!${PLAIN}"
857 | fi
858 | done
859 | echo
860 | }
861 |
862 | setupThreadNum(){
863 | echo -e "${UNDERLINE}${BOLD}[5/9]${PLAIN}"
864 |
865 | params_decoder_thread_num=`sed '/^PARAMS_DECODER_THREAD_NUM=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
866 | if [ -z "$params_decoder_thread_num" ]; then
867 | PARAMS_DECODER_THREAD_NUM=$CPUNUM
868 | else
869 | PARAMS_DECODER_THREAD_NUM=${params_decoder_thread_num}
870 | fi
871 |
872 | while true
873 | do
874 | echo -e " ${YELLOW}Please input thread number for FunASR decoder.${PLAIN}"
875 | echo -e " Default: ${CYAN}${PARAMS_DECODER_THREAD_NUM}${PLAIN}"
876 | read -p " Setting the number of decoder thread: " PARAMS_DECODER_THREAD_NUM
877 |
878 | if [ -z "$PARAMS_DECODER_THREAD_NUM" ]; then
879 | params_decoder_thread_num=`sed '/^PARAMS_DECODER_THREAD_NUM=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
880 | if [ -z "$params_decoder_thread_num" ]; then
881 | PARAMS_DECODER_THREAD_NUM=$CPUNUM
882 | else
883 | PARAMS_DECODER_THREAD_NUM=${params_decoder_thread_num}
884 | fi
885 | fi
886 | expr ${PARAMS_DECODER_THREAD_NUM} + 0 &>/dev/null
887 | if [ $? -eq 0 ]; then
888 | if [ ${PARAMS_DECODER_THREAD_NUM} -ge 1 ] && [ ${PARAMS_DECODER_THREAD_NUM} -le 65535 ]; then
889 | break
890 | else
891 | echo -e " ${RED}Input error, please input correct number!${PLAIN}"
892 | fi
893 | else
894 | echo -e " ${RED}Input error, please input correct number!${PLAIN}"
895 | fi
896 | done
897 | echo
898 |
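    | # Heuristic: allocate one IO thread per four decoder threads, floor of 1.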
899 | multiple_io=4
900 | PARAMS_DECODER_THREAD_NUM=`expr $PARAMS_DECODER_THREAD_NUM + 0`
901 | PARAMS_IO_THREAD_NUM=`expr $PARAMS_DECODER_THREAD_NUM / $multiple_io`
902 | if [ $PARAMS_IO_THREAD_NUM -eq 0 ]; then
903 | PARAMS_IO_THREAD_NUM=1
904 | fi
905 |
906 | echo -e " ${UNDERLINE}The number of decoder threads is${PLAIN} ${GREEN}${PARAMS_DECODER_THREAD_NUM}${PLAIN}"
907 | echo -e " ${UNDERLINE}The number of IO threads is${PLAIN} ${GREEN}${PARAMS_IO_THREAD_NUM}${PLAIN}"
908 | echo
909 | }
910 |
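    | # paramsFromDefault: reload a saved configuration. Each sed call keeps
    | # only the matching KEY= line and strips everything up to '=', e.g.
    | #   sed '/^PARAMS_HOST_PORT=/!d;s/.*=//' /var/funasr/config   ->   10095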
911 | paramsFromDefault(){
912 | echo -e "${UNDERLINE}${BOLD}[2-5/9]${PLAIN}"
913 | echo -e " ${YELLOW}Load parameters from ${FUNASR_CONFIG_FILE}${PLAIN}"
914 | echo
915 |
916 | PARAMS_DOCKER_IMAGE=`sed '/^PARAMS_DOCKER_IMAGE=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
917 | PARAMS_LOCAL_MODELS_DIR=`sed '/^PARAMS_LOCAL_MODELS_DIR=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
918 | PARAMS_DOWNLOAD_MODEL_DIR=`sed '/^PARAMS_DOWNLOAD_MODEL_DIR=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
919 | PARAMS_LOCAL_ASR_PATH=`sed '/^PARAMS_LOCAL_ASR_PATH=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
920 | PARAMS_DOCKER_ASR_PATH=`sed '/^PARAMS_DOCKER_ASR_PATH=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
921 | PARAMS_ASR_ID=`sed '/^PARAMS_ASR_ID=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
922 | PARAMS_LOCAL_VAD_PATH=`sed '/^PARAMS_LOCAL_VAD_PATH=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
923 | PARAMS_DOCKER_VAD_PATH=`sed '/^PARAMS_DOCKER_VAD_PATH=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
924 | PARAMS_VAD_ID=`sed '/^PARAMS_VAD_ID=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
925 | PARAMS_LOCAL_PUNC_PATH=`sed '/^PARAMS_LOCAL_PUNC_PATH=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
926 | PARAMS_DOCKER_PUNC_PATH=`sed '/^PARAMS_DOCKER_PUNC_PATH=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
927 | PARAMS_PUNC_ID=`sed '/^PARAMS_PUNC_ID=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
928 | PARAMS_DOCKER_EXEC_PATH=`sed '/^PARAMS_DOCKER_EXEC_PATH=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
929 | PARAMS_HOST_PORT=`sed '/^PARAMS_HOST_PORT=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
930 | PARAMS_DOCKER_PORT=`sed '/^PARAMS_DOCKER_PORT=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
931 | PARAMS_DECODER_THREAD_NUM=`sed '/^PARAMS_DECODER_THREAD_NUM=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
932 | PARAMS_IO_THREAD_NUM=`sed '/^PARAMS_IO_THREAD_NUM=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
933 | }
934 |
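    | # saveParams: persist every PARAMS_* setting to $FUNASR_CONFIG_FILE as
    | # plain KEY=VALUE lines; paramsFromDefault reads them back with sed.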
935 | saveParams(){
936 |     : > $FUNASR_CONFIG_FILE  # start from an empty config file
937 | echo -e " ${GREEN}Parameters are stored in the file ${FUNASR_CONFIG_FILE}${PLAIN}"
938 |
939 | echo "PARAMS_DOCKER_IMAGE=${PARAMS_DOCKER_IMAGE}" > $FUNASR_CONFIG_FILE
940 | echo "PARAMS_LOCAL_MODELS_DIR=${PARAMS_LOCAL_MODELS_DIR}" >> $FUNASR_CONFIG_FILE
941 | echo "PARAMS_DOWNLOAD_MODEL_DIR=${PARAMS_DOWNLOAD_MODEL_DIR}" >> $FUNASR_CONFIG_FILE
942 |
943 | echo "PARAMS_LOCAL_EXEC_PATH=${PARAMS_LOCAL_EXEC_PATH}" >> $FUNASR_CONFIG_FILE
944 | echo "PARAMS_LOCAL_EXEC_DIR=${PARAMS_LOCAL_EXEC_DIR}" >> $FUNASR_CONFIG_FILE
945 | echo "PARAMS_DOCKER_EXEC_PATH=${PARAMS_DOCKER_EXEC_PATH}" >> $FUNASR_CONFIG_FILE
946 | echo "PARAMS_DOCKER_EXEC_DIR=${PARAMS_DOCKER_EXEC_DIR}" >> $FUNASR_CONFIG_FILE
947 |
948 | echo "PARAMS_LOCAL_ASR_PATH=${PARAMS_LOCAL_ASR_PATH}" >> $FUNASR_CONFIG_FILE
949 | echo "PARAMS_LOCAL_ASR_DIR=${PARAMS_LOCAL_ASR_DIR}" >> $FUNASR_CONFIG_FILE
950 | echo "PARAMS_DOCKER_ASR_PATH=${PARAMS_DOCKER_ASR_PATH}" >> $FUNASR_CONFIG_FILE
951 | echo "PARAMS_DOCKER_ASR_DIR=${PARAMS_DOCKER_ASR_DIR}" >> $FUNASR_CONFIG_FILE
952 | echo "PARAMS_ASR_ID=${PARAMS_ASR_ID}" >> $FUNASR_CONFIG_FILE
953 |
954 | echo "PARAMS_LOCAL_PUNC_PATH=${PARAMS_LOCAL_PUNC_PATH}" >> $FUNASR_CONFIG_FILE
955 | echo "PARAMS_LOCAL_PUNC_DIR=${PARAMS_LOCAL_PUNC_DIR}" >> $FUNASR_CONFIG_FILE
956 | echo "PARAMS_DOCKER_PUNC_PATH=${PARAMS_DOCKER_PUNC_PATH}" >> $FUNASR_CONFIG_FILE
957 | echo "PARAMS_DOCKER_PUNC_DIR=${PARAMS_DOCKER_PUNC_DIR}" >> $FUNASR_CONFIG_FILE
958 | echo "PARAMS_PUNC_ID=${PARAMS_PUNC_ID}" >> $FUNASR_CONFIG_FILE
959 |
960 | echo "PARAMS_LOCAL_VAD_PATH=${PARAMS_LOCAL_VAD_PATH}" >> $FUNASR_CONFIG_FILE
961 | echo "PARAMS_LOCAL_VAD_DIR=${PARAMS_LOCAL_VAD_DIR}" >> $FUNASR_CONFIG_FILE
962 | echo "PARAMS_DOCKER_VAD_PATH=${PARAMS_DOCKER_VAD_PATH}" >> $FUNASR_CONFIG_FILE
963 | echo "PARAMS_DOCKER_VAD_DIR=${PARAMS_DOCKER_VAD_DIR}" >> $FUNASR_CONFIG_FILE
964 | echo "PARAMS_VAD_ID=${PARAMS_VAD_ID}" >> $FUNASR_CONFIG_FILE
965 |
966 | echo "PARAMS_HOST_PORT=${PARAMS_HOST_PORT}" >> $FUNASR_CONFIG_FILE
967 | echo "PARAMS_DOCKER_PORT=${PARAMS_DOCKER_PORT}" >> $FUNASR_CONFIG_FILE
968 | echo "PARAMS_DECODER_THREAD_NUM=${PARAMS_DECODER_THREAD_NUM}" >> $FUNASR_CONFIG_FILE
969 | echo "PARAMS_IO_THREAD_NUM=${PARAMS_IO_THREAD_NUM}" >> $FUNASR_CONFIG_FILE
970 | }
971 |
972 | showAllParams(){
973 | echo -e "${UNDERLINE}${BOLD}[6/9]${PLAIN}"
974 | echo -e " ${YELLOW}Show parameters of FunASR server setting and confirm to run ...${PLAIN}"
975 | echo
976 |
977 | if [ ! -z "$PARAMS_DOCKER_IMAGE" ]; then
978 | echo -e " The current Docker image is : ${GREEN}${PARAMS_DOCKER_IMAGE}${PLAIN}"
979 | fi
980 |
981 | if [ ! -z "$PARAMS_LOCAL_MODELS_DIR" ]; then
982 | echo -e " The model is downloaded or stored to this directory in local : ${GREEN}${PARAMS_LOCAL_MODELS_DIR}${PLAIN}"
983 | fi
984 | if [ ! -z "$PARAMS_DOWNLOAD_MODEL_DIR" ]; then
985 | echo -e " The model will be automatically downloaded to the directory : ${GREEN}${PARAMS_DOWNLOAD_MODEL_DIR}${PLAIN}"
986 | fi
987 |
988 | if [ ! -z "$PARAMS_ASR_ID" ]; then
989 | echo -e " The ASR model_id used : ${GREEN}${PARAMS_ASR_ID}${PLAIN}"
990 | fi
991 | if [ ! -z "$PARAMS_LOCAL_ASR_PATH" ]; then
992 | echo -e " The path to the local ASR model directory for the load : ${GREEN}${PARAMS_LOCAL_ASR_PATH}${PLAIN}"
993 | fi
994 | echo -e " The ASR model directory corresponds to the directory in Docker : ${GREEN}${PARAMS_DOCKER_ASR_PATH}${PLAIN}"
995 |
996 | if [ ! -z "$PARAMS_VAD_ID" ]; then
997 | echo -e " The VAD model_id used : ${GREEN}${PARAMS_VAD_ID}${PLAIN}"
998 | fi
999 | if [ ! -z "$PARAMS_LOCAL_VAD_PATH" ]; then
1000 | echo -e " The path to the local VAD model directory for the load : ${GREEN}${PARAMS_LOCAL_VAD_PATH}${PLAIN}"
1001 | fi
1002 | echo -e " The VAD model directory corresponds to the directory in Docker : ${GREEN}${PARAMS_DOCKER_VAD_PATH}${PLAIN}"
1003 |
1004 | if [ ! -z "$PARAMS_PUNC_ID" ]; then
1005 | echo -e " The PUNC model_id used : ${GREEN}${PARAMS_PUNC_ID}${PLAIN}"
1006 | fi
1007 | if [ ! -z "$PARAMS_LOCAL_PUNC_PATH" ]; then
1008 | echo -e " The path to the local PUNC model directory for the load : ${GREEN}${PARAMS_LOCAL_PUNC_PATH}${PLAIN}"
1009 | fi
1010 | echo -e " The PUNC model directory corresponds to the directory in Docker: ${GREEN}${PARAMS_DOCKER_PUNC_PATH}${PLAIN}"
1011 | echo
1012 |
1013 | if [ ! -z "$PARAMS_LOCAL_EXEC_PATH" ]; then
1014 | echo -e " The local path of the FunASR service executor : ${GREEN}${PARAMS_LOCAL_EXEC_PATH}${PLAIN}"
1015 | fi
1016 | echo -e " The path in the docker of the FunASR service executor : ${GREEN}${PARAMS_DOCKER_EXEC_PATH}${PLAIN}"
1017 |
1018 |     echo -e "  Set the host port used by the FunASR service                   : ${GREEN}${PARAMS_HOST_PORT}${PLAIN}"
1019 |     echo -e "  Set the docker port used by the FunASR service                 : ${GREEN}${PARAMS_DOCKER_PORT}${PLAIN}"
1020 | 
1021 |     echo -e "  Set the number of threads used for decoding by the FunASR service : ${GREEN}${PARAMS_DECODER_THREAD_NUM}${PLAIN}"
1022 |     echo -e "  Set the number of threads used for IO by the FunASR service       : ${GREEN}${PARAMS_IO_THREAD_NUM}${PLAIN}"
1023 |
1024 | echo
1025 | while true
1026 | do
1027 | params_confirm="y"
1028 | echo -e " ${YELLOW}Please input [Y/n] to confirm the parameters.${PLAIN}"
1029 | echo -e " [y] Verify that these parameters are correct and that the service will run."
1030 |         echo -e "  [n] The parameters set are incorrect; the script will exit, please rerun."
1031 | read -p " read confirmation[Y/n]: " params_confirm
1032 |
1033 | if [ -z "$params_confirm" ]; then
1034 | params_confirm="y"
1035 | fi
1036 | YES="Y"
1037 | yes="y"
1038 | NO="N"
1039 | no="n"
1040 | echo
1041 | if [ "$params_confirm" = "$YES" ] || [ "$params_confirm" = "$yes" ]; then
1042 | echo -e " ${GREEN}Will run FunASR server later ...${PLAIN}"
1043 | break
1044 | elif [ "$params_confirm" = "$NO" ] || [ "$params_confirm" = "$no" ]; then
1045 | echo -e " ${RED}The parameters set are incorrect, please rerun ...${PLAIN}"
1046 | exit 1
1047 | else
1048 |             echo "  Invalid input, please enter [Y/n] again ..."
1049 | fi
1050 | done
1051 |
1052 | saveParams
1053 | echo
1054 | sleep 1
1055 | }
1056 |
1057 | # Install docker
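    | # The script treats more than 30 lines of `docker info` output as evidence
    | # that Docker is installed and the daemon is reachable.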
1058 | installDocker(){
1059 | echo -e "${UNDERLINE}${BOLD}[7/9]${PLAIN}"
1060 |
1061 | if [ $DOCKERINFOLEN -gt 30 ]; then
1062 |         echo -e "  ${YELLOW}Docker is already installed.${PLAIN}"
1063 | else
1064 | lowercase_osid=$(echo $OSID | tr '[A-Z]' '[a-z]')
1065 |         echo -e "  ${YELLOW}Start installing Docker for $lowercase_osid ${PLAIN}"
1066 | DOCKER_INSTALL_CMD="curl -fsSL https://get.docker.com | bash -s docker --mirror Aliyun"
1067 | DOCKER_INSTALL_RUN_CMD=""
1068 |
1069 | case "$lowercase_osid" in
1070 | ubuntu)
1071 | DOCKER_INSTALL_CMD="curl -fsSL https://test.docker.com -o test-docker.sh"
1072 | DOCKER_INSTALL_RUN_CMD="sudo sh test-docker.sh"
1073 | ;;
1074 | centos)
1075 | DOCKER_INSTALL_CMD="curl -fsSL https://get.docker.com | bash -s docker --mirror Aliyun"
1076 | ;;
1077 | debian)
1078 | DOCKER_INSTALL_CMD="curl -fsSL https://get.docker.com -o get-docker.sh"
1079 | DOCKER_INSTALL_RUN_CMD="sudo sh get-docker.sh"
1080 | ;;
1081 | *)
1082 | echo "$lowercase_osid is not supported."
1083 | ;;
1084 | esac
1085 |
1086 | echo -e " Get docker installer: ${GREEN}$DOCKER_INSTALL_CMD${PLAIN}"
1087 | echo -e " Get docker run: ${GREEN}$DOCKER_INSTALL_RUN_CMD${PLAIN}"
1088 |
1089 | $DOCKER_INSTALL_CMD
1090 | if [ ! -z "$DOCKER_INSTALL_RUN_CMD" ]; then
1091 | $DOCKER_INSTALL_RUN_CMD
1092 | fi
1093 |
1094 | DOCKERINFO=$(sudo docker info | wc -l)
1095 | DOCKERINFOLEN=$(expr $DOCKERINFO)
1096 | if [ $DOCKERINFOLEN -gt 30 ]; then
1097 | echo -e " ${GREEN}Docker install success, start docker server.${PLAIN}"
1098 | sudo systemctl start docker
1099 | else
1100 | echo -e " ${RED}Docker install failed!${PLAIN}"
1101 | exit 1
1102 | fi
1103 | fi
1104 |
1105 | echo
1106 | sleep 1
1107 | }
1108 |
1109 | # Download docker image
1110 | downloadDockerImage(){
1111 | echo -e "${UNDERLINE}${BOLD}[8/9]${PLAIN}"
1112 | echo -e " ${YELLOW}Pull docker image(${PARAMS_DOCKER_IMAGE})...${PLAIN}"
1113 |
1114 | sudo docker pull ${PARAMS_DOCKER_IMAGE}
1115 |
1116 | echo
1117 | sleep 1
1118 | }
1119 |
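    | # dockerRun: assemble and launch the `docker run` command. Host model and
    | # executor directories are bind-mounted into the container, and all server
    | # arguments travel in the DAEMON_SERVER_CONFIG environment variable as
    | # JSON, roughly (values illustrative):
    | #   {"server":[{"exec":"/workspace/.../funasr-wss-server",
    | #     "--model-dir":"...","--vad-dir":"...","--punc-dir":"...",
    | #     "--decoder-thread-num":"32","--io-thread-num":"8","--port":"10095",
    | #     "--certfile":"...","--keyfile":"..."}]}
    | # It then polls ServerProgress to draw the model-loading bars.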
1120 | dockerRun(){
1121 | echo -e "${UNDERLINE}${BOLD}[9/9]${PLAIN}"
1122 | echo -e " ${YELLOW}Construct command and run docker ...${PLAIN}"
1123 |
1124 | RUN_CMD="sudo docker run"
1125 | PORT_MAP=" -p ${PARAMS_HOST_PORT}:${PARAMS_DOCKER_PORT}"
1126 | DIR_PARAMS=" --privileged=true"
1127 | DIR_MAP_PARAMS=""
1128 | if [ ! -z "$PARAMS_LOCAL_ASR_DIR" ]; then
1129 | if [ -z "$DIR_MAP_PARAMS" ]; then
1130 | DIR_MAP_PARAMS="${DIR_PARAMS} -v ${PARAMS_LOCAL_ASR_DIR}:${PARAMS_DOCKER_ASR_DIR}"
1131 | else
1132 | DIR_MAP_PARAMS="${DIR_MAP_PARAMS} -v ${PARAMS_LOCAL_ASR_DIR}:${PARAMS_DOCKER_ASR_DIR}"
1133 | fi
1134 | fi
1135 | if [ ! -z "$PARAMS_LOCAL_VAD_DIR" ]; then
1136 | if [ -z "$DIR_MAP_PARAMS" ]; then
1137 | DIR_MAP_PARAMS="${DIR_PARAMS} -v ${PARAMS_LOCAL_VAD_DIR}:${PARAMS_DOCKER_VAD_DIR}"
1138 | else
1139 | DIR_MAP_PARAMS="${DIR_MAP_PARAMS} -v ${PARAMS_LOCAL_VAD_DIR}:${PARAMS_DOCKER_VAD_DIR}"
1140 | fi
1141 | fi
1142 | if [ ! -z "$PARAMS_LOCAL_PUNC_DIR" ]; then
1143 | if [ -z "$DIR_MAP_PARAMS" ]; then
1144 | DIR_MAP_PARAMS="${DIR_PARAMS} -v ${PARAMS_LOCAL_PUNC_DIR}:${PARAMS_DOCKER_PUNC_DIR}"
1145 | else
1146 |             DIR_MAP_PARAMS="${DIR_MAP_PARAMS} -v ${PARAMS_LOCAL_PUNC_DIR}:${PARAMS_DOCKER_PUNC_DIR}"
1147 | fi
1148 | fi
1149 | if [ ! -z "$PARAMS_LOCAL_EXEC_DIR" ]; then
1150 | if [ -z "$DIR_MAP_PARAMS" ]; then
1151 | DIR_MAP_PARAMS="${DIR_PARAMS} -v ${PARAMS_LOCAL_EXEC_DIR}:${PARAMS_DOCKER_EXEC_DIR}"
1152 | else
1153 | DIR_MAP_PARAMS="${DIR_MAP_PARAMS} -v ${PARAMS_LOCAL_EXEC_DIR}:${PARAMS_DOCKER_EXEC_DIR}"
1154 | fi
1155 | fi
1156 | if [ ! -z "$PARAMS_LOCAL_MODELS_DIR" ]; then
1157 | if [ -z "$DIR_MAP_PARAMS" ]; then
1158 | DIR_MAP_PARAMS="${DIR_PARAMS} -v ${PARAMS_LOCAL_MODELS_DIR}:${PARAMS_DOWNLOAD_MODEL_DIR}"
1159 | else
1160 | DIR_MAP_PARAMS="${DIR_MAP_PARAMS} -v ${PARAMS_LOCAL_MODELS_DIR}:${PARAMS_DOWNLOAD_MODEL_DIR}"
1161 | fi
1162 | fi
1163 |
1164 | EXEC_PARAMS="\"exec\":\"${PARAMS_DOCKER_EXEC_PATH}\""
1165 | if [ ! -z "$PARAMS_ASR_ID" ]; then
1166 | ASR_PARAMS="\"--model-dir\":\"${PARAMS_ASR_ID}\""
1167 | else
1168 | ASR_PARAMS="\"--model-dir\":\"${PARAMS_DOCKER_ASR_PATH}\""
1169 | fi
1170 | if [ ! -z "$PARAMS_VAD_ID" ]; then
1171 | VAD_PARAMS="\"--vad-dir\":\"${PARAMS_VAD_ID}\""
1172 | else
1173 | VAD_PARAMS="\"--vad-dir\":\"${PARAMS_DOCKER_VAD_PATH}\""
1174 | fi
1175 | if [ ! -z "$PARAMS_PUNC_ID" ]; then
1176 | PUNC_PARAMS="\"--punc-dir\":\"${PARAMS_PUNC_ID}\""
1177 | else
1178 | PUNC_PARAMS="\"--punc-dir\":\"${PARAMS_DOCKER_PUNC_PATH}\""
1179 | fi
1180 |     DOWNLOAD_PARAMS="\"--download-model-dir\":\"${PARAMS_DOWNLOAD_MODEL_DIR}\""
1181 | if [ -z "$PARAMS_DOWNLOAD_MODEL_DIR" ]; then
1182 | MODEL_PARAMS="${ASR_PARAMS},${VAD_PARAMS},${PUNC_PARAMS}"
1183 | else
1184 |         MODEL_PARAMS="${ASR_PARAMS},${VAD_PARAMS},${PUNC_PARAMS},${DOWNLOAD_PARAMS}"
1185 | fi
1186 |
1187 | DECODER_PARAMS="\"--decoder-thread-num\":\"${PARAMS_DECODER_THREAD_NUM}\""
1188 | IO_PARAMS="\"--io-thread-num\":\"${PARAMS_IO_THREAD_NUM}\""
1189 | THREAD_PARAMS=${DECODER_PARAMS},${IO_PARAMS}
1190 | PORT_PARAMS="\"--port\":\"${PARAMS_DOCKER_PORT}\""
1191 | CRT_PATH="\"--certfile\":\"/workspace/FunASR/funasr/runtime/ssl_key/server.crt\""
1192 | KEY_PATH="\"--keyfile\":\"/workspace/FunASR/funasr/runtime/ssl_key/server.key\""
1193 |
1194 | ENV_PARAMS=" -v /var/funasr:/workspace/.config"
1195 | ENV_PARAMS=" ${ENV_PARAMS} --env DAEMON_SERVER_CONFIG={\"server\":[{${EXEC_PARAMS},${MODEL_PARAMS},${THREAD_PARAMS},${PORT_PARAMS},${CRT_PATH},${KEY_PATH}}]}"
1196 |
1197 | RUN_CMD="${RUN_CMD}${PORT_MAP}${DIR_MAP_PARAMS}${ENV_PARAMS}"
1198 | RUN_CMD="${RUN_CMD} -it -d ${PARAMS_DOCKER_IMAGE}"
1199 |
1200 | # check Docker
1201 | checkDockerExist
1202 | result=$?
1203 | result=`expr $result + 0`
1204 | if [ ${result} -eq 50 ]; then
1205 | return 50
1206 | fi
1207 |
1208 | server_log="/var/funasr/server_console.log"
1209 | rm -f ${PROGRESS_TXT}
1210 | rm -f ${server_log}
1211 |
1212 | ${RUN_CMD}
1213 |
1214 | echo
1215 | echo -e " ${YELLOW}Loading models:${PLAIN}"
1216 |
1217 | # Hide the cursor, start draw progress.
1218 | printf "\e[?25l"
1219 | while true
1220 | do
1221 | ServerProgress
1222 | result=$?
1223 | stage=`expr $result + 0`
1224 | if [ ${stage} -eq 0 ]; then
1225 | break
1226 | elif [ ${stage} -gt 0 ] && [ ${stage} -lt 6 ]; then
1227 | sleep 0.1
1228 | # clear 3 lines
1229 | printf "\033[3A"
1230 | elif [ ${stage} -eq 6 ]; then
1231 | break
1232 | elif [ ${stage} -eq 98 ]; then
1233 | return 98
1234 | else
1235 | echo -e " ${RED}Starting FunASR server failed.${PLAIN}"
1236 | echo
1237 | # Display the cursor
1238 | printf "\e[?25h"
1239 | return 99
1240 | fi
1241 | done
1242 | # Display the cursor
1243 | printf "\e[?25h"
1244 |
1245 | echo -e " ${GREEN}The service has been started.${PLAIN}"
1246 | echo
1247 | echo -e " ${BOLD}If you want to see an example of how to use the client, you can run ${PLAIN}${GREEN}sudo bash funasr-runtime-deploy.sh -c${PLAIN} ."
1248 | echo
1249 | }
1250 |
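    | # checkDockerExist: return 50 if a container based on ${PARAMS_DOCKER_IMAGE}
    | # is already running; callers treat 50 as "stop the old container first".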
1251 | checkDockerExist(){
1252 | result=$(sudo docker ps | grep ${PARAMS_DOCKER_IMAGE} | wc -l)
1253 | result=`expr $result + 0`
1254 | if [ ${result} -ne 0 ]; then
1255 | echo
1256 | echo -e " ${RED}Docker: ${PARAMS_DOCKER_IMAGE} has been launched, please run (${PLAIN}${GREEN}sudo bash funasr-runtime-deploy.sh -p${PLAIN}${RED}) to stop Docker first.${PLAIN}"
1257 | return 50
1258 | fi
1259 | }
1260 |
1261 | dockerExit(){
1262 | echo -e " ${YELLOW}Stop docker(${PARAMS_DOCKER_IMAGE}) server ...${PLAIN}"
1263 | sudo docker stop `sudo docker ps -a| grep ${PARAMS_DOCKER_IMAGE} | awk '{print $1}' `
1264 | echo
1265 | sleep 1
1266 | }
1267 |
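    | # modelChange: update the ASR/VAD/PUNC model ID, picking the slot by which
    | # of the substrings "asr", "vad" or "punc" appears in the given model_id.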
1268 | modelChange(){
1269 | model_id=$1
1270 |
1271 | result=$(echo $1 | grep "asr")
1272 | if [[ "$result" != "" ]]
1273 | then
1274 | PARAMS_ASR_ID=$1
1275 | PARAMS_DOCKER_ASR_PATH=${PARAMS_DOCKER_ASR_DIR}/${PARAMS_ASR_ID}
1276 | return 0
1277 | fi
1278 | result=$(echo $1 | grep "vad")
1279 | if [[ "$result" != "" ]]
1280 | then
1281 | PARAMS_VAD_ID=$1
1282 | PARAMS_DOCKER_VAD_PATH=${PARAMS_DOCKER_VAD_DIR}/${PARAMS_VAD_ID}
1283 |         return 0
1284 | fi
1285 | result=$(echo $1 | grep "punc")
1286 | if [[ "$result" != "" ]]
1287 | then
1288 | PARAMS_PUNC_ID=$1
1289 | PARAMS_DOCKER_PUNC_PATH=${PARAMS_DOCKER_PUNC_DIR}/${PARAMS_PUNC_ID}
1290 |         return 0
1291 | fi
1292 | }
1293 |
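    | # sampleClientRun: download and unpack the sample client bundle, prompt for
    | # server IP/port and a wav file, then run the chosen C++ or Python client.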
1294 | sampleClientRun(){
1295 | echo -e "${YELLOW}Will download sample tools for the client to show how speech recognition works.${PLAIN}"
1296 |
1297 | sample_name="funasr_samples"
1298 | sample_tar="funasr_samples.tar.gz"
1299 | sample_url="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/sample/${sample_tar}"
1300 | DOWNLOAD_SAMPLE="curl -O ${sample_url}"
1301 | UNTAR_CMD="tar -zxf ${sample_tar}"
1302 |
1303 | if [ ! -f "${sample_tar}" ]; then
1304 | ${DOWNLOAD_SAMPLE}
1305 | fi
1306 | if [ -f "${sample_tar}" ]; then
1307 | ${UNTAR_CMD}
1308 | fi
1309 | if [ -d "${sample_name}" ]; then
1310 |
1311 | echo -e " Please select the client you want to run."
1312 | menuSelection ${SAMPLE_CLIENTS[*]}
1313 | result=$?
1314 | index=`expr $result - 1`
1315 | lang=${SAMPLE_CLIENTS[${index}]}
1316 | echo
1317 |
1318 | SERVER_IP="127.0.0.1"
1319 | read -p " Please enter the IP of server, default(${SERVER_IP}): " SERVER_IP
1320 | if [ -z "$SERVER_IP" ]; then
1321 | SERVER_IP="127.0.0.1"
1322 | fi
1323 |
1324 | HOST_PORT=`sed '/^PARAMS_HOST_PORT=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
1325 | if [ -z "$HOST_PORT" ]; then
1326 | HOST_PORT="10095"
1327 | fi
1328 | read -p " Please enter the port of server, default(${HOST_PORT}): " HOST_PORT
1329 | if [ -z "$HOST_PORT" ]; then
1330 | HOST_PORT=`sed '/^PARAMS_HOST_PORT=/!d;s/.*=//' ${FUNASR_CONFIG_FILE}`
1331 | if [ -z "$HOST_PORT" ]; then
1332 | HOST_PORT="10095"
1333 | fi
1334 | fi
1335 |
1336 | WAV_PATH="${cur_dir}/funasr_samples/audio/asr_example.wav"
1337 | read -p " Please enter the audio path, default(${WAV_PATH}): " WAV_PATH
1338 | if [ -z "$WAV_PATH" ]; then
1339 | WAV_PATH="${cur_dir}/funasr_samples/audio/asr_example.wav"
1340 | fi
1341 |
1342 | echo
1343 |         PRE_CMD=""
1344 | case "$lang" in
1345 | Linux_Cpp)
1346 |             PRE_CMD="export LD_LIBRARY_PATH=${cur_dir}/funasr_samples/cpp/libs:${LD_LIBRARY_PATH}"
1347 | CLIENT_EXEC="${cur_dir}/funasr_samples/cpp/funasr-wss-client"
1348 | RUN_CMD="${CLIENT_EXEC} --server-ip ${SERVER_IP} --port ${HOST_PORT} --wav-path ${WAV_PATH}"
1349 | echo -e " Run ${BLUE}${PRE_CMD}${PLAIN}"
1350 | ${PRE_CMD}
1351 | echo
1352 | ;;
1353 | Python)
1354 | CLIENT_EXEC="${cur_dir}/funasr_samples/python/wss_client_asr.py"
1355 | RUN_CMD="python3 ${CLIENT_EXEC} --host ${SERVER_IP} --port ${HOST_PORT} --mode offline --audio_in ${WAV_PATH} --send_without_sleep --output_dir ./funasr_samples/python"
1356 | PRE_CMD="pip3 install click>=8.0.4"
1357 | echo -e " Run ${BLUE}${PRE_CMD}${PLAIN}"
1358 | ${PRE_CMD}
1359 | echo
1360 | PRE_CMD="pip3 install -r ${cur_dir}/funasr_samples/python/requirements_client.txt"
1361 | echo -e " Run ${BLUE}${PRE_CMD}${PLAIN}"
1362 | ${PRE_CMD}
1363 | echo
1364 | ;;
1365 | *)
1366 | echo "$lang is not supported."
1367 | ;;
1368 | esac
1369 |
1370 | echo -e " Run ${BLUE}${RUN_CMD}${PLAIN}"
1371 | ${RUN_CMD}
1372 | echo
1373 | echo -e " If failed, you can try (${GREEN}${RUN_CMD}${PLAIN}) in your Shell."
1374 | echo
1375 | fi
1376 | }
1377 |
1378 | # Install main function
1379 | installFunasrDocker(){
1380 | installDocker
1381 | downloadDockerImage
1382 | }
1383 |
1384 | modelsConfigure(){
1385 | setupModelType
1386 | setupAsrModelId
1387 | setupVadModelId
1388 | setupPuncModelId
1389 | }
1390 |
1391 | paramsConfigure(){
1392 | selectDockerImages
1393 | result=$?
1394 | result=`expr $result + 0`
1395 | if [ ${result} -eq 50 ]; then
1396 | return 50
1397 | fi
1398 |
1399 | setupModelType
1400 | setupAsrModelId
1401 | setupVadModelId
1402 | setupPuncModelId
1403 | setupServerExec
1404 | setupHostPort
1405 | setupThreadNum
1406 | }
1407 |
1408 | # Display Help info
1409 | displayHelp(){
1410 | echo -e "${UNDERLINE}Usage${PLAIN}:"
1411 | echo -e " $0 [OPTIONAL FLAGS]"
1412 | echo
1413 |     echo -e "funasr-runtime-deploy.sh - a Bash script to install and run the FunASR docker."
1414 | echo
1415 | echo -e "${UNDERLINE}Options${PLAIN}:"
1416 | echo -e " ${BOLD}-i, --install${PLAIN} Install and run FunASR docker."
1417 | echo -e " ${BOLD}-s, --start${PLAIN} Run FunASR docker with configuration that has already been set."
1418 | echo -e " ${BOLD}-p, --stop${PLAIN} Stop FunASR docker."
1419 | echo -e " ${BOLD}-r, --restart${PLAIN} Restart FunASR docker."
1420 |     echo -e "  ${BOLD}-u, --update${PLAIN}     Update the model ID that has already been set, e.g.: --update model XXXX."
1421 | echo -e " ${BOLD}-c, --client${PLAIN} Get a client example to show how to initiate speech recognition."
1422 | echo -e " ${BOLD}-v, --version${PLAIN} Display current script version."
1423 | echo -e " ${BOLD}-h, --help${PLAIN} Display this help."
1424 | echo
1425 | echo -e "${UNDERLINE}funasr-runtime-deploy.sh${PLAIN} - Version ${scriptVersion} "
1426 | echo -e "Modify Date ${scriptDate}"
1427 | }
1428 |
1429 | # OS
1430 | OSID=$(grep ^ID= /etc/os-release | cut -d= -f2)
1431 | OSVER=$(lsb_release -cs 2>/dev/null)
1432 | OSNUM=$(grep -oE "[0-9.]+" /etc/issue)
1433 | CPUNUM=$(cat /proc/cpuinfo |grep "processor"|wc -l)
1434 | DOCKERINFO=$(sudo docker info | wc -l)
1435 | DOCKERINFOLEN=$(expr $DOCKERINFO)
1436 |
1437 | # PARAMS
1438 | FUNASR_CONFIG_FILE="/var/funasr/config"
1439 | # The path of server executor in local
1440 | PARAMS_LOCAL_EXEC_PATH=""
1441 | # The dir storing the server executor in local
1442 | PARAMS_LOCAL_EXEC_DIR=""
1443 | # The path of the server executor in docker
1444 | PARAMS_DOCKER_EXEC_PATH="/workspace/FunASR/funasr/runtime/websocket/build/bin/funasr-wss-server"
1445 | # The dir storing the server executor in docker
1446 | PARAMS_DOCKER_EXEC_DIR="/workspace/FunASR/funasr/runtime/websocket/build/bin"
1447 |
1448 | # The dir of model in local
1449 | PARAMS_LOCAL_MODELS_DIR=""
1450 | # The dir for downloading model in docker
1451 | PARAMS_DOWNLOAD_MODEL_DIR=""
1452 | # The Docker image name
1453 | PARAMS_DOCKER_IMAGE=""
1454 |
1455 | # The dir storing the punc model in local
1456 | PARAMS_LOCAL_PUNC_DIR=""
1457 | # The path of the punc model in local
1458 | PARAMS_LOCAL_PUNC_PATH=""
1459 | # The dir storing the punc model in docker
1460 | PARAMS_DOCKER_PUNC_DIR=""
1461 | # The path of the punc model in docker
1462 | PARAMS_DOCKER_PUNC_PATH=""
1463 | # The punc model ID in ModelScope
1464 | PARAMS_PUNC_ID=""
1465 |
1466 | # The dir storing the vad model in local
1467 | PARAMS_LOCAL_VAD_DIR=""
1468 | # The path of the vad model in local
1469 | PARAMS_LOCAL_VAD_PATH=""
1470 | # The dir storing the vad model in docker
1471 | PARAMS_DOCKER_VAD_DIR=""
1472 | # The path of the vad model in docker
1473 | PARAMS_DOCKER_VAD_PATH=""
1474 | # The vad model ID in ModelScope
1475 | PARAMS_VAD_ID=""
1476 |
1477 | # The dir storing the asr model in local
1478 | PARAMS_LOCAL_ASR_DIR=""
1479 | # The path of the asr model in local
1480 | PARAMS_LOCAL_ASR_PATH=""
1481 | # The dir storing the asr model in docker
1482 | PARAMS_DOCKER_ASR_DIR=""
1483 | # The path of the asr model in docker
1484 | PARAMS_DOCKER_ASR_PATH=""
1485 | # The asr model ID in ModelScope
1486 | PARAMS_ASR_ID=""
1487 |
1488 | PARAMS_HOST_PORT="10095"
1489 | PARAMS_DOCKER_PORT="10095"
1490 | PARAMS_DECODER_THREAD_NUM="32"
1491 | PARAMS_IO_THREAD_NUM="8"
1492 |
1493 |
1494 | echo -e "#############################################################"
1495 | echo -e "# ${RED}OS${PLAIN}: $OSID $OSNUM $OSVER "
1496 | echo -e "# ${RED}Kernel${PLAIN}: $(uname -m) Linux $(uname -r)"
1497 | echo -e "# ${RED}CPU${PLAIN}: $(grep 'model name' /proc/cpuinfo | uniq | awk -F : '{print $2}' | sed 's/^[ \t]*//g' | sed 's/ \+/ /g') "
1498 | echo -e "# ${RED}CPU NUM${PLAIN}: $CPUNUM"
1499 | echo -e "# ${RED}RAM${PLAIN}: $(cat /proc/meminfo | grep 'MemTotal' | awk -F : '{print $2}' | sed 's/^[ \t]*//g') "
1500 | echo -e "#############################################################"
1501 | echo
1502 |
1503 | # Entry point: dispatch on the first command-line argument.
1504 | case "$1" in
1505 | install|-i|--install)
1506 | rootNess
1507 | paramsConfigure
1508 | result=$?
1509 | result=`expr $result + 0`
1510 | if [ ${result} -ne 50 ]; then
1511 | showAllParams
1512 | installFunasrDocker
1513 | dockerRun
1514 | result=$?
1515 | stage=`expr $result + 0`
1516 | if [ ${stage} -eq 98 ]; then
1517 | dockerExit
1518 | dockerRun
1519 | fi
1520 | fi
1521 | ;;
1522 | start|-s|--start)
1523 | rootNess
1524 | paramsFromDefault
1525 | showAllParams
1526 | dockerRun
1527 | result=$?
1528 | stage=`expr $result + 0`
1529 | if [ ${stage} -eq 98 ]; then
1530 | dockerExit
1531 | dockerRun
1532 | fi
1533 | ;;
1534 | restart|-r|--restart)
1535 | rootNess
1536 | paramsFromDefault
1537 | showAllParams
1538 | dockerExit
1539 | dockerRun
1540 | result=$?
1541 | stage=`expr $result + 0`
1542 | if [ ${stage} -eq 98 ]; then
1543 | dockerExit
1544 | dockerRun
1545 | fi
1546 | ;;
1547 | stop|-p|--stop)
1548 | rootNess
1549 | paramsFromDefault
1550 | dockerExit
1551 | ;;
1552 | update|-u|--update)
1553 | rootNess
1554 | paramsFromDefault
1555 |
1556 | if [ $# -eq 1 ];
1557 | then
1558 | modelsConfigure
1559 | elif [ $# -eq 3 ];
1560 | then
1561 | type=$2
1562 | id=$3
1563 | MODEL="model"
1564 | if [ "$type" = "$MODEL" ]; then
1565 | modelChange $id
1566 | else
1567 | modelsConfigure
1568 | fi
1569 | else
1570 | modelsConfigure
1571 | fi
1572 |
1573 | saveParams
1574 | dockerExit
1575 | dockerRun
1576 | result=$?
1577 | stage=`expr $result + 0`
1578 | if [ ${stage} -eq 98 ]; then
1579 | dockerExit
1580 | dockerRun
1581 | fi
1582 | ;;
1583 | client|-c|--client)
1584 | rootNess
1585 | sampleClientRun
1586 | ;;
1587 | *)
1588 | clear
1589 | displayHelp
1590 | exit 0
1591 | ;;
1592 | esac
1593 |
--------------------------------------------------------------------------------