├── LICENSE ├── README.md ├── ban.txt ├── clean_list.py ├── input └── example.srt ├── merge.py ├── pack.py └── split.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 AliceNavigator 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Alice_split_toolset 2 | Split audio using the .srt file, clean up annotations, then merge and package into a format suitable for bert-vits2 in a standard manner. 3 | 使用.srt文件分割音频并清洗标注,合并封装至适用于bert-vits2的一个较为标准的格式 4 | 5 | ### usage 使用 6 | - 将同名wav和srt文件放入input,依照顺序执行,更多详细参数见-h 7 | - Place the wav and srt files with the same name into the 'input' folder, execute in sequence, and see -h for more detailed parameters. 8 | ```bash 9 | python split.py --mono 10 | python clean_list.py --filter_english 11 | python merge.py 12 | python pack.py baki 13 | ``` 14 | -------------------------------------------------------------------------------- /ban.txt: -------------------------------------------------------------------------------- 1 | 啧 -------------------------------------------------------------------------------- /clean_list.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import re 4 | 5 | 6 | def process_mapping(mapping_path, filter_english, ban_file): 7 | with open(mapping_path, 'r', encoding='utf-8') as file: 8 | lines = file.readlines() 9 | 10 | banned_phrases = [] 11 | if ban_file and os.path.exists(ban_file): 12 | with open(ban_file, 'r', encoding='utf-8') as bf: 13 | banned_phrases = [line.strip() for line in bf.readlines()] 14 | 15 | clean_mapping = [] 16 | 17 | for line in lines: 18 | filename, text = line.strip().split("|") 19 | 20 | if filter_english and re.search(r"[a-zA-Z]", text): 21 | print(f'drop non-kanji text : {text}') 22 | continue 23 | 24 | if any(ban_phrase in text for ban_phrase in banned_phrases): 25 | print(f'drop ban text : {text}') 26 | continue 27 | 28 | clean_mapping.append(line) 29 | 30 | with open(f'{mapping_path[:-12]}/clean_mapping.list', 'w', encoding='utf-8') as file: 31 | for line in clean_mapping: 32 | file.write(line) 33 | 34 | 35 | if __name__ == "__main__": 36 | parser = argparse.ArgumentParser(description="Process and clean mapping.list based on criteria") 37 | parser.add_argument("--filter_english", action="store_true", default=False, help="Remove entries with English text") 38 | parser.add_argument("--ban_file", default="ban.txt", help="Path to file with banned phrases") 39 | args = parser.parse_args() 40 | 41 | for root, dirs, files in os.walk('output'): 42 | for folder in dirs: 43 | process_mapping(f"./output/{folder}/mapping.list", args.filter_english, args.ban_file) 44 | -------------------------------------------------------------------------------- /input/example.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:00:00,200 --> 00:00:01,200 3 | 直播搞好 4 | 5 | 2 6 | 00:00:01,566 --> 00:00:03,866 7 | 先要把我推流搞定 -------------------------------------------------------------------------------- /merge.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from pydub import AudioSegment 4 | from tqdm import tqdm 5 | import shutil 6 | 7 | def merge_segments(mapping_path, folder_name, max_length): 8 | merge_path = f"merge/{folder_name}" 9 | if not os.path.exists(merge_path): 10 | os.makedirs(merge_path) 11 | else: 12 | print(f'检测到{merge_path}已存在,执行删除') 13 | shutil.rmtree(merge_path) 14 | os.makedirs(merge_path) 15 | 16 | with open(mapping_path, 'r', encoding='utf-8') as file: 17 | lines = file.readlines() 18 | 19 | segments_to_merge = [] 20 | current_text_length = 0 21 | new_mapping = [] 22 | 23 | for line in tqdm(lines, desc=f"Processing {folder_name}", unit="line"): 24 | filename, text = line.strip().split("|") 25 | current_text_length += len(text) 26 | 27 | segments_to_merge.append((filename, text)) 28 | 29 | if current_text_length > max_length: 30 | merged_audio = AudioSegment.empty() 31 | merged_text = [] 32 | 33 | for seg_file, seg_text in segments_to_merge: 34 | audio_path = os.path.join(os.path.dirname(mapping_path), seg_file) 35 | segment_audio = AudioSegment.from_wav(audio_path) 36 | merged_audio += segment_audio 37 | merged_text.append(seg_text) 38 | 39 | merged_filename = f"{segments_to_merge[0][0]}_to_{segments_to_merge[-1][0]}" 40 | merged_audio.export(os.path.join(merge_path, merged_filename), format="wav") 41 | new_mapping.append(f"{merged_filename}|{','.join(merged_text)}") 42 | 43 | segments_to_merge = [] 44 | current_text_length = 0 45 | 46 | if segments_to_merge: 47 | merged_audio = AudioSegment.empty() 48 | merged_text = [] 49 | 50 | for seg_file, seg_text in segments_to_merge: 51 | audio_path = os.path.join(os.path.dirname(mapping_path), seg_file) 52 | segment_audio = AudioSegment.from_wav(audio_path) 53 | merged_audio += segment_audio 54 | merged_text.append(seg_text) 55 | 56 | merged_filename = f"{segments_to_merge[0][0]}_to_{segments_to_merge[-1][0]}" 57 | merged_audio.export(os.path.join(merge_path, merged_filename), format="wav") 58 | new_mapping.append(f"{merged_filename}|{' '.join(merged_text)}") 59 | 60 | with open(os.path.join(merge_path, "new_mapping.list"), 'w', encoding='utf-8') as file: 61 | for line in new_mapping: 62 | file.write(line + "\n") 63 | 64 | if __name__ == "__main__": 65 | parser = argparse.ArgumentParser(description="Merge short segments from mapping.list") 66 | parser.add_argument("--max", type=int, default=20, help="Maximum text length for a segment") 67 | args = parser.parse_args() 68 | 69 | for root, dirs, files in os.walk('output'): 70 | for folder in tqdm(dirs, desc="Merging folders", unit="folder"): 71 | merge_segments(f"./output/{folder}/clean_mapping.list", folder, args.max) 72 | -------------------------------------------------------------------------------- /pack.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import shutil 4 | from shutil import copyfile 5 | from tqdm import tqdm 6 | 7 | 8 | def process_and_rename(character_name): 9 | dataset_path = f"./dataset/{character_name}" 10 | if not os.path.exists(dataset_path): 11 | os.makedirs(dataset_path) 12 | else: 13 | print(f'检测到"{dataset_path}" 已存在,执行删除') 14 | shutil.rmtree(dataset_path) 15 | os.makedirs(dataset_path) 16 | 17 | counter = 1 18 | 19 | all_folders = [folder for r, d, f in os.walk('./merge') for folder in d] 20 | for folder in tqdm(all_folders, desc="Processing folders", unit="folder"): 21 | mapping_path = os.path.join('./merge', folder, "new_mapping.list") 22 | if os.path.exists(mapping_path): 23 | with open(mapping_path, 'r', encoding='utf-8') as file: 24 | lines = file.readlines() 25 | 26 | for line in tqdm(lines, desc=f"Processing files in {folder}", unit="file", leave=False): 27 | old_filename, text = line.strip().split("|") 28 | old_filepath = os.path.join('./merge', folder, old_filename) 29 | 30 | new_filename = f"{character_name}_{counter}.wav" 31 | new_filepath = os.path.join(dataset_path, new_filename) 32 | new_mapping_entry = f"./dataset/{character_name}/{new_filename}|{character_name}|ZH|{text}" 33 | 34 | # Copy and rename the file 35 | copyfile(old_filepath, new_filepath) 36 | 37 | with open(os.path.join(dataset_path, "dataset_mapping.list"), 'a', encoding='utf-8') as dataset_file: 38 | dataset_file.write(new_mapping_entry + "\n") 39 | 40 | counter += 1 41 | 42 | 43 | if __name__ == "__main__": 44 | parser = argparse.ArgumentParser(description="Rename and restructure files based on character_name") 45 | parser.add_argument("character_name", type=str, help="Name of the character for restructuring") 46 | args = parser.parse_args() 47 | 48 | process_and_rename(args.character_name) 49 | -------------------------------------------------------------------------------- /split.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import shutil 4 | 5 | from pydub import AudioSegment 6 | from tqdm import tqdm 7 | 8 | 9 | def time_to_milliseconds(time_str): 10 | h, m, s = map(float, time_str.split(":")) 11 | return int(h * 3600000 + m * 60000 + s * 1000) 12 | 13 | 14 | def sanitize_filename(filename): 15 | # 过滤掉Windows上不允许的字符,并限制文件名的长度 16 | illegal_chars = ['<', '>', ':', '"', '/', '\\', '|', '?', '*'] 17 | for char in illegal_chars: 18 | filename = filename.replace(char, '_') 19 | return filename[:247] # 247是为了保证后续可以添加后缀和索引 20 | 21 | 22 | def split_wav_by_srt(srt_path, wav_path, output_folder, sample_rate, mono, use_subtitle_as_name): 23 | if not os.path.exists(output_folder): 24 | os.makedirs(output_folder) 25 | else: 26 | print(f'检测到"{output_folder}" 已存在,执行删除') 27 | shutil.rmtree(output_folder) 28 | os.makedirs(output_folder) 29 | 30 | mapping = [] 31 | 32 | with open(srt_path, 'r', encoding='utf-8') as file: 33 | content = file.read() 34 | blocks = content.strip().split("\n\n") 35 | audio = AudioSegment.from_wav(wav_path) 36 | prj_name = os.path.basename(wav_path)[:-4] 37 | 38 | for block in tqdm(blocks, desc=f"Processing {prj_name}"): 39 | lines = block.split("\n") 40 | times = lines[1].split("-->") 41 | start_time, end_time = [time_to_milliseconds(t.strip().replace(",", ".")) for t in times] 42 | subtitle = " ".join(lines[2:]) 43 | 44 | segment = audio[start_time:end_time] 45 | 46 | if mono: 47 | segment = segment.set_channels(1) 48 | 49 | if sample_rate: 50 | segment = segment.set_frame_rate(sample_rate) 51 | 52 | if use_subtitle_as_name: 53 | filename = sanitize_filename(subtitle) + ".wav" 54 | idx = 1 55 | while os.path.exists(os.path.join(output_folder, prj_name, filename)): 56 | filename = sanitize_filename(subtitle) + f"_{idx}.wav" 57 | idx += 1 58 | else: 59 | filename = f"{start_time}_{end_time}.wav" 60 | mapping.append(f"{filename}|{subtitle}") 61 | 62 | if not os.path.exists(os.path.join(output_folder, prj_name)): 63 | os.makedirs(os.path.join(output_folder, prj_name)) 64 | segment.export(os.path.join(output_folder, prj_name, filename), format="wav", parameters=["-sample_fmt", "s16"]) 65 | 66 | if not use_subtitle_as_name: 67 | with open(os.path.join(output_folder, prj_name, "mapping.list"), "a", encoding="utf-8") as f: 68 | for line in mapping: 69 | f.write(line + "\n") 70 | 71 | 72 | if __name__ == "__main__": 73 | parser = argparse.ArgumentParser(description="Split WAVs based on SRT timings in a folder") 74 | parser.add_argument("--input_folder", type=str, default="input", help="Path to the input folder containing SRT and WAV files") 75 | parser.add_argument("--output_folder", type=str, default="output", help="Output folder path") 76 | parser.add_argument("--sample_rate", type=int, default=44100, help="Sample rate for output WAVs") 77 | parser.add_argument("--mono", action="store_true", help="Convert to mono") 78 | parser.add_argument("--use_subtitle_as_name", action="store_true", help="Use subtitle as filename") 79 | 80 | args = parser.parse_args() 81 | 82 | for root, dirs, files in os.walk(args.input_folder): 83 | for file in files: 84 | if file.endswith(".srt"): 85 | wav_file = file.replace(".srt", ".wav") 86 | if wav_file in files: 87 | split_wav_by_srt(os.path.join(root, file), os.path.join(root, wav_file), args.output_folder, 88 | args.sample_rate, args.mono, args.use_subtitle_as_name) 89 | --------------------------------------------------------------------------------