├── ASR-LLM-TTS
│   ├── asr-model
│   │   └── tts.txt
│   ├── llm-model
│   │   └── tts.txt
│   ├── tts-model
│   │   └── tts.txt
│   ├── api.py
│   ├── tts-api.py
│   ├── new_TTS.py
│   ├── ASR.py
│   ├── GLM4调用API.py
│   └── pureness.py
├── requirements.txt
├── README.md
├── demo
│   ├── README.md
│   └── openai_api.py
├── GLM4
│   ├── README.md
│   └── GLM4-API.py
└── video
    └── index.html

--------------------------------------------------------------------------------
/ASR-LLM-TTS/asr-model/tts.txt:
--------------------------------------------------------------------------------
Put the v3 (faster-whisper) model here.

--------------------------------------------------------------------------------
/ASR-LLM-TTS/llm-model/tts.txt:
--------------------------------------------------------------------------------
Put the LLM model here.

--------------------------------------------------------------------------------
/ASR-LLM-TTS/tts-model/tts.txt:
--------------------------------------------------------------------------------
Put the SoVITS model here.

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
openai==0.28.0
keyboard
faster-whisper
fastapi==0.104.1
uvicorn==0.24.0.post1
requests
modelscope==1.9.5
transformers==4.41.2
streamlit==1.24.0
sentencepiece==0.1.99
accelerate==0.24.1
tiktoken==0.7.0

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# SenseAI

SenseAI is a multi-function AI bot that integrates ASR (automatic speech recognition), an LLM (large language model), TTS (text-to-speech), and monitoring. It works with any model that follows the OpenAI API calling convention, streams LLM output, and lets you interrupt the model mid-conversation.

## Quick Start

### 1. Create a virtual environment

```bash
conda create -n senseai python=3.10 -y
conda activate senseai
```

### 2. Install dependencies

```bash
pip install -r requirements.txt
```

Not writing the rest; too much hassle.
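The remaining steps are undocumented. A plausible sequence, inferred from the repository layout and the defaults hard-coded in the scripts (the model folders come from the placeholder notes under `ASR-LLM-TTS/`; the ports and URLs are the scripts' defaults, not verified instructions):

```bash
# Drop the models into the placeholder folders first:
#   ASR-LLM-TTS/asr-model  -> faster-whisper v3
#   ASR-LLM-TTS/llm-model  -> LLM weights
#   ASR-LLM-TTS/tts-model  -> GPT-SoVITS weights

# Start the OpenAI-compatible LLM server (port 8000 by default)
python ASR-LLM-TTS/api.py

# Start the TTS server
python ASR-LLM-TTS/tts-api.py

# Run the full voice loop (ASR -> LLM -> TTS)
python ASR-LLM-TTS/GLM4调用API.py --url http://localhost:6006 --model_path ./ASR-LLM-TTS/asr-model
```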
--------------------------------------------------------------------------------
/demo/README.md:
--------------------------------------------------------------------------------
`openai_api.py` demonstrates a basic conversation-interrupt feature.

The output is streamed: the model's response is printed incrementally rather than all at once after generation finishes. Streaming is what makes it possible to interrupt the model mid-output through an external action (such as a key press).

Run the file with:

```bash
python openai_api.py
```

While the model is printing, press the Tab key to interrupt it. Even after an interruption, the partial response is kept in the conversation history, so you can keep asking follow-up questions about the interrupted content.

--------------------------------------------------------------------------------
/ASR-LLM-TTS/api.py:
--------------------------------------------------------------------------------
import os

import uvicorn

from llamafactory.api.app import create_app
from llamafactory.chat import ChatModel


def main():
    # Wrap a LLaMA-Factory ChatModel in an OpenAI-compatible FastAPI app.
    chat_model = ChatModel()
    app = create_app(chat_model)
    api_host = os.environ.get("API_HOST", "0.0.0.0")
    api_port = int(os.environ.get("API_PORT", "8000"))
    print("Visit http://localhost:{}/docs for the API documentation.".format(api_port))
    uvicorn.run(app, host=api_host, port=api_port)


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/GLM4/README.md:
--------------------------------------------------------------------------------
`GLM4-API.py` is a synchronous client. It can interrupt the model while it is still streaming text (hold the Tab key), and typing `clear` clears the conversation history with the model.

Step 1: go to AutoDL and start this instance.

Step 2: open the page and copy the command.

Final step: run the API command.

After that, you can run the client on your local machine:

```bash
python GLM4-API.py
```
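The request format the server expects can be read off the client code further below: a JSON POST carrying `prompt`, `history`, `max_length`, `top_p`, and `temperature`, answered as a `data:`-prefixed SSE stream. A curl sketch; the URL is a placeholder for your own AutoDL tunnel address, and the root endpoint path is inferred from the client, not from server documentation:

```bash
curl -N -X POST "https://your-instance.seetacloud.com:8448" \
  -H "Content-Type: application/json" \
  -d '{"prompt": "你好", "history": [], "max_length": 2048, "top_p": 0.8, "temperature": 0.6}'
```

`-N` disables curl's output buffering so the streamed chunks appear as they arrive.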
--------------------------------------------------------------------------------
/demo/openai_api.py:
--------------------------------------------------------------------------------
import openai
import keyboard

api_key = "sk-zk2c80daf6787e81e9f45971926d7372e636644470813455"
openai.api_base = "https://api.zhizengzeng.com/v1/"
stop_flag = False


def tab_key():
    global stop_flag
    stop_flag = True
    print("\nAssistant: 已被打断.")


keyboard.add_hotkey('tab', tab_key)


def chat_with_gpt3_5(messages):
    global stop_flag
    response = openai.ChatCompletion.create(
        model="gpt-4o-2024-05-13",
        messages=messages,
        api_key=api_key,
        stream=True
    )
    full_response = ""
    for chunk in response:
        if stop_flag:
            break
        content = chunk['choices'][0].get('delta', {}).get('content', '')
        if content:
            print(content, end='', flush=True)
            full_response += content
    print()  # print a newline once the stream has been consumed
    return full_response


conversation = [
    {"role": "system", "content": "你是一个聪明的AI"}
]

while True:
    user_input = input("You: ")
    if user_input.lower() == '退出':
        print("Assistant: 再见!")
        break

    stop_flag = False  # reset stop_flag before each new input
    conversation.append({"role": "user", "content": user_input})
    print("Assistant: ", end='', flush=True)
    assistant_message = chat_with_gpt3_5(conversation)

    # Even if interrupted, keep the partial response in the conversation history.
    if assistant_message:
        conversation.append({"role": "assistant", "content": assistant_message})

print("Assistant: 会话已终止。")

--------------------------------------------------------------------------------
/GLM4/GLM4-API.py:
--------------------------------------------------------------------------------
import requests
import json
import argparse
import keyboard

stop_flag = False

def interrupt_response():
    global stop_flag
    stop_flag = True

keyboard.add_hotkey('tab', interrupt_response)

def stream_response(url, prompt, history):
    headers = {'Content-Type': 'application/json'}
    data = {
        "prompt": prompt,
        "history": history,
        "max_length": 2048,
        "top_p": 0.8,
        "temperature": 0.6
    }
    # Stream the response and yield each SSE "data:" payload as a dict.
    with requests.post(url, headers=headers, json=data, stream=True) as response:
        response.raise_for_status()
        for line in response.iter_lines():
            if line:
                line = line.decode('utf-8')
                if line.startswith('data:'):
                    yield json.loads(line[5:])

def chat_with_ai(url, prompt, history):
    global stop_flag
    full_response = ""
    print("AI: ", end="", flush=True)
    try:
        for chunk in stream_response(url, prompt, history):
            if stop_flag:
                break
            if 'response' in chunk:
                content = chunk['response']
                print(content, end="", flush=True)
                full_response += content
            elif 'end_of_stream' in chunk:
                break
    except requests.RequestException as e:
        print(f"\n错误: {e}")
    print()
    return full_response

def main(url):
    global stop_flag
    history = []
    print(f"同步文本聊天系统已启动,使用服务器地址: {url}")
    print("开始聊天... (按Tab键中断AI的回应)")
    print("输入'clear'清除聊天历史")

    try:
        while True:
            user_input = input("你: ").strip()
            if user_input.lower() == 'clear':
                history = []
                print("聊天历史已清除。开始新的对话。")
                continue

            stop_flag = False
            full_response = chat_with_ai(url, user_input, history)
            if full_response:
                history.append((user_input, full_response))
            elif not stop_flag:
                print("警告: 未收到AI的回应。")
    except KeyboardInterrupt:
        print("\n程序被用户中断")
    finally:
        # The loop above never falls through on its own, so the hotkey
        # is removed here once the user exits with Ctrl+C.
        keyboard.remove_hotkey('tab')

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="同步文本聊天系统")
    parser.add_argument("--url", type=str, default="https://u456499-b362-14f1ece3.nma1.seetacloud.com:8448",
                        help="服务器地址")
    args = parser.parse_args()
    main(args.url)
--------------------------------------------------------------------------------
/ASR-LLM-TTS/tts-api.py:
--------------------------------------------------------------------------------
import logging
import os.path

import ngrok
from flask import Flask
from flask_apscheduler import APScheduler
from flask_login import LoginManager
from flask_wtf import CSRFProtect

from utils.data_utils import clean_folder, check_is_none
from utils.phrases_dict import phrases_dict_init
from tts_app.frontend.views import frontend
from tts_app.voice_api.views import voice_api
from tts_app.auth.views import auth
from tts_app.admin.views import admin

from contants import config

app = Flask(__name__, template_folder=os.path.join(os.path.dirname(__file__), 'tts_app', 'templates'),
            static_folder=os.path.join(os.path.dirname(__file__), 'tts_app', 'static'))

app.config.from_pyfile("config.py")
# app.config.update(config)

phrases_dict_init()

csrf = CSRFProtect(app)
# Exempt the TTS voice API from CSRF protection.
csrf.exempt(voice_api)

if config.system.is_admin_enabled:
    login_manager = LoginManager()
    login_manager.init_app(app)
    login_manager.login_view = 'auth.login'


    @login_manager.user_loader
    def load_user(user_id):
        admin = config.admin
        if admin.get_id() == user_id:
            return admin
        return None

# Initialize the scheduler for periodic cache cleaning.
scheduler = APScheduler()
scheduler.init_app(app)
if config.system.clean_interval_seconds > 0:
    scheduler.start()

app.register_blueprint(frontend, url_prefix='/')
app.register_blueprint(voice_api, url_prefix='/voice')
if config.system.is_admin_enabled:
    app.register_blueprint(auth, url_prefix=config.system.admin_route)
    app.register_blueprint(admin, url_prefix=config.system.admin_route)


def create_folders(paths):
    for path in paths:
        if not os.path.exists(path):
            os.makedirs(path, exist_ok=True)


create_folders([os.path.join(config.abs_path, config.system.upload_folder),
                os.path.join(config.abs_path, config.system.cache_path), ])


# Regular cleaning of the upload and cache folders.
@scheduler.task('interval', id='clean_task', seconds=config.system.clean_interval_seconds,
                misfire_grace_time=900)
def clean_task():
    clean_folder(os.path.join(config.abs_path, config.system.upload_folder))
    clean_folder(os.path.join(config.abs_path, config.system.cache_path))


if __name__ == '__main__':
    try:
        if not check_is_none(config.ngrok_config.auth_token):
            listener = ngrok.forward(config.http_service.port, authtoken=config.ngrok_config.auth_token)
            logging.info(f"Ingress established at {listener.url()}")
        else:
            logging.info("Not using ngrok.")
    except Exception as e:
        logging.error(f"Not using ngrok. Authtoken error: {e}")

    app.run(host=config.http_service.host, port=config.http_service.port, debug=config.http_service.debug)
--------------------------------------------------------------------------------
/ASR-LLM-TTS/new_TTS.py:
--------------------------------------------------------------------------------
import requests
import io
import pygame
import keyboard
import pureness
import threading
import queue
import re
import time

c3 = pureness.CUT200()
BASE_URL = "http://192.168.110.68:23456"
stop_flag = threading.Event()
audio_queue = queue.Queue()
text_buffer = ""
audio_playing = threading.Event()
processing_lock = threading.Lock()

def interrupt_playback():
    global text_buffer
    stop_flag.set()
    print("\n操作被中断.")
    pygame.mixer.music.stop()
    clear_audio_queue()
    with processing_lock:
        text_buffer = ""  # discard any text still waiting to be synthesized

keyboard.add_hotkey('tab', interrupt_playback)

def clear_audio_queue():
    with audio_queue.mutex:
        audio_queue.queue.clear()

def play_audio_thread():
    # Background thread: pull synthesized audio off the queue and play it.
    pygame.mixer.init()
    while True:
        try:
            audio_stream = audio_queue.get(timeout=1)
            if stop_flag.is_set():
                continue
            audio_playing.set()
            pygame.mixer.music.load(audio_stream)
            pygame.mixer.music.play()
            while pygame.mixer.music.get_busy() and not stop_flag.is_set():
                pygame.time.Clock().tick(10)
            pygame.mixer.music.stop()
            if audio_queue.empty():
                audio_playing.clear()
        except queue.Empty:
            audio_playing.clear()

def text_to_speech_stream(text, speaker_id=1):
    if stop_flag.is_set():
        return None
    params = {
        "id": speaker_id,
        "segment_size": 5,
        "streaming": True,
        "prompt_lang": "auto",
        "prompt_text": "都红到耳朵根了,现在要是用嘴亲亲你的小脸蛋?",
        "preset": "default",
        "text": text
    }
    try:
        with requests.get(f"{BASE_URL}/voice/gpt-sovits", params=params, stream=True) as response:
            response.raise_for_status()
            return io.BytesIO(response.content) if not stop_flag.is_set() else None
    except requests.RequestException as e:
        print(f"网络请求错误: {e}")
        return None

def text_to_speech(text, speaker_id=1, flush=False):
    global text_buffer
    with processing_lock:
        if stop_flag.is_set():
            return
        text_buffer += text

        # re.split with a capturing group keeps the delimiters, so pairing
        # segments[i] with segments[i+1] re-attaches each sentence to its
        # terminating punctuation mark.
        segments = re.split(r'([。?!.?!])', text_buffer)
        complete_segments = [''.join(segments[i:i+2]) for i in range(0, len(segments)-1, 2) if segments[i]]

        for segment in complete_segments:
            if stop_flag.is_set():
                break
            audio_stream = text_to_speech_stream(segment, speaker_id)
            if audio_stream:
                audio_queue.put(audio_stream)

        # Whatever follows the last delimiter stays buffered for the next call.
        text_buffer = segments[-1] if len(segments) % 2 == 1 else ''

        if flush and text_buffer and not stop_flag.is_set():
            audio_stream = text_to_speech_stream(text_buffer, speaker_id)
            if audio_stream:
                audio_queue.put(audio_stream)
            text_buffer = ""

def flush_tts_buffer():
    text_to_speech("", flush=True)

def wait_for_audio_complete():
    while not audio_queue.empty() or audio_playing.is_set():
        time.sleep(0.1)
        if stop_flag.is_set():
            break

def reset_stop_flag():
    stop_flag.clear()
    clear_audio_queue()

# Start the audio playback thread.
playback_thread = threading.Thread(target=play_audio_thread, daemon=True)
playback_thread.start()
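A minimal sketch of how this module is meant to be driven (the same pattern `GLM4调用API.py` uses below): feed streamed text chunks in, flush the tail, then wait for playback to drain. The chunk strings are made up for illustration, and a reachable TTS server at `BASE_URL` is assumed.

```python
from new_TTS import text_to_speech, flush_tts_buffer, wait_for_audio_complete, reset_stop_flag

reset_stop_flag()
for chunk in ["你好,", "今天天气", "不错。"]:  # imagine these arriving from a streaming LLM
    text_to_speech(chunk)   # buffers text; synthesizes each sentence once its punctuation arrives
flush_tts_buffer()          # synthesize whatever is left in the buffer
wait_for_audio_complete()   # block until the playback queue drains
```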
--------------------------------------------------------------------------------
/ASR-LLM-TTS/ASR.py:
--------------------------------------------------------------------------------
import pyaudio
import numpy as np
import webrtcvad
from faster_whisper import WhisperModel
import time
import argparse


class AudioRecorder:
    def __init__(self, chunk: int = 320, format: int = pyaudio.paInt16, channels: int = 1, rate: int = 16000):
        # 320 samples at 16 kHz is a 20 ms frame, one of the sizes webrtcvad accepts.
        self.chunk = chunk
        self.format = format
        self.channels = channels
        self.rate = rate
        self.p = pyaudio.PyAudio()
        self.vad = webrtcvad.Vad(3)  # most aggressive VAD mode

    def record(self) -> np.ndarray:
        stream = self.p.open(format=self.format, channels=self.channels, rate=self.rate, input=True,
                             frames_per_buffer=self.chunk)
        frames = []
        num_silent_frames = 0
        triggered = False

        try:
            while True:
                data = stream.read(self.chunk)
                is_speech = self.vad.is_speech(data, self.rate)

                if not triggered:
                    if is_speech:
                        triggered = True
                    else:
                        continue

                frames.append(data)

                if not is_speech:
                    num_silent_frames += 1
                    if num_silent_frames > 30:  # about 0.6 s of silence (30 x 20 ms frames)
                        break
                else:
                    num_silent_frames = 0
        finally:
            stream.stop_stream()
            stream.close()

        return np.frombuffer(b''.join(frames), dtype=np.int16).astype(np.float32) / 32768.0

    def __del__(self):
        self.p.terminate()


class SpeechRecognizer:
    def __init__(self, model_path: str, device: str = "cuda"):
        self.model = WhisperModel(model_path, device=device, local_files_only=True)

    def transcribe(self, audio: np.ndarray) -> str:
        segments, _ = self.model.transcribe(audio, beam_size=5, language="zh", vad_filter=True,
                                            vad_parameters=dict(min_silence_duration_ms=1000))
        return " ".join([segment.text for segment in segments]).strip()


def is_valid_input(text: str, min_chars: int = 2) -> bool:
    cleaned_text = ''.join(ch for ch in text if not (ch.isspace() or ch in ',。!?;:""''()、'))
    return len(cleaned_text) >= min_chars


def main(model_path: str, max_retries: int = 3):
    recorder = AudioRecorder()
    recognizer = SpeechRecognizer(model_path)

    print("ASR系统已启动,请开始说话...")

    try:
        while True:
            for attempt in range(max_retries):
                print(f"请说话... (尝试 {attempt + 1}/{max_retries})")
                audio_data = recorder.record()
                transcription = recognizer.transcribe(audio_data)

                if is_valid_input(transcription):
                    print(f"识别结果: {transcription}")
                    break
                else:
                    print("未检测到有效语音输入,请重新说话...")
            else:
                print("多次尝试未检测到有效输入,请检查麦克风或环境噪音...")
                continue  # don't act on an invalid transcription

            if transcription.lower() == '退出':
                print("程序退出!")
                break

            time.sleep(1)  # keep the loop from pegging the CPU

    except KeyboardInterrupt:
        print("程序被用户中断")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="ASR System")
    parser.add_argument("--model_path", type=str, default=r"B:\faster-whisper\v3", help="Path to the Whisper model")
    parser.add_argument("--max_retries", type=int, default=3, help="Maximum number of retry attempts")
    args = parser.parse_args()

    main(args.model_path, args.max_retries)
--------------------------------------------------------------------------------
/ASR-LLM-TTS/GLM4调用API.py:
--------------------------------------------------------------------------------
import requests
import json
import argparse
from typing import List, Tuple
import keyboard
from ASR import AudioRecorder, SpeechRecognizer
from new_TTS import text_to_speech, flush_tts_buffer, wait_for_audio_complete, reset_stop_flag

# Global interrupt flag, set by the Tab hotkey.
stop_flag = False


def interrupt_response():
    global stop_flag
    stop_flag = True
    print("\nAI: 已被打断.")


keyboard.add_hotkey('tab', interrupt_response)


def stream_response(url: str, prompt: str, history: List[Tuple[str, str]]):
    try:
        headers = {'Content-Type': 'application/json'}
        data = {
            "prompt": prompt,
            "history": history,
            "max_length": 2048,
            "top_p": 0.8,
            "temperature": 0.6
        }
        response = requests.post(url, headers=headers, json=data, stream=True)
        response.raise_for_status()

        for line in response.iter_lines():
            if line:
                line = line.decode('utf-8')
                if line.startswith('data:'):
                    yield json.loads(line[5:])
    except requests.RequestException as e:
        print(f"An error occurred: {e}")
        yield {"error": str(e)}


def chat_with_ai(url: str, prompt: str, history: List[Tuple[str, str]]) -> str:
    global stop_flag
    full_response = ""
    print("AI: ", end="", flush=True)

    for chunk in stream_response(url, prompt, history):
        if stop_flag:
            break
        if 'error' in chunk:
            print(f"\n错误: {chunk['error']}")
            break
        if 'response' in chunk:
            content = chunk['response']
            print(content, end="", flush=True)
            text_to_speech(content)  # speak each chunk as it streams in
            full_response += content
        elif 'end_of_stream' in chunk:
            break

    if not stop_flag:
        flush_tts_buffer()
    print()  # newline after the streamed output
    return full_response


def main(url: str, model_path: str, max_retries: int = 3):
    global stop_flag
    recorder = AudioRecorder()
    recognizer = SpeechRecognizer(model_path)
    history = []

    print(f"语音交互系统已启动,使用服务器地址: {url}")
    print("请开始说话...")

    try:
        while True:
            wait_for_audio_complete()

            for attempt in range(max_retries):
                print(f"请说话... (尝试 {attempt + 1}/{max_retries})")
                audio_data = recorder.record()
                user_input = recognizer.transcribe(audio_data)

                if user_input and len(user_input.strip()) >= 2:
                    print(f"识别结果: {user_input}")
                    break
                else:
                    print("未检测到有效语音输入,请重新说话...")
            else:
                print("多次尝试未检测到有效输入,请检查麦克风或环境噪音...")
                continue

            if user_input.lower() == '退出':
                print("AI: 再见!")
                break

            stop_flag = False
            reset_stop_flag()

            full_response = chat_with_ai(url, user_input, history)

            if full_response:
                history.append((user_input, full_response))

    except KeyboardInterrupt:
        print("程序被用户中断")
    finally:
        print("AI: 会话已终止。")
        wait_for_audio_complete()
        keyboard.remove_hotkey('tab')


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="语音交互系统")
    parser.add_argument("--url", type=str, default="http://localhost:6006", help="服务器地址")
    parser.add_argument("--model_path", type=str, default=r"B:\faster-whisper\v3", help="Whisper模型路径")
    parser.add_argument("--max_retries", type=int, default=3, help="最大重试次数")
    args = parser.parse_args()

    main(args.url, args.model_path, args.max_retries)
--------------------------------------------------------------------------------
/ASR-LLM-TTS/pureness.py:
--------------------------------------------------------------------------------
import io
import os
import re
import ffmpeg
import requests
import logging
import time
import sys

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
    handlers=[
        logging.StreamHandler()  # log to the console
    ]
)
limit = 2000
sys.setrecursionlimit(limit)


def role2id(role):
    speaker = m1.get_speaker()
    for s in range(len(speaker)):
        if role == speaker[s]:
            return s


def id2role(id):
    speaker = m1.get_speaker()
    for e in range(len(speaker)):
        pattern = r"\(.*?\)"
        speaker[e] = re.sub(pattern, "", speaker[e], re.S)
    return speaker[int(id)]


class CUT200:
    """Recursively slices long text into chunks of at most ~100 characters,
    cutting at the punctuation mark nearest the midpoint."""

    def __init__(self):
        self.new = []
        self.newnew = []

    def slice_string(self, text):
        if len(text) <= 100:
            return text

        mid_index = len(text) // 2
        left_part = ''
        right_part = ''

        # Walk left from the midpoint to the nearest punctuation mark and cut there.
        for i in range(mid_index, -1, -1):
            if text[i] in [',', '。', '!', '?', '!', '.', '?', '~', '~', ',', '.']:
                left_part = text[:i + 1]
                right_part = text[i + 1:]
                break
        else:
            # No punctuation in the left half: hard-split at the midpoint
            # so the text is not silently dropped.
            left_part, right_part = text[:mid_index], text[mid_index:]

        return left_part, right_part

    def str2list(self, content):
        if isinstance(content, str):
            return self.main_cutting([content])

    def main_cutting(self, content):
        self.newnew = content
        start = time.time()
        # Up to 50 passes; each pass halves every chunk that is still too long.
        for i in range(50):
            self.new = []
            for l in self.newnew:
                if len(l) > 100:
                    left, right = self.slice_string(l)
                    self.new.append(left)
                    self.new.append(right)
                else:
                    self.new.append(self.slice_string(l))
            # Stringify and drop empty fragments before the next pass.
            self.new = [str(item) for item in self.new if item != '' and item is not None]
            self.newnew = self.new

        end = time.time()
        print(f'切片完成,耗时{end - start}')
        return [item for item in self.new if item != '' and item is not None]


class conbined_wavs:

    def __init__(self):
        self.detail_name = None
        self.name = None

    def make_valid_filename(self, filename):
        # Strip characters that are illegal in file names (plus whitespace).
        return re.sub(r'[<>:"/\\|?*\s]', '', filename)

    def add_name(self, name):
        self.name = self.make_valid_filename(str(name))

    def conbine(self, wavs):
        # Concatenate the generated wav segments into one file with ffmpeg.
        ffmpeg_path = r'./ffmpeg-master-latest-win64-gpl/bin/ffmpeg.exe'

        input_streams = [ffmpeg.input(filename) for filename in wavs]
        output = ffmpeg.concat(*input_streams, v=0, a=1)

        out_filename = f'{self.name}.wav'
        output = ffmpeg.output(output, out_filename)

        ffmpeg.run(output, cmd=ffmpeg_path)
class main:
    def __init__(self):
        self.content = []
        self.wavs = []

    def read(self, speakers, tempid, _long, noise, noisew, content, filename):
        # Normalize whitespace and strip HTML entity leftovers before slicing.
        self.content = content.replace('\n ', '').replace('\n', '').replace(' ', '').replace(' ', '').replace('amp;', '')
        if self.content:
            self.content = c3.str2list(self.content)
            length = len(self.content)
            speaker = '123'
            files = []

            # Text substitutions for strings the TTS model mispronounces.
            for i in range(len(self.content)):
                pattern = r'恢(.*?)皿'
                self.content[i] = re.sub(pattern, r'恢父皿', self.content[i])
            self.content = [item.replace('炸一炸', '杂一杂') for item in self.content]

            # Pre-allocate one timestamped temp wav path per slice.
            for i in range(length):
                time.sleep(0.01)
                file = f'./temp/{time.time()}.wav'
                files.append(file)
            index = [0] * length
            print(self.content)

            # First synthesis pass.
            for i in range(length):
                process = f'{i}/{length}'
                logging.info(f'{process}|{speaker}|{self.content[i][:1]}......')
                url = f'http://cn-hk-bgp-6.ofalias.net:28666/tts?text={self.content[i]}&batch_size=8'
                r = requests.get(url)
                stream = io.BytesIO(r.content)

                with open(files[i], "wb") as f:
                    f.write(stream.getvalue())
                file_size = os.path.getsize(files[i])

                # Responses under 1 KB are treated as failed synthesis.
                if file_size < 1024:
                    index[i] = 0
                    os.remove(files[i])
                else:
                    self.wavs.append(files[i])
                    index[i] = 1

            # Retry failed slices, up to 30 rounds.
            for j in range(30):
                for i in range(len(index)):
                    if index[i] == 0:
                        process = f'合成失败音频{i}'
                        logging.info(f'{process}|{speaker}|{self.content[i][:1]}......')
                        url = f'http://cn-hk-bgp-6.ofalias.net:28666/tts?text={self.content[i]}&batch_size=8'
                        print(url)
                        r = requests.get(url)
                        stream = io.BytesIO(r.content)
                        with open(files[i], "wb") as f:
                            f.write(stream.getvalue())
                        file_size = os.path.getsize(files[i])
                        if file_size < 1024:  # 1 KB = 1024 bytes
                            index[i] = 0
                            os.remove(files[i])
                        else:
                            index[i] = 1

            c1.add_name(f'{filename}')
            print(files, 'wavs')
            c1.conbine(files)
            print(f'{filename}合成完成')
            m1.del_folders(files)
            self.wavs = []

    def del_folders(self, wavs):
        # Delete the temporary wav segments.
        for wav in wavs:
            os.remove(wav)
            print(f'删除{wav}')

    def get_speaker(self):
        # The speaker list used to be fetched from a remote API; it is stubbed out.
        data_dict = []
        print(data_dict)
        return data_dict


if __name__ == '__main__':
    c1 = conbined_wavs()
    c3 = CUT200()
    m1 = main()
    try:
        # Read every text file in the vits folder and synthesize it.
        folder_path = 'vits'

        # Sort first so file_paths and file_names stay aligned.
        file_names = sorted(os.listdir(folder_path))
        file_paths = []

        for file_name in file_names:
            file_path = os.path.join(folder_path, file_name)
            file_paths.append(file_path)

        speakers = m1.get_speaker()
        tempid = int(input('输入模型ID(输入数字:'))
        _long = float(input('输入长度(越长越慢,按回车选取默认值1.1):') or 1.1)
        noise = float(input('输入控制感情起伏(按回车选取默认值0.37):') or 0.37)
        noisew = float(input('输入控制音素发音长度(按回车选取默认值0.2):') or 0.2)
        for file_path in range(len(file_paths)):
            with open(file_paths[file_path], 'r', encoding='utf-8') as file:
                content = file.read()

            m1.read(speakers, tempid, _long, noise, noisew, content, file_names[file_path])
    except Exception as e:
        print("发生错误:", str(e), '遇到问题请加群691432604')
        input("按任意键继续...")
        sys.exit(1)
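A quick illustration of what `CUT200` does. The input string is made up; the slicer splits any chunk longer than 100 characters at the punctuation mark nearest its midpoint and returns the pieces as a list:

```python
from pureness import CUT200

cutter = CUT200()
long_text = "第一句话。" * 60  # about 300 characters of repeated short sentences
pieces = cutter.str2list(long_text)
print([len(p) for p in pieces])  # every piece should be at most ~100 characters
```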
--------------------------------------------------------------------------------
/video/index.html:
--------------------------------------------------------------------------------
(HTML markup not captured in this dump.)