├── LICENSE ├── README.md ├── __init__.py ├── asr.py ├── config.yaml ├── dialogue.py ├── llm.py ├── main.py ├── memory.py ├── player.py ├── plugins ├── README.md ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── registry.cpython-311.pyc │ └── task_manager.cpython-311.pyc ├── function_calls_config.json ├── function_manager.py ├── functions │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-311.pyc │ │ ├── aigc_manus.cpython-311.pyc │ │ ├── get_day_of_week.cpython-311.pyc │ │ ├── get_weather.cpython-311.pyc │ │ ├── ielts_speaking_practice.cpython-311.pyc │ │ ├── open_application.cpython-311.pyc │ │ ├── schedule_task.cpython-311.pyc │ │ ├── search_local_documents.cpython-311.pyc │ │ └── web_search.cpython-311.pyc │ ├── aigc_manus.py │ ├── get_day_of_week.py │ ├── get_weather.py │ ├── ielts_speaking_practice.py │ ├── open_application.py │ ├── schedule_task.py │ ├── search_local_documents.py │ └── web_search.py ├── registry.py └── task_manager.py ├── recorder.py ├── robot.py ├── tts.py ├── utils.py └── vad.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 ZhangChao-maker 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🌟**EdgePersona**- 全本地化智能数字人 2 | 3 | **完全离线 | 隐私无忧 | 轻量高效 |笔记本友好** 4 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 5 | 6 | ![image](https://github.com/user-attachments/assets/80500d00-1bff-42ae-9db2-d803a63d5ee6) 7 | 8 | - 测试效果:【纯本地部署的电子魅魔!笔记本也能离线,隐私性拉满】 https://www.bilibili.com/video/BV1jydeYTETD/?share_source=copy_web&vd_source=91103f439a8a41ea0b41d31764b980eb 9 | 10 | ## 🚀 核心特性 11 | - **100%本地化运行** - 无需网络连接,所有数据处理在本地完成 12 | - **多模态交互** - 支持语音对话、表情控制、肢体动作同步响应 13 | - **超低硬件要求** - 实测笔记本NVIDIA 3060显卡即可实时运行 14 | - **角色深度定制** - 通过YAML配置文件自定义人物性格/动作/语音风格 15 | - **多模型支持** - 兼容Ollama、HuggingFace、GGUF等主流模型格式 16 | 17 | ## 📦 快速开始 18 | 19 | ### 环境要求 20 | - NVIDIA显卡(≥6GB VRAM) 21 | - Python >=3.11.11 22 | - Windows/Linux/macOS(M系列芯片需启用Metal加速) 23 | 24 | ### 安装步骤 25 | ```bash 26 | # 克隆仓库 27 | git clone https://github.com/zc-maker/EdgePersona.git 28 | 29 | # 创建虚拟环境(推荐) 30 | conda #推荐 31 | # 安装依赖 32 | pip install -r requirements.txt 33 | 34 | 35 | 启动数字人 36 | bash 37 | python main.py 38 | 39 | 🎮 操作指南 40 | 角色配置文件示例: 41 | main.py 42 | 43 | 声音配置: 44 | 使用KOKOROTTS速度最快。 45 | 46 | 如果显卡比较好的话,可以将声音换成CosyVoice2TTS,已经在tts.py中进行了实现,但是我的电脑有点慢,具体需要修改tts.py中的prompt_text="your.wav文本",ref_path = 'your.wav' 47 | 48 | llm本地配置: 49 | self.base_url = 
class ASR(ABC):
    """Common base for speech-recognition backends.

    Concrete backends implement :meth:`recognizer`; the WAV-dumping helper
    below is shared by all of them.
    """

    @staticmethod
    def _save_audio_to_file(audio_data, file_path):
        """Write raw PCM chunks to ``file_path`` as a WAV file.

        The header is fixed to mono, 2-byte samples at 16000 Hz.
        ``audio_data`` is an iterable of ``bytes`` chunks which are
        concatenated in order. Any failure is logged and re-raised.
        """
        try:
            with wave.open(file_path, 'wb') as wav_out:
                # (nchannels, sampwidth, framerate, nframes, comptype, compname);
                # the wave module patches the real frame count on close.
                wav_out.setparams((1, 2, 16000, 0, 'NONE', 'not compressed'))
                wav_out.writeframes(b''.join(audio_data))
            logger.info(f"ASR识别文件录音保存到:{file_path}")
        except Exception as err:
            logger.error(f"保存音频文件时发生错误: {err}")
            raise

    @abstractmethod
    def recognizer(self, stream_in_audio):
        """Process the incoming audio stream and return the recognised text.

        Must be implemented by every subclass.
        """
def create_instance(class_name, *args, **kwargs):
    """Build an object from a name defined in this module.

    ``class_name`` is looked up in the module globals and called with the
    remaining positional and keyword arguments.

    Raises:
        ValueError: if no truthy global with that name exists.
    """
    target = globals().get(class_name)
    if not target:
        raise ValueError(f"Class {class_name} not found")
    # Forward everything else untouched to the constructor.
    return target(*args, **kwargs)
min_silence_duration_ms: 300 # 如果说话停顿比较长,可以把这个值设置大一些 37 | 38 | LLM: 39 | OpenAILLM: 40 | # model_name: deepseek-chat 41 | # url: https://api.deepseek.com 42 | # api_key: 43 | model_name: llama3.2:latest 44 | url: http://localhost:11434/v1 45 | api_key: test 46 | 47 | TTS: 48 | MacTTS: 49 | voice: Tingting 50 | output_file: tmp/ 51 | EdgeTTS: 52 | voice: zh-CN-XiaoxiaoNeural 53 | output_file: tmp/ 54 | GTTS: 55 | lang: zh 56 | output_file: tmp/ 57 | CosyvoiceTTS: 58 | output_file: tmp/ 59 | CHATTTS: 60 | output_file: tmp/ 61 | KOKOROTTS: 62 | output_file: tmp/ 63 | lang: z 64 | voice: zf_xiaoxiao 65 | 66 | Player: 67 | PygameSoundPlayer: null 68 | PygamePlayer: null 69 | CmdPlayer: null 70 | PyaudioPlayer: null 71 | 72 | Rag: 73 | doc_path: documents/ 74 | emb_model: models/bge-small-zh 75 | model_name: llama3.2:latest 76 | url: http://localhost:11434/v1 77 | api_key: test 78 | 79 | Memory: 80 | dialogue_history_path: tmp/ 81 | memory_file: tmp/memory.json 82 | model_name: llama3.2:latest 83 | url: http://localhost:11434/v1 84 | api_key: test 85 | 86 | TaskManager: 87 | functions_call_name: plugins/function_calls_config.json 88 | aigc_manus_enabled: false 89 | 90 | 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /dialogue.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import uuid 3 | from typing import List, Dict 4 | from datetime import datetime 5 | from utils import write_json_file 6 | 7 | 8 | class Message: 9 | def __init__(self, role: str, content: str = None, uniq_id: str = None, start_time: datetime = None, end_time: datetime = None, 10 | audio_file: str = None, tts_file: str = None, vad_status: list = None, tool_calls = None, tool_call_id=None): 11 | self.uniq_id = uniq_id if uniq_id is not None else str(uuid.uuid4()) 12 | self.role = role 13 | self.content = content 14 | self.start_time = start_time 15 | self.end_time = end_time 16 | 
class OpenAILLM(LLM):
    """Streaming chat client for an OpenAI-compatible endpoint.

    The defaults point at a local Ollama server. Positional arguments are
    accepted and ignored, exactly as before; the keyword arguments
    ``model_name``, ``api_key`` and ``base_url`` (or ``url``) override the
    defaults when given.
    """

    DEFAULT_MODEL_NAME = 'qwq:latest'
    DEFAULT_API_KEY = 'null'
    DEFAULT_BASE_URL = 'http://localhost:11434/v1'

    def __init__(self, *args, **kwargs):
        self.model_name = kwargs.get("model_name") or self.DEFAULT_MODEL_NAME
        self.api_key = kwargs.get("api_key") or self.DEFAULT_API_KEY
        # base_url used to be read without ever being assigned, so building
        # the client raised AttributeError. It now always has a value.
        self.base_url = (
            kwargs.get("base_url") or kwargs.get("url") or self.DEFAULT_BASE_URL
        )
        self.client = openai.OpenAI(api_key=self.api_key, base_url=self.base_url)

    def response(self, dialogue):
        """Yield the text delta of every streamed chunk for ``dialogue``.

        ``dialogue`` is a list of chat messages such as
        ``[{"role": "user", "content": "hello"}]``. A yielded delta may be
        ``None`` when a chunk carries no text. Errors are logged and end the
        stream instead of propagating.
        """
        try:
            responses = self.client.chat.completions.create(
                model=self.model_name,
                messages=dialogue,
                stream=True,
            )
            for chunk in responses:
                if not chunk.choices:
                    # Skip chunks with no choices instead of aborting the
                    # whole stream with an IndexError.
                    continue
                yield chunk.choices[0].delta.content

        except Exception as e:
            logger.error(f"Error in response generation: {e}")

    def response_call(self, dialogue, functions_call):
        """Yield ``(text_delta, tool_calls_delta)`` for every streamed chunk.

        ``functions_call`` is the list of tool definitions. It is sent as
        ``tools`` only when non-empty, so plain chats are unaffected. Errors
        are logged and end the stream instead of propagating.
        """
        logger.debug(f"dialogue {dialogue}")
        logger.debug(f"functions_call {functions_call}")
        request = {
            "model": self.model_name,
            "messages": dialogue,
            "stream": True,
        }
        if functions_call:
            # Without this the model never sees the tools and tool_calls is
            # always None.
            request["tools"] = functions_call
        try:
            responses = self.client.chat.completions.create(**request)
            for chunk in responses:
                if not chunk.choices:
                    continue
                delta = chunk.choices[0].delta
                yield delta.content, delta.tool_calls

        except Exception as e:
            logger.error(f"Error in response generation: {e}")
class Rag:
    """Singleton retrieval-augmented QA chain over local Markdown documents.

    The first ``Rag(config)`` call builds the vector store and the chain;
    later calls return the same instance and ignore their argument.
    """

    _instance = None

    def __new__(cls, config: dict=None):
        if cls._instance is None:
            if config is None:
                raise ValueError("Rag needs a config dict on first instantiation")
            instance = super(Rag, cls).__new__(cls)
            instance.init(config)
            # Cache only after init succeeded, so a failed build is retried
            # rather than handing out a half-initialised object.
            cls._instance = instance
        return cls._instance

    def init(self, config: dict):
        """Load documents, embed them and assemble the RAG chain.

        Keys read from ``config``: ``doc_path``, ``emb_model``,
        ``model_name``, ``api_key`` and the endpoint under ``base_url`` or
        ``url``.
        """
        self.doc_path = config.get("doc_path")
        self.emb_model = config.get("emb_model")
        self.template = prompt_template
        self.custom_rag_prompt = PromptTemplate.from_template(self.template)
        # config.yaml stores the endpoint under "url"; "base_url" alone was
        # always missing, so the configured endpoint was ignored.
        endpoint = config.get("base_url") or config.get("url")
        self.llm = ChatOpenAI(
            model=config.get("model_name"),
            base_url=endpoint,
            api_key=config.get("api_key"),
        )

        # Every Markdown file under doc_path is loaded as plain text.
        loader = DirectoryLoader(
            self.doc_path,
            glob="**/*.md",
            loader_cls=TextLoader,
        )
        documents = loader.load()

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        splits = text_splitter.split_documents(documents)

        model_kwargs = {'device': 'cpu'}
        encode_kwargs = {'normalize_embeddings': True}
        embedding_model = HuggingFaceBgeEmbeddings(
            model_name=self.emb_model,
            model_kwargs=model_kwargs,
            encode_kwargs=encode_kwargs,
        )

        vector_store = Chroma.from_documents(documents=splits, embedding=embedding_model)
        retriever = vector_store.as_retriever()

        def format_docs(docs):
            """Join retrieved chunks into one context string."""
            return "\n\n".join(doc.page_content for doc in docs)

        self.rag_chain = (
            {"context": retriever | format_docs, "question": RunnablePassthrough()}
            | self.custom_rag_prompt
            | self.llm
            | StrOutputParser()
        )

    def query(self, query):
        """Run ``query`` through the chain and return the prefixed answer."""
        result = self.rag_chain.invoke(query)
        return f"帮你找到: {query} 相关的信息," + str(result)
@staticmethod
def dialogues_history(dialogues):
    """Render dialogue entries as ``role: content`` lines joined by newlines.

    Each entry is a dict. A missing or ``None`` role falls back to
    '未知角色'. A missing or ``None`` content becomes an empty string, and
    non-string content is converted with ``str``, so a stored message with
    ``"content": null`` no longer raises TypeError. Each rendered line is
    also logged at debug level.
    """
    rendered = []
    for dialogue in dialogues:
        role = dialogue.get('role') or '未知角色'
        content = dialogue.get('content')
        content = '' if content is None else str(content)
        logger.debug(f"{role}: {content}")
        rendered.append(f"{role}: {content}")
    return "\n".join(rendered)
class AbstractPlayer(object):
    """Queue-backed audio player base class.

    ``play`` converts a file to WAV and enqueues it; a background consumer
    thread hands queued items to ``do_playing``, which subclasses implement.
    """

    # How often the consumer wakes up to check for a shutdown request.
    _POLL_INTERVAL_S = 0.2

    def __init__(self, *args, **kwargs):
        super(AbstractPlayer, self).__init__()
        self.is_playing = False
        self.play_queue = queue.Queue()
        self._stop_event = threading.Event()
        self.consumer_thread = threading.Thread(target=self._playing)
        self.consumer_thread.start()

    @staticmethod
    def to_wav(audio_file):
        """Convert ``audio_file`` to WAV next to it and return the new path."""
        tmp_file = audio_file + ".wav"
        wav_file = AudioSegment.from_file(audio_file)
        wav_file.export(tmp_file, format="wav")
        return tmp_file

    def _playing(self):
        """Consumer loop: play queued items until shutdown is requested."""
        while not self._stop_event.is_set():
            try:
                # A bounded wait lets the loop notice the stop event. The old
                # unbounded get() never returned on an empty queue, so
                # shutdown() hung forever in join().
                data = self.play_queue.get(timeout=self._POLL_INTERVAL_S)
            except queue.Empty:
                continue
            self.is_playing = True
            try:
                self.do_playing(data)
            except Exception as e:
                logger.error(f"播放音频失败: {e}")
            finally:
                # Clear the flag before task_done() so anyone woken by
                # Queue.join() already sees the idle state.
                self.is_playing = False
                self.play_queue.task_done()

    def play(self, data):
        """Convert ``data`` (an audio file path) to WAV and enqueue it."""
        logger.info(f"play file {data}")
        audio_file = self.to_wav(data)
        self.play_queue.put(audio_file)

    def stop(self):
        """Drop everything still waiting in the queue."""
        self._clear_queue()

    def shutdown(self):
        """Drop pending items, stop the consumer thread and wait for it."""
        self._clear_queue()
        self._stop_event.set()
        if self.consumer_thread.is_alive():
            self.consumer_thread.join()

    def get_playing_status(self):
        """Return True while something is playing or still queued."""
        return self.is_playing or (not self.play_queue.empty())

    def _clear_queue(self):
        """Empty the pending queue under the queue's own lock."""
        with self.play_queue.mutex:
            self.play_queue.queue.clear()

    def do_playing(self, audio_file):
        """Play one queued item; must be implemented by subclasses."""
        raise NotImplementedError("Subclasses must implement do_playing")
class PygamePlayer(AbstractPlayer):
    """Player that streams files through ``pygame.mixer.music``."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        pygame.mixer.init()

    def do_playing(self, audio_file):
        """Wait for the current track to finish, then start ``audio_file``.

        ``music.play()`` is not waited on here; the busy check in
        ``get_playing_status`` covers the time the track is still sounding.
        """
        music = pygame.mixer.music
        try:
            # Poll until the previous track has ended.
            while music.get_busy():
                pygame.time.Clock().tick(100)
            logger.debug("PygamePlayer 加载音频中")
            music.load(audio_file)
            logger.debug("PygamePlayer 加载音频结束,开始播放")
            music.play()
            logger.debug(f"播放完成:{audio_file}")
        except Exception as err:
            logger.error(f"播放音频失败: {err}")

    def get_playing_status(self):
        """Return a truthy value while playing, queued, or the mixer is busy."""
        if self.is_playing:
            return self.is_playing
        if not self.play_queue.empty():
            return True
        return pygame.mixer.music.get_busy()

    def stop(self):
        """Clear the pending queue and halt the current track."""
        super().stop()
        pygame.mixer.music.stop()
class PygameSoundPlayer(AbstractPlayer):
    """Singleton player that drives a Live2D model's mouth from the audio.

    Audio is played in this process through ``pygame.mixer``. The model is
    rendered in a separate process, which reads the RMS level and a sync
    flag from shared ``multiprocessing.Value`` objects.
    """

    _instance = None  # the single shared instance

    def __new__(cls, model_path="../../live2/models/兔兔/520兔兔.model3.json"):
        """Return the shared instance, building it on first use."""
        if cls._instance is None:
            instance = super().__new__(cls)
            instance._init_player(model_path)
            # Cache only after initialisation succeeded.
            cls._instance = instance
        return cls._instance

    def __init__(self, *args, **kwargs):
        # Deliberately empty. Python calls __init__ after every __new__ that
        # returns an instance of the class. Without this override the
        # inherited AbstractPlayer.__init__ ran again on each call, replacing
        # the queue and starting one more consumer thread.
        pass

    def _init_player(self, model_path):
        """One-time set-up: queue and thread, mixer, shared state, renderer."""
        self.model_path = model_path

        # Base-class set-up (queue and consumer thread), run exactly once.
        super().__init__()

        pygame.mixer.init(frequency=44100, size=-16, channels=1, buffer=2048)

        # State shared with the render process.
        self.rms_value = mp.Value('d', 0.0)
        self.sync_flag = mp.Value('b', False)
        self.lipsync = WavHandler()

        self.model_process = mp.Process(
            target=self._render_entry,
            args=(self.model_path, self.rms_value, self.sync_flag),
            daemon=True
        )
        self.model_process.start()
        time.sleep(1)  # give the renderer a moment to start

    @staticmethod
    def _render_entry(model_path, rms_value, sync_flag):
        """Render-process entry point: window, model and draw loop.

        Returns when the window is closed or ``q`` is pressed.
        """
        # Imported here so the child process sets up its own state.
        import pygame
        import live2d.v3 as live2d

        pygame.init()
        screen = pygame.display.set_mode((800, 600), DOUBLEBUF | OPENGL|RESIZABLE )
        live2d.init()
        if live2d.LIVE2D_VERSION == 3:
            live2d.glewInit()

        model = live2d.LAppModel()
        model.LoadModelJson(model_path)
        model.Resize(800, 600)
        model.SetExpression("hands")

        clock = pygame.time.Clock()
        while True:
            for event in pygame.event.get():
                if event.type == QUIT or (event.type == KEYDOWN and event.key == K_q):
                    return

            # Follow the audio level while synced, otherwise a slow sine idle.
            current_rms = rms_value.value if sync_flag.value else 0.1 + math.sin(time.time() * 3) * 0.05

            model.SetParameterValue("ParamMouthOpenY", current_rms)
            model.Update()

            live2d.clearBuffer(1.0, 1.0, 1.0, 1.0)
            model.Draw()
            pygame.display.flip()
            clock.tick(60)

    def play(self, data):
        """Convert ``data`` to WAV, load it and enqueue ``(sound, wav_path)``."""
        audio_file = self.to_wav(data)
        sound = pygame.mixer.Sound(audio_file)
        self.play_queue.put((sound, audio_file))

    def do_playing(self, sound):
        """Play one queued ``(sound, wav_path)`` pair with lip sync."""
        try:
            sound, audio_file = sound

            self.lipsync.Start(audio_file)

            channel = pygame.mixer.Channel(0)
            self.sync_flag.value = True
            channel.play(sound)

            # One clock for the whole loop instead of a new one per pass.
            clock = pygame.time.Clock()
            while channel.get_busy():
                if self.lipsync.Update():
                    with self.rms_value.get_lock():
                        self.rms_value.value = self.lipsync.GetRms() * 2.5
                clock.tick(100)  # poll about 100 times per second

        except Exception as e:
            logger.error(f"播放失败: {str(e)}")
        finally:
            self.sync_flag.value = False

    def shutdown(self):
        """Stop the renderer, close the mixer, then stop the consumer."""
        if self.model_process.is_alive():
            self.model_process.terminate()
        pygame.mixer.quit()
        super().shutdown()
def create_instance(class_name, *args, **kwargs):
    """Instantiate the player class named ``class_name`` from this module.

    The remaining positional and keyword arguments are passed to the
    constructor. Only real classes are accepted; a name that resolves to a
    module, function or constant is reported as not found.

    Raises:
        ValueError: if ``class_name`` does not name a class in this module.
    """
    cls = globals().get(class_name)
    if not isinstance(cls, type):
        raise ValueError(f"Class {class_name} not found")
    return cls(*args, **kwargs)
class LipSyncController:
    """Play a WAV file on a worker thread and publish its loudness.

    While a sound plays, ``current_rms`` holds the latest RMS level scaled
    by ``lip_factor``.
    """

    def __init__(self):
        self.wav_handler = WavHandler()
        self.is_playing = False
        self.lip_factor = 20  # gain applied to the raw RMS value
        self.lock = threading.Lock()
        self.audio_thread = None
        # Defined up front so readers never hit AttributeError before the
        # first audio frame has been analysed.
        self.current_rms = 0.0

    def play(self, audio_path):
        """Start playing ``audio_path``, stopping any current playback first.

        Returns True when the worker thread was started, False on error.
        """
        if self.is_playing:
            self.stop()

        try:
            if pygame.mixer.get_init() is None:
                pygame.mixer.init()

            self.audio_thread = threading.Thread(target=self._audio_worker, args=(audio_path,))
            self.is_playing = True
            self.audio_thread.start()
            return True
        except Exception as e:
            print(f"音频播放失败: {str(e)}")
            return False

    def _audio_worker(self, audio_path):
        """Worker: play the sound and refresh ``current_rms`` until it ends."""
        try:
            print(f"加载音频: {audio_path}")
            pygame.mixer.Sound(audio_path).play()
            self.wav_handler.Start(audio_path)

            clock = pygame.time.Clock()
            while pygame.mixer.get_busy():
                if self.wav_handler.Update():
                    rms = self.wav_handler.GetRms()
                    self.current_rms = rms * self.lip_factor
                clock.tick(60)  # at most 60 updates per second

        except Exception as e:
            print(f"音频线程错误: {str(e)}")
        finally:
            self.is_playing = False

    def stop(self):
        """Stop playback and wait briefly for the worker to finish."""
        if self.is_playing:
            # Playback uses a Sound channel, so mixer.music.stop() had no
            # effect; mixer.stop() halts every active channel.
            pygame.mixer.stop()
            worker = self.audio_thread
            if worker is not None and worker is not threading.current_thread():
                worker.join(timeout=1)
            self.is_playing = False
支持功能!本文档将指导你如何配置和使用这一功能。 4 | 5 | 6 | ## 简介 7 | 8 | 百聆(Bailing)是一个开源的语音助手,旨在通过集成 ASR、LLM 和 TTS 技术提供类似 GPT-4o 的性能。这个插件现在支持 function call 能力,可以让你通过函数调用扩展其功能。 9 | 10 | ## 功能 11 | 12 | - **动态功能调用**:通过定义函数接口,实现动态调用功能。 13 | - **灵活配置**:支持多种功能配置方式。 14 | 15 | 16 | ## 配置 17 | 18 | 1. **创建配置文件**:在项目根目录下创建一个名为 `function_calls_config.json` 的配置文件。该文件将用于定义你的 function call 相关配置。 19 | 20 | ```json 21 | { 22 | "type": "function", 23 | "function": { 24 | "name": "get_weather", 25 | "description": "获取某个地点的天气,用户应先提供一个位置,比如用户说杭州天气,参数为:zhejiang/hangzhou,比如用户说北京天气怎么样,参数为:beijing/beijing", 26 | "parameters": { 27 | "type": "object", 28 | "properties": { 29 | "city": { 30 | "type": "string", 31 | "description": "城市,zhejiang/hangzhou" 32 | } 33 | }, 34 | "required": [ 35 | "city" 36 | ] 37 | } 38 | } 39 | } 40 | ``` 41 | 42 | 2. **实现函数逻辑**:在functions文件夹下,实现你的工具逻辑 43 | 44 | ```python 45 | import requests 46 | from bs4 import BeautifulSoup 47 | 48 | from plugins.registry import register_function 49 | from plugins.registry import ActionResponse, Action 50 | 51 | @register_function('get_weather') 52 | def get_weather(city: str): 53 | """ 54 | "获取某个地点的天气,用户应先提供一个位置,\n比如用户说杭州天气,参数为:zhejiang/hangzhou,\n\n比如用户说北京天气怎么样,参数为:beijing/beijing", 55 | city : 城市,zhejiang/hangzhou 56 | """ 57 | url = "https://tianqi.moji.com/weather/china/"+city 58 | headers = { 59 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36' 60 | } 61 | response = requests.get(url, headers=headers) 62 | if response.status_code!=200: 63 | return ActionResponse(Action.REQLLM, None, "请求失败") 64 | soup = BeautifulSoup(response.text, "html.parser") 65 | weather = soup.find('meta', attrs={'name':'description'})["content"] 66 | weather = weather.replace("墨迹天气", "") 67 | return ActionResponse(Action.REQLLM, None, weather) 68 | 69 | if __name__ == "__main__": 70 | print(get_weather("zhejiang/hangzhou")) 71 | 72 | ``` 73 | 74 | 75 | 3. 
当前支持的工具有: 76 | 77 | | 函数名 | 描述 | 功能 | 示例 | 78 | |-----------------------|-----------------------------------------------|------------------------------------------------------------|--------------------------------------------------------------| 79 | | `get_weather` | 获取某个地点的天气信息 | 提供地点名称后,返回该地点的天气情况 | 用户说:“杭州天气怎么样?” → `zhejiang/hangzhou` | 80 | | `ielts_speaking_practice` | IELTS(雅思)口语练习 | 生成雅思口语练习题目和对话,帮助用户进行雅思口语练习 | - | 81 | | `get_day_of_week` | 获取当前的星期几或日期 | 当用户询问当前时间、日期或者星期几时,返回相应的信息 | 用户说:“今天星期几?” → 返回当前的星期几 | 82 | | `schedule_task` | 创建一个定时任务 | 用户可以指定任务的执行时间和内容,定时提醒用户 | 用户说:“每天早上8点提醒我喝水。” → `time: '08:00', content: '提醒我喝水'` | 83 | | `open_application` | 在 Mac 电脑上打开指定的应用程序 | 用户可以指定应用程序的名称,脚本将在 Mac 上启动相应的应用 | 用户说:“打开Safari。” → `application_name: 'Safari'` | 84 | | `web_search` | 在网上搜索指定的关键词 | 根据用户提供的搜索内容,返回相应的搜索结果 | 用户说:“搜索最新的科技新闻。” → `query: '最新的科技新闻'` | 85 | 86 | -------------------------------------------------------------------------------- /plugins/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zc-maker/EdgePersona/8e2bb1d87d7945412160020132f52ab14dd0db31/plugins/__init__.py -------------------------------------------------------------------------------- /plugins/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zc-maker/EdgePersona/8e2bb1d87d7945412160020132f52ab14dd0db31/plugins/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /plugins/__pycache__/registry.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zc-maker/EdgePersona/8e2bb1d87d7945412160020132f52ab14dd0db31/plugins/__pycache__/registry.cpython-311.pyc -------------------------------------------------------------------------------- 
/plugins/__pycache__/task_manager.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zc-maker/EdgePersona/8e2bb1d87d7945412160020132f52ab14dd0db31/plugins/__pycache__/task_manager.cpython-311.pyc -------------------------------------------------------------------------------- /plugins/function_calls_config.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "type": "function", 4 | "function": { 5 | "name": "get_weather", 6 | "description": "获取某个地点的天气,用户应先提供一个位置,比如用户说杭州天气,参数为:zhejiang/hangzhou,比如用户说北京天气怎么样,参数为:beijing/beijing", 7 | "parameters": { 8 | "type": "object", 9 | "properties": { 10 | "city": { 11 | "type": "string", 12 | "description": "城市,zhejiang/hangzhou" 13 | } 14 | }, 15 | "required": [ 16 | "city" 17 | ] 18 | } 19 | } 20 | }, 21 | { 22 | "type": "function", 23 | "function": { 24 | "name": "ielts_speaking_practice", 25 | "description": "提供雅思口语练习的题目和建议,用户可以请求特定主题的练习,例如:'请给我一个旅游主题的雅思口语题", 26 | "parameters": { 27 | "type": "object", 28 | "properties": { 29 | "topic": { 30 | "type": "string", 31 | "description": "练习主题,例如:旅游、工作、教育等" 32 | } 33 | }, 34 | "required": [ 35 | 36 | ] 37 | } 38 | } 39 | }, 40 | { 41 | "type": "function", 42 | "function": { 43 | "name": "get_day_of_week", 44 | "description": "获取当前日期的星期几,用户可以询问当前日期或时间,参数为空,因为函数自动获取当前时间", 45 | "parameters": { 46 | "type": "object", 47 | "properties": { 48 | 49 | }, 50 | "required": [ 51 | 52 | ] 53 | } 54 | } 55 | }, 56 | { 57 | "type": "function", 58 | "function": { 59 | "name": "schedule_task", 60 | "description": "创建一个定时任务,用户应提供任务的执行时间和任务内容。\n比如用户说每天早上8点提醒我喝水,参数为:time: '08:00', content: '提醒我喝水'。", 61 | "parameters": { 62 | "type": "object", 63 | "properties": { 64 | "time": { 65 | "type": "string", 66 | "description": "任务的执行时间,格式为HH:mm,比如'08:00'表示早上8点。" 67 | }, 68 | "content": { 69 | "type": "string", 70 | "description": "任务的内容,比如'提醒我喝水'。" 71 | } 72 | }, 73 | 
"required": [ 74 | "time", 75 | "content" 76 | ] 77 | } 78 | } 79 | }, 80 | { 81 | "type": "function", 82 | "function": { 83 | "name": "open_application", 84 | "description": "在 Mac 电脑上打开指定的应用程序,用户应提供应用程序的名称。\n比如用户说打开Safari,参数为:application_name: 'Safari'。", 85 | "parameters": { 86 | "type": "object", 87 | "properties": { 88 | "application_name": { 89 | "type": "string", 90 | "description": "应用程序的名称,比如'Safari'、'Finder'、'Notes'等。" 91 | } 92 | }, 93 | "required": [ 94 | "application_name" 95 | ] 96 | } 97 | } 98 | }, 99 | { 100 | "type": "function", 101 | "function": { 102 | "name": "aigc_manus", 103 | "description": "该函数可以做任何你想做的事情,如果不知道调用那个工具,可以默认调用该工具,调用 Manus AI Agent 执行复杂任务,用户应提供任务描述和所需的参数。Manus 能够独立思考、规划并执行复杂任务,直接交付完整成果。例如:\n- **研究任务**:深入研究特定主题,撰写报告或文章。\n- **数据分析**:分析市场趋势、股票表现等数据,并提供见解。\n- **内容创作**:生成高质量的文章、PPT 等内容。\n- **任务自动化**:自动执行重复性任务,如筛选简历、处理文件等。", 104 | "parameters": { 105 | "type": "object", 106 | "properties": { 107 | "task_description": { 108 | "type": "string", 109 | "description": "要执行的任务描述,例如'撰写一篇关于人工智能发展的文章'、'分析特定股票的市场趋势'等。" 110 | } 111 | }, 112 | "required": ["task_description"] 113 | } 114 | } 115 | }, 116 | { 117 | "type": "function", 118 | "function": { 119 | "name": "search_local_documents", 120 | "description": "查询本地文档,用户需提供查询关键词,返回匹配的文档列表,例如用户说'查找关于机器学习的文档',参数为:{'keyword': '机器学习'}", 121 | "parameters": { 122 | "type": "object", 123 | "properties": { 124 | "keyword": { 125 | "type": "string", 126 | "description": "查询关键词,例如:'机器学习'、'Python编程'等" 127 | } 128 | }, 129 | "required": [ 130 | "keyword" 131 | ] 132 | } 133 | } 134 | } 135 | ] -------------------------------------------------------------------------------- /plugins/function_manager.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from plugins.registry import function_registry 3 | import importlib 4 | import pkgutil 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | def auto_import_modules(package_name): 10 | """ 11 | 
def call_function(func_name, *args, **kwargs):
    """Invoke a registered plugin function by name.

    :param func_name: key of the function in ``function_registry`` (str)
    :param args: positional arguments forwarded to the function
    :param kwargs: keyword arguments forwarded to the function
    :return: the function's return value, or an error-message string when the
        name is not registered or the call raises
    """
    try:
        # Guard clause: unknown names are reported through the same error
        # path as failures raised by the function itself.
        if func_name not in function_registry:
            raise ValueError(f"函数 '{func_name}' 未注册!")
        return function_registry[func_name](*args, **kwargs)
    except Exception as err:
        return f"调用函数 '{func_name}' 时出错:{err!s}"
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zc-maker/EdgePersona/8e2bb1d87d7945412160020132f52ab14dd0db31/plugins/functions/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /plugins/functions/__pycache__/aigc_manus.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zc-maker/EdgePersona/8e2bb1d87d7945412160020132f52ab14dd0db31/plugins/functions/__pycache__/aigc_manus.cpython-311.pyc -------------------------------------------------------------------------------- /plugins/functions/__pycache__/get_day_of_week.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zc-maker/EdgePersona/8e2bb1d87d7945412160020132f52ab14dd0db31/plugins/functions/__pycache__/get_day_of_week.cpython-311.pyc -------------------------------------------------------------------------------- /plugins/functions/__pycache__/get_weather.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zc-maker/EdgePersona/8e2bb1d87d7945412160020132f52ab14dd0db31/plugins/functions/__pycache__/get_weather.cpython-311.pyc -------------------------------------------------------------------------------- /plugins/functions/__pycache__/ielts_speaking_practice.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zc-maker/EdgePersona/8e2bb1d87d7945412160020132f52ab14dd0db31/plugins/functions/__pycache__/ielts_speaking_practice.cpython-311.pyc -------------------------------------------------------------------------------- /plugins/functions/__pycache__/open_application.cpython-311.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zc-maker/EdgePersona/8e2bb1d87d7945412160020132f52ab14dd0db31/plugins/functions/__pycache__/open_application.cpython-311.pyc -------------------------------------------------------------------------------- /plugins/functions/__pycache__/schedule_task.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zc-maker/EdgePersona/8e2bb1d87d7945412160020132f52ab14dd0db31/plugins/functions/__pycache__/schedule_task.cpython-311.pyc -------------------------------------------------------------------------------- /plugins/functions/__pycache__/search_local_documents.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zc-maker/EdgePersona/8e2bb1d87d7945412160020132f52ab14dd0db31/plugins/functions/__pycache__/search_local_documents.cpython-311.pyc -------------------------------------------------------------------------------- /plugins/functions/__pycache__/web_search.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zc-maker/EdgePersona/8e2bb1d87d7945412160020132f52ab14dd0db31/plugins/functions/__pycache__/web_search.cpython-311.pyc -------------------------------------------------------------------------------- /plugins/functions/aigc_manus.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import asyncio 3 | from plugins.registry import register_function, ToolType 4 | from plugins.registry import ActionResponse, Action 5 | import logging 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | sys.path.append("third_party/OpenManus") 10 | 11 | from app.agent.manus import Manus 12 | 13 | agent = Manus() 14 | 15 | def parser_result(messages): 16 | if not isinstance(messages, list): 17 | return "任务执行失败" 18 | for message in reversed(messages): 19 | if 
@register_function('get_day_of_week', action=ToolType.WAIT)
def get_day_of_week()->ActionResponse:
    """Report the current local date, time and Chinese weekday name.

    Returns:
        An ``ActionResponse`` with action ``Action.REQLLM`` whose ``result``
        is the formatted date/time sentence and whose ``response`` is None.
    """
    now = datetime.now()
    # datetime.weekday() is 0 for Monday .. 6 for Sunday.
    chinese_weekdays = ("星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日")
    weekday_name = chinese_weekdays[now.weekday()]
    # The original format string used '%秒', which is not a strftime directive,
    # so the seconds were never rendered.  Each field is formatted separately
    # so only ASCII directives are handed to strftime.
    time_text = f"{now:%H}点{now:%M}分{now:%S}秒"
    response = f"当前日期: {now.date()},当前时间: {time_text},星期几: {weekday_name}"
    return ActionResponse(Action.REQLLM, response, None)
@register_function('get_weather', ToolType.WAIT)
def get_weather(city: str):
    """Fetch the weather summary for a location from tianqi.moji.com.

    Args:
        city: Location path in ``province/city`` form, e.g.
            ``zhejiang/hangzhou`` or ``beijing/beijing``.

    Returns:
        An ``ActionResponse`` with action ``Action.REQLLM``.  On success the
        summary text is in ``result``; on any failure ``result`` is None and
        ``response`` carries the failure message.
    """
    url = "https://tianqi.moji.com/weather/china/"+city
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36'
    }
    try:
        # This tool is registered as WAIT (run synchronously), so an
        # unbounded request would block the caller; bound it with a timeout.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException:
        return ActionResponse(Action.REQLLM, None, "请求失败")
    if response.status_code != 200:
        return ActionResponse(Action.REQLLM, None, "请求失败")
    soup = BeautifulSoup(response.text, "html.parser")
    meta = soup.find('meta', attrs={'name': 'description'})
    # The summary lives in <meta name="description">; treat a page without it
    # as a failed lookup instead of raising on a None tag.
    if meta is None or not meta.get("content"):
        return ActionResponse(Action.REQLLM, None, "请求失败")
    weather = meta["content"].replace("墨迹天气", "")
    return ActionResponse(Action.REQLLM, weather, None)
@register_function('open_application', action=ToolType.NONE)
def open_application(app_name=None, application_name=None):
    """Open a macOS application by name using the ``open -a`` command.

    Args:
        app_name (str): Application name, e.g. 'Google Chrome' or
            'Visual Studio Code'.
        application_name (str): Alias of ``app_name``.  The tool schema in
            function_calls_config.json names the argument
            ``application_name`` and arguments are passed by keyword, so
            both spellings are accepted.

    Returns:
        An ``ActionResponse``: ``Action.RESPONSE`` with a confirmation in
        ``response`` on success, or ``Action.REQLLM`` with the failure text
        in ``result`` when the application could not be started.
    """
    target = app_name or application_name
    if not target:
        logger.error("未提供应用程序名称")
        return ActionResponse(Action.REQLLM, "打开应用失败", None)
    try:
        # Argument list (no shell) so the name is never interpreted by a shell.
        subprocess.run(['open', '-a', target], check=True)
        logger.info(f"{target} 已成功启动!")
        response = "好的,正在帮你打开应用"
        return ActionResponse(Action.RESPONSE, None, response)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers hosts where the `open` executable does not exist.
        logger.error(f"无法启动应用程序: {target}")
        response = "打开应用失败"
        return ActionResponse(Action.REQLLM, response, None)
class TaskScheduler:
    """Thin wrapper around the ``schedule`` library for daily reminder tasks.

    ``tasks`` maps a task id to ``(time_str, content)``.  The matching
    ``schedule`` job objects are kept in ``_jobs`` so a task can actually be
    cancelled later.
    """

    def __init__(self):
        self.tasks = {}
        # task_id -> schedule job, needed to cancel the job on removal.
        self._jobs = {}

    def schedule_task(self, task_id, time_str, content):
        """Create a daily task that fires at ``time_str`` (format 'HH:MM').

        Scheduling an id that already exists replaces the earlier job instead
        of stacking a second one.
        """
        # Create the new job first: if `time_str` is rejected by `schedule`,
        # the previously registered job stays untouched.
        job = schedule.every().day.at(time_str).do(self.trigger_task, task_id, content)
        previous = self._jobs.pop(task_id, None)
        if previous is not None:
            schedule.cancel_job(previous)
        self._jobs[task_id] = job
        self.tasks[task_id] = (time_str, content)
        logger.info(f"任务已创建: {task_id} - 在 {time_str} 执行 '{content}'")

    def trigger_task(self, task_id, content):
        """Callback run by ``schedule`` when a task is due; logs the event."""
        logger.info(f"触发任务 {task_id}: {content} at {time.strftime('%H:%M:%S')}")

    def list_tasks(self):
        """Log every task currently registered."""
        for task_id, (time_str, content) in self.tasks.items():
            logger.info(f"当前调度的任务: {task_id}: 在 {time_str} 执行 '{content}'")

    def remove_task(self, task_id):
        """Remove a task and cancel its scheduled job."""
        if task_id in self.tasks:
            del self.tasks[task_id]
            # Deleting the bookkeeping entry alone would leave the job
            # registered with `schedule`, so it would keep firing.
            job = self._jobs.pop(task_id, None)
            if job is not None:
                schedule.cancel_job(job)
            logger.info(f"任务 {task_id} 已移除")
        else:
            logger.info(f"任务 {task_id} 不存在")

    def run_scheduler(self):
        """Run pending jobs forever, polling once per second."""
        while True:
            schedule.run_pending()
            time.sleep(1)
@register_function('search_local_documents', action=ToolType.TIME_CONSUMING)
def search_local_documents(keyword: str):
    """Query the local document store for ``keyword``.

    A ``Rag`` object is created for the call and its ``query`` result is
    returned as the ``response`` of an ``Action.RESPONSE`` reply; ``result``
    is None.
    """
    rag = Rag()
    answer = rag.query(keyword)
    return ActionResponse(action=Action.RESPONSE, result=None, response=answer)
def register_function(name, action=None):
    """Return a decorator that registers a function in ``function_registry``.

    Args:
        name: Key under which the function is stored in the registry.
        action: Optional tool type describing how the function should be
            dispatched; stored on the function as ``func.action``.

    Returns:
        A decorator that records the function and returns it unchanged.
    """
    def decorator(func):
        function_registry[name] = func
        # Always define the attribute (None when no action was given) so that
        # code reading `func.action` works for every registered function,
        # including ones registered with just a name.
        func.action = action
        logger.info(f"函数 '{name}' 注册成功")
        return func
    return decorator
class TaskManager:
    """Dispatch registered plugin functions and collect background results.

    Tool definitions are read from the JSON file named by the
    ``functions_call_name`` config entry.  Tools typed ``NONE`` or
    ``TIME_CONSUMING`` run on a thread pool and their results are forwarded
    to ``result_queue`` by the thread started in ``process_task``; all other
    tools run synchronously inside ``tool_call``.
    """

    # String spellings treated as "disabled" for boolean-like config values.
    _FALSE_STRINGS = frozenset({"", "0", "false", "no", "off"})

    def __init__(self, config, result_queue: queue.Queue):
        """Load the tool definitions and set up the queues and thread pool.

        Args:
            config: Mapping providing ``functions_call_name`` and, optionally,
                ``aigc_manus_enabled``.
            result_queue: Queue that receives results of background tasks.
        """
        self.functions = read_json_file(config.get("functions_call_name"))
        # The previous default was the string "false", which is truthy, so the
        # aigc_manus tool was never filtered out unless the config held a real
        # boolean False.  Normalise the value and default to disabled.
        if not self._is_enabled(config.get("aigc_manus_enabled", False)):
            self.functions = [item for item in self.functions if item["function"]["name"] != 'aigc_manus']
        self.task_queue = queue.Queue()
        # Thread pool that runs background tools.
        self.task_executor = ThreadPoolExecutor(max_workers=10)
        self.result_queue = result_queue

    @classmethod
    def _is_enabled(cls, value):
        """Interpret a config flag that may be a bool or a string like 'false'."""
        if isinstance(value, str):
            return value.strip().lower() not in cls._FALSE_STRINGS
        return bool(value)

    def get_functions(self):
        """Return the list of tool definitions exposed to the LLM."""
        return self.functions

    def process_task(self):
        """Start a daemon thread that moves finished results to ``result_queue``."""
        def task_thread():
            while True:
                pending = []
                delay = 1
                try:
                    # Drain the queue exactly once per pass.  Unfinished
                    # futures are set aside and re-queued afterwards; putting
                    # them straight back would keep the queue non-empty and
                    # spin this loop without ever sleeping.
                    while True:
                        try:
                            future = self.task_queue.get_nowait()
                        except queue.Empty:
                            break
                        if future.done():
                            self.result_queue.put(future.result())
                        else:
                            pending.append(future)
                except Exception as e:
                    logger.error(f"task_thread 处理出错: {e}")
                    delay = 2
                finally:
                    for future in pending:
                        self.task_queue.put(future)
                time.sleep(delay)
        consumer_task = threading.Thread(target=task_thread, daemon=True)
        consumer_task.start()

    @staticmethod
    def call_function(func_name, *args, **kwargs):
        """Invoke a registered function by name.

        :param func_name: key of the function in ``function_registry`` (str)
        :param args: positional arguments forwarded to the function
        :param kwargs: keyword arguments forwarded to the function
        :return: the function's return value, or an error-message string when
            the name is not registered or the call raises
        """
        try:
            if func_name not in function_registry:
                raise ValueError(f"函数 '{func_name}' 未注册!")
            return function_registry[func_name](*args, **kwargs)
        except Exception as e:
            # Keep the traceback in the log; callers only get the message.
            logger.exception("调用函数 '%s' 时出错", func_name)
            return f"调用函数 '{func_name}' 时出错:{str(e)}"

    @staticmethod
    def _ensure_response(result) -> ActionResponse:
        """Wrap a non-ActionResponse value so ``tool_call`` honours its return type."""
        if isinstance(result, ActionResponse):
            return result
        # `call_function` reports failures as plain strings; hand that text
        # over as the tool result so the caller still gets an ActionResponse.
        text = None if result is None else str(result)
        return ActionResponse(action=Action.REQLLM, result=text, response=None)

    def tool_call(self, func_name, func_args) -> ActionResponse:
        """Run the tool ``func_name`` according to its registered tool type.

        Args:
            func_name: Name of the registered function.
            func_args: Mapping of keyword arguments for the function; None is
                treated as "no arguments".

        Returns:
            ``Action.NOTFOUND`` when the name is unknown, an immediate
            acknowledgement for background tools, otherwise the tool's own
            response.
        """
        if func_name not in function_registry:
            return ActionResponse(action=Action.NOTFOUND, result="没有找到相应函数", response=None)
        func = function_registry[func_name]
        func_args = func_args or {}
        # Functions registered without an action may lack the attribute;
        # treat them like the fall-through case (run synchronously).
        tool_type = getattr(func, "action", None)
        if tool_type == ToolType.NONE:
            # Fire and forget: run in the background, nothing to say now.
            future = self.task_executor.submit(self.call_function, func_name, **func_args)
            self.task_queue.put(future)
            return ActionResponse(action=Action.NONE, result=None, response=None)
        if tool_type == ToolType.TIME_CONSUMING:
            # Slow tool: acknowledge now, deliver the result via result_queue.
            future = self.task_executor.submit(self.call_function, func_name, **func_args)
            self.task_queue.put(future)
            return ActionResponse(action=Action.RESPONSE, result=None, response="您好,正在查询信息中,一会查询完我会告诉你哟")
        # WAIT, SCHEDULER, ADD_SYS_PROMPT and unknown types all run inline.
        return self._ensure_response(self.call_function(func_name, **func_args))
def create_instance(class_name, *args, **kwargs):
    """Build an instance of the module-level class called ``class_name``.

    The name is resolved through ``globals()``; positional and keyword
    arguments are forwarded to the constructor unchanged.

    Raises:
        ValueError: If the name does not resolve to a (truthy) module-level
            object.
    """
    target = globals().get(class_name)
    if not target:
        raise ValueError(f"Class {class_name} not found")
    return target(*args, **kwargs)
class Robot(ABC):
    """Voice assistant loop: recorder -> VAD -> ASR -> LLM -> TTS -> player.

    Audio chunks flow through ``audio_queue`` (recorder -> VAD thread) and
    ``vad_queue`` (VAD thread -> ``_duplex``). Recognised text is answered by
    ``chat`` on the thread pool; the resulting TTS futures are queued on
    ``tts_queue`` so that playback keeps the order in which they were
    submitted.
    """

    # Seconds a worker blocks on an empty queue before re-checking stop_event.
    _QUEUE_POLL_SECONDS = 0.1

    def __init__(self, config_file):
        """Build every pipeline component from the YAML file ``config_file``."""
        config = read_config(config_file)
        selected = config["selected_module"]
        self.audio_queue = queue.Queue()

        self.recorder = recorder.create_instance(
            selected["Recorder"],
            config["Recorder"][selected["Recorder"]]
        )

        self.asr = asr.create_instance(
            selected["ASR"],
            config["ASR"][selected["ASR"]]
        )

        self.llm = llm.create_instance(
            selected["LLM"],
            config["LLM"][selected["LLM"]]
        )

        # NOTE: the TTS engine and the player are hard-wired here and do not
        # follow config["selected_module"].
        self.tts = tts.KOKOROTTS(config={})

        self.vad = vad.create_instance(
            selected["VAD"],
            config["VAD"][selected["VAD"]]
        )

        self.player = PygameSoundPlayer()

        self.memory = memory.Memory(config.get("Memory"))
        self.prompt = sys_prompt.replace("{memory}", self.memory.get_memory()).strip()

        self.vad_queue = queue.Queue()
        self.dialogue = Dialogue(config["Memory"]["dialogue_history_path"])
        self.dialogue.put(Message(role="system", content=self.prompt))

        # Futures are consumed in FIFO order so speech segments stay ordered.
        self.tts_queue = queue.Queue()
        self.executor = ThreadPoolExecutor(max_workers=10)

        self.vad_start = True

        # Whether user speech may interrupt playback / generation.
        self.INTERRUPT = config["interrupt"]
        self.silence_time_ms = int((1000 / 1000) * (16000 / 512))  # ms

        # Plain flag (not a real lock): True while chat() is generating.
        self.chat_lock = False

        # Set to ask every loop to exit.
        self.stop_event = threading.Event()

        self.callback = None

        self.speech = []

        self.task_queue = queue.Queue()
        self.task_manager = TaskManager(config.get("TaskManager"), self.task_queue)
        self.start_task_mode = config.get("StartTaskMode")

    def listen_dialogue(self, callback):
        """Register ``callback``; it receives ``{"role", "content"}`` dicts."""
        self.callback = callback

    def _stream_vad(self):
        """Start the daemon thread that labels recorded chunks with VAD status."""
        def vad_thread():
            while not self.stop_event.is_set():
                try:
                    # Bounded wait so the loop notices stop_event.
                    data = self.audio_queue.get(timeout=self._QUEUE_POLL_SECONDS)
                except queue.Empty:
                    continue
                try:
                    vad_statue = self.vad.is_vad(data)
                    self.vad_queue.put({"voice": data, "vad_statue": vad_statue})
                except Exception as e:
                    logger.error(f"VAD 处理出错: {e}")
        consumer_audio = threading.Thread(target=vad_thread, daemon=True)
        consumer_audio.start()

    def _tts_priority(self):
        """Start the daemon thread that plays finished TTS files in queue order."""
        def priority_thread():
            while not self.stop_event.is_set():
                try:
                    future = self.tts_queue.get(timeout=self._QUEUE_POLL_SECONDS)
                except queue.Empty:
                    continue
                try:
                    tts_file = future.result(timeout=5)
                except TimeoutError:
                    logger.error("TTS 任务超时")
                    continue
                except Exception as e:
                    logger.error(f"TTS 任务出错: {e}")
                    continue
                if tts_file is None:
                    continue
                try:
                    self.player.play(tts_file)
                    logger.debug(f"tts_file {tts_file}")
                except Exception as e:
                    logger.error(f"tts_priority priority_thread: {e}")
        tts_priority = threading.Thread(target=priority_thread, daemon=True)
        tts_priority.start()

    def interrupt_playback(self):
        """Interrupt the audio that is currently playing."""
        logger.info("Interrupting current playback.")
        self.player.stop()

    def shutdown(self):
        """Signal all loops to stop and release executor, recorder and player."""
        logger.info("Shutting down Robot...")
        self.stop_event.set()
        self.executor.shutdown(wait=True)
        self.recorder.stop_recording()
        self.player.shutdown()
        logger.info("Shutdown complete.")

    def start_recording_and_vad(self):
        """Start the recorder, the VAD thread and the TTS playback thread."""
        self.recorder.start_recording(self.audio_queue)
        logger.info("Started recording.")
        self._stream_vad()
        self._tts_priority()

    def _duplex(self):
        """Process one VAD-labelled chunk.

        Returns:
            ``None`` when nothing further had to be done for this chunk
            (including when no chunk arrived within the poll interval),
            ``True`` otherwise.
        """
        try:
            # Bounded wait: run() must be able to re-check stop_event even
            # when the microphone delivers nothing.
            data = self.vad_queue.get(timeout=self._QUEUE_POLL_SECONDS)
        except queue.Empty:
            return None

        # While an utterance is open every chunk belongs to it.
        already_buffered = self.vad_start
        if already_buffered:
            self.speech.append(data)
        vad_status = data.get("vad_statue")

        # Idle: speak the result of a finished background task.
        if not self.task_queue.empty() and not self.vad_start and vad_status is None \
                and not self.player.get_playing_status() and self.chat_lock is False:
            result = self.task_queue.get()
            future = self.executor.submit(self.speak_and_play, result.response)
            self.tts_queue.put(future)

        if vad_status is None:
            return
        if "start" in vad_status:
            if self.player.get_playing_status() or self.chat_lock is True:
                # The user talks over the assistant.
                if not self.INTERRUPT:
                    return
                self.chat_lock = False
                self.interrupt_playback()
            self.vad_start = True
            # Avoid feeding the same chunk to the recogniser twice.
            if not already_buffered:
                self.speech.append(data)
        elif "end" in vad_status and len(self.speech) > 0:
            try:
                logger.debug(f"语音包的长度:{len(self.speech)}")
                self.vad_start = False
                voice_data = [d["voice"] for d in self.speech]
                text, tmpfile = self.asr.recognizer(voice_data)
                self.speech = []
            except Exception as e:
                self.vad_start = False
                self.speech = []
                logger.error(f"ASR识别出错: {e}")
                return
            if not text.strip():
                logger.debug("识别结果为空,跳过处理。")
                return

            logger.debug(f"ASR识别结果: {text}")
            if self.callback:
                self.callback({"role": "user", "content": str(text)})
            self.executor.submit(self.chat, text)
        return True

    def run(self):
        """Run the listen/answer loop until ``stop_event`` or Ctrl+C."""
        try:
            self.start_recording_and_vad()
            while not self.stop_event.is_set():
                self._duplex()
        except KeyboardInterrupt:
            logger.info("Received KeyboardInterrupt. Exiting...")
        finally:
            self.shutdown()

    def speak_and_play(self, text):
        """Synthesize ``text``; return the audio file path, or ``None`` on failure.

        Playback itself is done by the ``_tts_priority`` thread.
        """
        if text is None or len(text) <= 0:
            logger.info(f"无需tts转换,query为空,{text}")
            return None
        tts_file = self.tts.to_tts(text)
        if tts_file is None:
            logger.error(f"tts转换失败,{text}")
            return None
        logger.debug(f"TTS 文件生成完毕{self.chat_lock}")
        return tts_file

    def _record_tool_call(self, function_id, function_name, function_arguments, tool_content):
        """Append the assistant tool-call message and its tool result to the dialogue."""
        self.dialogue.put(Message(role='assistant',
                                  tool_calls=[{"id": function_id, "function": {
                                      "arguments": json.dumps(function_arguments, ensure_ascii=False),
                                      "name": function_name},
                                               "type": 'function', "index": 0}]))
        self.dialogue.put(Message(role="tool", tool_call_id=function_id, content=tool_content))

    def chat_tool(self, query):
        """Answer using the function-calling LLM endpoint.

        Streams the reply, queues TTS per text segment, and executes a tool
        call when the model requests one.

        Returns:
            The list of reply fragments (possibly empty).
        """
        start = 0
        try:
            start_time = time.time()
            llm_responses = self.llm.response_call(self.dialogue.get_llm_dialogue(), functions_call=self.task_manager.get_functions())
        except Exception as e:
            logger.error(f"LLM 处理出错 {query}: {e}")
            return []

        tool_call_flag = False
        response_message = []
        # Tool-call pieces arrive incrementally across chunks.
        function_name = None
        function_id = None
        function_arguments = ""
        content_arguments = ""
        for chunk in llm_responses:
            content, tools_call = chunk
            has_content = content is not None and len(content) > 0
            # A reply opening with a code fence is a tool call written as text.
            if has_content and len(response_message) <= 0 and content == "```":
                tool_call_flag = True
            if tools_call is not None:
                tool_call_flag = True
                if tools_call[0].id is not None:
                    function_id = tools_call[0].id
                if tools_call[0].function.name is not None:
                    function_name = tools_call[0].function.name
                if tools_call[0].function.arguments is not None:
                    function_arguments += tools_call[0].function.arguments
            if not has_content:
                continue
            if tool_call_flag:
                content_arguments += content
                continue
            response_message.append(content)
            end_time = time.time()
            logger.debug(f"大模型返回时间时间: {end_time - start_time} 秒, 生成token={content}")
            if is_segment(response_message):
                segment_text = "".join(response_message[start:])
                logger.debug(f"分段文本: {segment_text}")
                # Keep very short fragments back so speech stays coherent.
                if len(segment_text) <= max(2, start):
                    continue
                future = self.executor.submit(self.speak_and_play, segment_text)
                self.tts_queue.put(future)
                start = len(response_message)

        if not tool_call_flag:
            # Flush whatever has not been spoken yet.
            if start < len(response_message):
                segment_text = "".join(response_message[start:])
                future = self.executor.submit(self.speak_and_play, segment_text)
                self.tts_queue.put(future)
            return response_message

        try:
            if function_id is None:
                # Tool call emitted as text: pull the JSON out of the reply.
                raw_json = extract_json_from_string(content_arguments)
                if raw_json is None:
                    return []
                parsed = json.loads(raw_json)
                function_name = parsed["function_name"]
                function_arguments = parsed["args"]
                function_id = str(uuid.uuid4().hex)
            else:
                # A call without parameters may carry an empty argument string.
                function_arguments = json.loads(function_arguments) if function_arguments else {}
        except (ValueError, KeyError, TypeError) as e:
            logger.error(f"Failed to parse tool call arguments: {e}")
            return []
        logger.info(f"function_name={function_name}, function_id={function_id}, function_arguments={function_arguments}")

        result = self.task_manager.tool_call(function_name, function_arguments)
        if result.action == Action.NOTFOUND:
            logger.error(f"没有找到函数{function_name}")
            return []
        elif result.action == Action.NONE:
            return []
        elif result.action == Action.RESPONSE:
            # Speak the tool's answer directly.
            future = self.executor.submit(self.speak_and_play, result.response)
            self.tts_queue.put(future)
            return [result.response]
        elif result.action == Action.REQLLM:
            # Feed the tool output back and let the LLM phrase the reply; the
            # follow-up reply must be returned so chat() can store it.
            self._record_tool_call(function_id, function_name, function_arguments, result.result)
            return self.chat_tool(query)
        elif result.action == Action.ADDSYSTEM:
            self.dialogue.put(Message(**result.result))
            return []
        elif result.action == Action.ADDSYSTEMSPEAK:
            self._record_tool_call(function_id, function_name, function_arguments, result.response)
            self.dialogue.put(Message(**result.result))
            self.dialogue.put(Message(role="user", content="ok"))
            return self.chat_tool(query)
        else:
            logger.error(f"not found action type: {result.action}")
        return response_message

    def chat(self, query):
        """Answer ``query`` and record both sides in the dialogue.

        Returns:
            ``True`` on success, ``None`` when generation failed.
        """
        self.dialogue.put(Message(role="user", content=query))
        response_message = []
        start = 0
        self.chat_lock = True
        try:
            if self.start_task_mode:
                response_message = self.chat_tool(query)
            else:
                start_time = time.time()
                llm_responses = self.llm.response(self.dialogue.get_llm_dialogue())
                for content in llm_responses:
                    response_message.append(content)
                    end_time = time.time()
                    logger.debug(f"大模型返回时间时间: {end_time - start_time} 秒, 生成token={content}")
                    if is_segment(response_message):
                        segment_text = "".join(response_message[start:])
                        # Keep very short fragments back so speech stays coherent.
                        if len(segment_text) <= max(2, start):
                            continue
                        future = self.executor.submit(self.speak_and_play, segment_text)
                        self.tts_queue.put(future)
                        start = len(response_message)

                # Flush whatever has not been spoken yet.
                if start < len(response_message):
                    segment_text = "".join(response_message[start:])
                    future = self.executor.submit(self.speak_and_play, segment_text)
                    self.tts_queue.put(future)
        except Exception as e:
            logger.error(f"LLM 处理出错 {query}: {e}")
            return None
        finally:
            # Always release the flag; otherwise _duplex would treat the
            # assistant as busy forever after a failure.
            self.chat_lock = False

        if self.callback:
            self.callback({"role": "assistant", "content": "".join(response_message)})
        self.dialogue.put(Message(role="assistant", content="".join(response_message)))
        self.dialogue.dump_dialogue()
        logger.debug(json.dumps(self.dialogue.get_llm_dialogue(), indent=4, ensure_ascii=False))
        return True
class AbstractTTS(ABC):
    """Interface of a text-to-speech engine."""

    @abstractmethod
    def to_tts(self, text):
        """Synthesize ``text``; return the audio file path or ``None`` on failure."""


class GTTS(AbstractTTS):
    """Text-to-speech through the ``gTTS`` package."""

    def __init__(self, config):
        """Read ``output_file`` (output directory) and ``lang`` from ``config``."""
        # Fall back to the current directory like the sibling engines do, so
        # a config without "output_file" cannot crash os.path.join later.
        self.output_file = config.get("output_file") or "."
        self.lang = config.get("lang")

    def _generate_filename(self, extension=".aiff"):
        """Return a unique path inside the output directory."""
        return os.path.join(self.output_file, f"tts-{datetime.now().date()}@{uuid.uuid4().hex}{extension}")

    def _log_execution_time(self, start_time):
        """Log the seconds elapsed since ``start_time`` at debug level."""
        end_time = time.time()
        execution_time = end_time - start_time
        logger.debug(f"执行时间: {execution_time:.2f} 秒")

    def to_tts(self, text):
        """Synthesize ``text``; return the file path, or ``None`` on failure."""
        tmpfile = self._generate_filename(".aiff")
        try:
            start_time = time.time()
            tts = gTTS(text=text, lang=self.lang)
            tts.save(tmpfile)
            self._log_execution_time(start_time)
            return tmpfile
        except Exception as e:
            # A failed synthesis is an error, not debug noise.
            logger.error(f"生成TTS文件失败: {e}")
            return None


class MacTTS(AbstractTTS):
    """
    Text-to-speech through the macOS ``say`` command.
    voice: ``say -v ?`` prints all available voices.
    """

    def __init__(self, config):
        """Read ``voice`` and ``output_file`` (output directory) from ``config``."""
        super().__init__()
        self.voice = config.get("voice")
        # Same fallback as the sibling engines.
        self.output_file = config.get("output_file") or "."

    def _generate_filename(self, extension=".aiff"):
        """Return a unique path inside the output directory."""
        return os.path.join(self.output_file, f"tts-{datetime.now().date()}@{uuid.uuid4().hex}{extension}")

    def _log_execution_time(self, start_time):
        """Log the seconds elapsed since ``start_time`` at debug level."""
        end_time = time.time()
        execution_time = end_time - start_time
        logger.debug(f"执行时间: {execution_time:.2f} 秒")

    def to_tts(self, phrase):
        """Synthesize ``phrase``; return the file path, or ``None`` on failure."""
        logger.debug(f"正在转换的tts:{phrase}")
        tmpfile = self._generate_filename(".aiff")
        try:
            start_time = time.time()
            res = subprocess.run(
                ["say", "-v", self.voice, "-o", tmpfile, phrase],
                shell=False,
                universal_newlines=True,
            )
            self._log_execution_time(start_time)
            if res.returncode == 0:
                return tmpfile
            else:
                logger.info("TTS 生成失败")
                return None
        except Exception as e:
            logger.info(f"执行TTS失败: {e}")
            return None
class KOKOROTTS(AbstractTTS):
    """Text-to-speech through the ``kokoro`` ``KPipeline``."""

    # Sample rate used when writing the wav file.
    SAMPLE_RATE = 24000

    def __init__(self, config):
        """Read ``output_file`` (directory), ``lang`` and ``voice`` from ``config``."""
        from kokoro import KPipeline
        self.output_file = config.get("output_file", ".")
        self.lang = config.get("lang", "z")
        logger.info(f"KOKOROTTS: lang: {self.lang}")
        self.pipeline = KPipeline(lang_code=self.lang)  # <= make sure lang_code matches voice
        self.voice = config.get("voice", "zm_yunyang")

    def _generate_filename(self, extension=".wav"):
        """Return a unique path inside the output directory."""
        return os.path.join(self.output_file, f"tts-{datetime.now().date()}@{uuid.uuid4().hex}{extension}")

    def _log_execution_time(self, start_time):
        """Log the seconds elapsed since ``start_time`` at debug level."""
        end_time = time.time()
        execution_time = end_time - start_time
        logger.debug(f"Execution Time: {execution_time:.2f} seconds")

    def to_tts(self, text):
        """Synthesize ``text`` into one wav file.

        The pipeline yields one audio segment per text split; all segments
        are concatenated so the file contains the whole utterance.

        Returns:
            The wav path, or ``None`` when synthesis failed or produced no audio.
        """
        # Local import keeps this method independent of import order in the module.
        import numpy as np

        tmpfile = self._generate_filename(".wav")
        start_time = time.time()
        try:
            generator = self.pipeline(
                text, voice=self.voice,  # <= change voice here
                speed=1, split_pattern=r'\n+'
            )
            segments = []
            for i, (gs, ps, audio) in enumerate(generator):
                logger.debug(f"KOKOROTTS: i: {i}, gs:{gs}, ps:{ps}")  # i => index
                if audio is None:
                    continue
                # Tensor-like segments are converted to numpy before joining.
                if hasattr(audio, "detach"):
                    audio = audio.detach().cpu().numpy()
                segments.append(np.atleast_1d(np.asarray(audio)))
            if not segments:
                # Nothing was synthesized: do not hand out a path to a
                # file that was never written.
                logger.error(f"Failed to generate TTS file: no audio produced for {text!r}")
                return None
            # Write once; writing per segment would keep only the last one.
            sf.write(tmpfile, np.concatenate(segments), self.SAMPLE_RATE)
            self._log_execution_time(start_time)
            return tmpfile
        except Exception as e:
            logger.error(f"Failed to generate TTS file: {e}")
            return None
ref_path = 'your.wav' 242 | self.sample_rate = 16000 # 保持与KOKOROTTS相同采样率 243 | self.ref_audio = load_wav(ref_path, self.sample_rate) 244 | 245 | self._init_model() 246 | 247 | 248 | def _init_model(self): 249 | """模型初始化(对应KPipeline初始化)""" 250 | self.model = CosyVoice2( 251 | self.model_path, 252 | load_jit=True, 253 | load_trt=True, 254 | fp16=True 255 | ) 256 | 257 | 258 | for i in range(10): 259 | # 流式生成(禁用文本切割) 260 | generator = self.model.inference_zero_shot( 261 | "这是一个热启动文本,用于后续加速", # 直接传入完整文本 262 | prompt_text="your.wav文本", 263 | prompt_speech_16k=self.ref_audio, 264 | stream=True, 265 | ) 266 | for chunk in generator: 267 | audio = chunk['tts_speech'].numpy().T 268 | # sf.write(tmpfile, audio, 24000) 269 | continue 270 | 271 | 272 | def _generate_filename(self, extension=".wav"): 273 | """保持完全相同的文件名生成逻辑""" 274 | return os.path.join( 275 | self.output_file, 276 | f"tts-{datetime.now().date()}@{uuid.uuid4().hex}{extension}" 277 | ) 278 | 279 | def _log_execution_time(self, start_time): 280 | """完全相同的耗时记录方法""" 281 | execution_time = time.time() - start_time 282 | logger.debug(f"Execution Time: {execution_time:.2f} seconds") 283 | def _log_execution_time(self, start_time): 284 | """相同的耗时日志格式""" 285 | execution_time = time.time() - start_time 286 | logger.debug(f"Execution Time: {execution_time:.2f} seconds") 287 | def to_tts(self, text): 288 | """保持完全相同的接口规范""" 289 | tmpfile = self._generate_filename() 290 | start_time = time.time() 291 | 292 | try: 293 | # 流式生成(禁用文本切割) 294 | generator = self.model.inference_zero_shot( 295 | text, # 直接传入完整文本 296 | prompt_text="今天天气真是太好了,阳光灿烂,心情超级棒!但是,朋友最近的感情问题也让我心痛不已,好像世界末日一样,真的好为她难过哦!", 297 | prompt_speech_16k=self.ref_audio, 298 | stream=True, 299 | ) 300 | 301 | # 保存音频文件 302 | # audio_chunks = [] 303 | for chunk in generator: 304 | audio = chunk['tts_speech'].numpy().T 305 | # audio_chunks.append(audio) 306 | sf.write(tmpfile, audio, 24000) 307 | # 合并写入文件(对应sf.write) 308 | # full_audio = np.concatenate(audio_chunks) 309 | 
def load_prompt(prompt_path):
    """Return the stripped text content of the UTF-8 file ``prompt_path``."""
    with open(prompt_path, "r", encoding="utf-8") as file:
        return file.read().strip()


def read_json_file(file_path):
    """Read a JSON file and return its content, or ``None`` if it is not valid JSON."""
    with open(file_path, 'r', encoding='utf-8') as file:
        try:
            return json.load(file)
        except json.JSONDecodeError as e:
            print(f"解析 JSON 时出错: {e}")
            return None


def write_json_file(file_path, data):
    """Write ``data`` to ``file_path`` as indented, non-ASCII-escaped JSON."""
    with open(file_path, 'w', encoding='utf-8') as file:
        json.dump(data, file, ensure_ascii=False, indent=4)


def read_config(config_path):
    """Load the YAML file ``config_path`` with the safe loader and return it."""
    with open(config_path, "r", encoding="utf-8") as file:
        return yaml.safe_load(file)


# Tokens that close a speakable segment. The full-width forms are written as
# escapes so they cannot be folded into their ASCII look-alikes:
# U+FF0C comma, U+FF1F question mark, U+FF01 exclamation mark,
# U+FF1B semicolon, U+FF1A colon.
_SEGMENT_PUNCTUATION = frozenset(",.?!;:。\uff0c\uff1f\uff01\uff1b\uff1a")


def is_segment(tokens):
    """Return ``True`` when the last token is a segment-ending punctuation mark.

    An empty token list is not a segment boundary.
    """
    if not tokens:
        return False
    return tokens[-1] in _SEGMENT_PUNCTUATION


_INTERRUPT_PHRASES = ("停一下", "听我说", "不要说了", "stop", "hold on", "excuse me")


def is_interrupt(query: str):
    """Return ``True`` when ``query`` contains an interrupt phrase (case-insensitive)."""
    lowered = query.lower()
    return any(phrase in lowered for phrase in _INTERRUPT_PHRASES)


# DOTALL lets "." cross line breaks; JSON produced by an LLM is usually
# pretty-printed over several lines.
_JSON_OBJECT_RE = re.compile(r'(\{.*\})', re.DOTALL)


def extract_json_from_string(input_string):
    """Return the span from the first ``{`` to the last ``}``, or ``None``.

    The span is returned as text and is not validated as JSON.
    """
    if not input_string:
        return None
    match = _JSON_OBJECT_RE.search(input_string)
    if match:
        return match.group(1)
    return None
def create_instance(class_name, *args, **kwargs):
    """Instantiate the class named ``class_name`` defined in this module.

    Args:
        class_name: Name looked up in this module's globals.
        *args: Positional arguments forwarded to the constructor.
        **kwargs: Keyword arguments forwarded to the constructor.

    Returns:
        The new instance.

    Raises:
        ValueError: If no class of that name exists in this module.
    """
    cls = globals().get(class_name)
    # globals() also contains modules, functions and the logger; calling one
    # of those would fail with an unrelated error, so only accept classes.
    if not isinstance(cls, type):
        raise ValueError(f"Class {class_name} not found")
    return cls(*args, **kwargs)