├── Demo ├── builder │ ├── __init__.py │ ├── data │ │ ├── __init__.py │ │ └── 健康档案.pdf │ ├── indexer.py │ └── prompt │ │ └── __init__.py ├── kag_config.cfg ├── reasoner │ └── __init__.py ├── schema │ ├── KagDemo.schema │ └── __init__.py └── solver │ ├── __init__.py │ ├── prompt │ └── __init__.py │ └── query.py ├── JayChouProject ├── builder │ ├── data │ │ └── jay.txt │ ├── indexer.py │ └── prompt │ │ ├── __pycache__ │ │ ├── ner.cpython-311.pyc │ │ ├── std.cpython-311.pyc │ │ └── triple.cpython-311.pyc │ │ ├── ner.py │ │ ├── std.py │ │ └── triple.py ├── kag_config.cfg ├── reasoner │ └── __init__.py ├── schema │ └── JayChouProject.schema └── solver │ ├── prompt │ ├── __pycache__ │ │ ├── logic_form_plan.cpython-311.pyc │ │ ├── question_ner.cpython-311.pyc │ │ └── resp_generator.cpython-311.pyc │ ├── logic_form_plan.py │ ├── question_ner.py │ └── resp_generator.py │ └── query.py ├── KagV6Test ├── JayChouTest_KAG_V6 │ ├── builder │ │ ├── data │ │ │ └── jay.txt │ │ ├── indexer.py │ │ └── prompt │ │ │ ├── __init__.py │ │ │ ├── ner.py │ │ │ ├── std.py │ │ │ └── triple.py │ ├── schema │ │ └── JayChouTest.schema │ └── solver │ │ ├── prompt │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ └── resp_generator.cpython-311.pyc │ │ └── resp_generator.py │ │ └── query.py ├── README.md ├── XiYouJiTest_KAG_V6 │ ├── builder │ │ ├── data │ │ │ ├── incremental_inputs │ │ │ │ ├── file4.md │ │ │ │ ├── file5.md │ │ │ │ ├── file6.md │ │ │ │ ├── file7.md │ │ │ │ ├── file8.docx │ │ │ │ └── file9.pdf │ │ │ └── inputs │ │ │ │ ├── file1.md │ │ │ │ ├── file2.docx │ │ │ │ └── file3.pdf │ │ ├── docxIndexer.py │ │ ├── mdIndexer.py │ │ ├── mixIndexer.py │ │ └── pdfIndexer.py │ ├── config │ │ └── example_config.yaml │ ├── kag_config.yaml │ ├── reasoner │ │ └── __init__.py │ ├── schema │ │ ├── XiYouJiTest.schema │ │ └── __init__.py │ └── solver │ │ ├── prompt │ │ ├── __init__.py │ │ └── resp_generator.py │ │ └── query.py └── other │ ├── apiFile │ └── KagTest.apifox.json │ ├── config │ └── example_config.yaml │ └── docker │ └── docker-compose.yml ├── LICENSE ├── README.md ├── docker-compose.yml ├── example.cfg └── other ├── config └── example.cfg └── docker └── docker-compose.yml /Demo/builder/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 OpenSPG Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 4 | # in compliance with the License. You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software distributed under the License 9 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 10 | # or implied. 11 | 12 | """ 13 | Builder Dir. 14 | """ -------------------------------------------------------------------------------- /Demo/builder/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 OpenSPG Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 4 | # in compliance with the License. You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software distributed under the License 9 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 10 | # or implied. 11 | 12 | """ 13 | Place the files to be used for building the index in this directory. 14 | """ 15 | -------------------------------------------------------------------------------- /Demo/builder/data/健康档案.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanGePlus/KagTest/f4ecd3247c681e74f19649e442b029d9eb2ee71f/Demo/builder/data/健康档案.pdf -------------------------------------------------------------------------------- /Demo/builder/indexer.py: -------------------------------------------------------------------------------- 1 | import os 2 | from kag.builder.component.reader import DocxReader, PDFReader, MarkDownReader 3 | from kag.builder.component.splitter import LengthSplitter, OutlineSplitter 4 | from knext.builder.builder_chain_abc import BuilderChainABC 5 | from kag.builder.component.extractor import KAGExtractor 6 | from kag.builder.component.vectorizer.batch_vectorizer import BatchVectorizer 7 | from kag.builder.component.writer import KGWriter 8 | from kag.solver.logic.solver_pipeline import SolverPipeline 9 | import logging 10 | from kag.common.env import init_kag_config 11 | 12 | 13 | 14 | 15 | # 获取当前脚本所在的目录路径 16 | file_path = os.path.dirname(__file__) 17 | 18 | # 定义文件后缀与对应读取器类的映射,用于动态选择合适的读取器 19 | suffix_mapping = { 20 | "docx": DocxReader, 21 | "pdf": PDFReader, 22 | "md": MarkDownReader 23 | } 24 | 25 | 26 | class KagDemoBuildChain(BuilderChainABC): 27 | def build(self, **kwargs): 28 | # 获取文件路径 29 | file_path = kwargs.get("file_path", "a.docx") 30 | # 根据文件后缀从suffix_mapping中选择合适的读取器 31 | suffix = file_path.split(".")[-1] 32 | reader = suffix_mapping[suffix]() 33 | # 若未实现某种格式的读取器,则抛出 NotImplementedError 34 | if reader is None: 35 | raise NotImplementedError 36 | 37 | # 项目ID 38 | project_id = int(os.getenv("KAG_PROJECT_ID")) 39 | # 将文本切分为长度为 2000 的块 40 | splitter = LengthSplitter(split_length=2000) 41 | # 将文本数据转化为向量 42 | vectorizer = BatchVectorizer() 43 | # 提取知识的核心组件,使用环境变量 KAG_PROJECT_ID 指定项目 ID 44 | extractor = KAGExtractor(project_id=project_id) 45 | # 将最终的知识数据写入输出 46 | writer = KGWriter() 47 | # 各组件按顺序串联成构建chain 48 | chain = reader >> splitter >> extractor >> vectorizer >> writer 49 | # 返回chain 50 | return chain 51 | 52 | 53 | # 构建知识图谱 54 | def buildKG(test_file, **kwargs): 55 | # 创建 KagDemoBuildChain 对象,传入文件路径 56 | chain = KagDemoBuildChain(file_path=test_file) 57 | # 调用 chain.invoke 方法执行构建,设置并发任务数量为 10 58 | chain.invoke(test_file, max_workers=10) 59 | 60 | 61 | if __name__ == "__main__": 62 | # 文件输入 63 | test_file = os.path.join(file_path, "./data/健康档案.pdf") 64 | # 构建知识图谱 65 | buildKG(test_file) 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /Demo/builder/prompt/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 OpenSPG Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 4 | # in compliance with the License. You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software distributed under the License 9 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 10 | # or implied. 11 | 12 | """ 13 | Place the prompts to be used for building the index in this directory. 14 | """ -------------------------------------------------------------------------------- /Demo/kag_config.cfg: -------------------------------------------------------------------------------- 1 | [project] 2 | namespace = KagDemo 3 | host_addr = http://localhost:8887 4 | id = 2 5 | 6 | [vectorizer] 7 | vectorizer = kag.common.vectorizer.OpenAIVectorizer 8 | model = text-embedding-ada-002 9 | api_key = sk-zL8dD8hTwv0d5GRlYC0eUPH8QvWxnXIR6XTWsx7WKzoSO1uo 10 | base_url = https://yunwu.ai/v1 11 | vector_dimensions = 1536 12 | 13 | [llm] 14 | client_type = maas 15 | base_url = https://yunwu.ai/v1 16 | api_key = sk-zL8dD8hTwv0d5GRlYC0eUPH8QvWxnXIR6XTWsx7WKzoSO1uo 17 | model = gpt-4o-mini 18 | 19 | [prompt] 20 | biz_scene = default 21 | language = zh 22 | 23 | [log] 24 | level = INFO 25 | 26 | -------------------------------------------------------------------------------- /Demo/reasoner/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 OpenSPG Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 4 | # in compliance with the License. You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software distributed under the License 9 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 10 | # or implied. 11 | 12 | """ 13 | Place the DSL file for graph reasoning in this directory. 14 | For example: 15 | 16 | ```company.dsl 17 | MATCH (s:DEFAULT.Company) 18 | RETURN s.id, s.address 19 | ``` 20 | """ -------------------------------------------------------------------------------- /Demo/schema/KagDemo.schema: -------------------------------------------------------------------------------- 1 | namespace KagDemo 2 | 3 | Chunk(文本块): EntityType 4 | properties: 5 | content(内容): Text 6 | index: TextAndVector 7 | 8 | ArtificialObject(人造物体): EntityType 9 | properties: 10 | desc(描述): Text 11 | index: TextAndVector 12 | semanticType(语义类型): Text 13 | index: Text 14 | 15 | Astronomy(天文学): EntityType 16 | properties: 17 | desc(描述): Text 18 | index: TextAndVector 19 | semanticType(语义类型): Text 20 | index: Text 21 | 22 | Building(建筑): EntityType 23 | properties: 24 | desc(描述): Text 25 | index: TextAndVector 26 | semanticType(语义类型): Text 27 | index: Text 28 | 29 | Creature(生物): EntityType 30 | properties: 31 | desc(描述): Text 32 | index: TextAndVector 33 | semanticType(语义类型): Text 34 | index: Text 35 | 36 | Concept(概念): EntityType 37 | properties: 38 | desc(描述): Text 39 | index: TextAndVector 40 | semanticType(语义类型): Text 41 | index: Text 42 | 43 | Date(日期): EntityType 44 | properties: 45 | desc(描述): Text 46 | index: TextAndVector 47 | semanticType(语义类型): Text 48 | index: Text 49 | 50 | GeographicLocation(地理位置): EntityType 51 | properties: 52 | desc(描述): Text 53 | index: TextAndVector 54 | semanticType(语义类型): Text 55 | index: Text 56 | 57 | Keyword(关键词): EntityType 58 | properties: 59 | desc(描述): Text 60 | index: TextAndVector 61 | semanticType(语义类型): Text 62 | index: Text 63 | 64 | Medicine(药物): EntityType 65 | properties: 66 | desc(描述): Text 67 | index: TextAndVector 68 | semanticType(语义类型): Text 69 | index: Text 70 | 71 | 72 | NaturalScience(自然科学): EntityType 73 | properties: 74 | desc(描述): Text 75 | index: TextAndVector 76 | semanticType(语义类型): Text 77 | index: Text 78 | 79 | Organization(组织机构): EntityType 80 | properties: 81 | desc(描述): Text 82 | index: TextAndVector 83 | semanticType(语义类型): Text 84 | index: Text 85 | 86 | Person(人物): EntityType 87 | properties: 88 | desc(描述): Text 89 | index: TextAndVector 90 | semanticType(语义类型): Text 91 | index: Text 92 | 93 | Transport(运输): EntityType 94 | properties: 95 | desc(描述): Text 96 | index: TextAndVector 97 | semanticType(语义类型): Text 98 | index: Text 99 | 100 | Works(作品): EntityType 101 | properties: 102 | desc(描述): Text 103 | index: TextAndVector 104 | semanticType(语义类型): Text 105 | index: Text 106 | 107 | Event(事件): EntityType 108 | properties: 109 | desc(描述): Text 110 | index: TextAndVector 111 | semanticType(语义类型): Text 112 | index: Text 113 | 114 | Others(其他): EntityType 115 | properties: 116 | desc(描述): Text 117 | index: TextAndVector 118 | semanticType(语义类型): Text 119 | index: Text 120 | 121 | SemanticConcept(语义概念): EntityType 122 | properties: 123 | desc(内容): Text 124 | index: Text -------------------------------------------------------------------------------- /Demo/schema/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 OpenSPG Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 4 | # in compliance with the License. You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software distributed under the License 9 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 10 | # or implied. 11 | 12 | """ 13 | {{namespace}}.schema: 14 | The MarkLang file for the schema of this project. 15 | You can execute `kag schema commit` to commit your schema to SPG server. 16 | 17 | 18 | """ -------------------------------------------------------------------------------- /Demo/solver/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanGePlus/KagTest/f4ecd3247c681e74f19649e442b029d9eb2ee71f/Demo/solver/__init__.py -------------------------------------------------------------------------------- /Demo/solver/prompt/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 OpenSPG Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 4 | # in compliance with the License. You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software distributed under the License 9 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 10 | # or implied. 11 | 12 | """ 13 | Place the prompts to be used for solving problems in this directory. 14 | """ -------------------------------------------------------------------------------- /Demo/solver/query.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | from kag.common.env import init_kag_config 5 | from kag.solver.logic.solver_pipeline import SolverPipeline 6 | 7 | 8 | 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | class KagDemo: 13 | 14 | def __init__(self): 15 | pass 16 | 17 | def qa(self, query): 18 | # 创建一个 SolverPipeline 实例,负责查询的逻辑处理 19 | resp = SolverPipeline() 20 | answer, trace_log = resp.run(query) 21 | return answer,trace_log 22 | 23 | if __name__ == "__main__": 24 | demo = KagDemo() 25 | query = "张三九的基本信息" 26 | answer,trace_log = demo.qa(query) 27 | 28 | print(f"answer:{answer}\ntraceLog:{trace_log}") 29 | -------------------------------------------------------------------------------- /JayChouProject/builder/data/jay.txt: -------------------------------------------------------------------------------- 1 | 周杰伦(Jay Chou),1979年1月18日出生于台湾省新北市,祖籍福建省永春县,华语流行乐男歌手、音乐人、演员、导演、编剧,毕业于淡江中学。 2 | 2000年,发行个人首张音乐专辑《Jay》。2001年,凭借专辑《范特西》奠定其融合中西方音乐的风格。2002年,举行“The One”世界巡回演唱会。2003年,成为美国《时代周刊》封面人物;同年,发行音乐专辑《叶惠美》,该专辑获得第15届台湾金曲奖最佳流行音乐演唱专辑奖。2004年,发行音乐专辑《七里香》,该专辑在亚洲的首月销量达到300万张;同年,获得世界音乐大奖中国区最畅销艺人奖。2005年,主演个人首部电影《头文字D》,并凭借该片获得第25届香港电影金像奖和第42届台湾电影金马奖的最佳新演员奖。2006年起,连续三年获得世界音乐大奖中国区最畅销艺人奖。 3 | 2007年,自编自导爱情电影《不能说的秘密》,同年,成立杰威尔音乐有限公司。2007年,凭借歌曲《青花瓷》获得第19届台湾金曲奖最佳作曲人奖。2007年,入选美国CNN“25位亚洲最具影响力人物”;同年,凭借专辑《魔杰座》获得第20届台湾金曲奖最佳国语男歌手奖。2010年,入选美国《Fast Company》评出的“全球百大创意人物”。2011年,凭借专辑《跨时代》获得第22届台湾金曲奖最佳国语男歌手奖。2012年,登上福布斯中国名人榜榜首。2014年,发行个人首张数字音乐专辑《哎呦,不错哦》。2023年,凭借专辑《最伟大的作品》成为首位获得国际唱片业协会“全球畅销专辑榜”冠军的华语歌手。 -------------------------------------------------------------------------------- /JayChouProject/builder/indexer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | from kag.builder.component.reader import DocxReader, PDFReader, MarkDownReader, CSVReader, TXTReader, JSONReader 4 | from kag.builder.component.splitter import LengthSplitter 5 | from knext.builder.builder_chain_abc import BuilderChainABC 6 | from kag.builder.component.extractor import KAGExtractor 7 | from kag.builder.component.vectorizer.batch_vectorizer import BatchVectorizer 8 | from kag.builder.component.writer import KGWriter 9 | 10 | 11 | 12 | 13 | # 初始化日志 14 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') 15 | 16 | 17 | # 获取当前脚本所在的目录路径 18 | file_path = os.path.dirname(__file__) 19 | 20 | # 定义文件后缀与对应读取器类的映射,用于动态选择合适的读取器 21 | suffix_mapping = { 22 | "docx": DocxReader, 23 | "pdf": PDFReader, 24 | "md": MarkDownReader, 25 | "csv": CSVReader, 26 | "txt": TXTReader, 27 | "json": JSONReader 28 | } 29 | 30 | class KagDemoBuildChain(BuilderChainABC): 31 | def build(self, **kwargs): 32 | try: 33 | # 1、定义文件读取 读取文件内容 34 | file_path = kwargs.get("file_path") 35 | if not os.path.exists(file_path): 36 | logging.error(f"File not found: {file_path}") 37 | raise FileNotFoundError(f"The file {file_path} does not exist.") 38 | 39 | suffix = file_path.split(".")[-1] 40 | reader_class = suffix_mapping.get(suffix) 41 | if reader_class is None: 42 | logging.error(f"Unsupported file format: {suffix}") 43 | raise NotImplementedError(f"No reader implemented for file type: {suffix}") 44 | 45 | reader = reader_class() 46 | 47 | # 2、定义文本分割 切分为chunk 48 | project_id = os.getenv("KAG_PROJECT_ID") 49 | if project_id is None: 50 | logging.error("Environment variable KAG_PROJECT_ID is not set.") 51 | raise EnvironmentError("Missing environment variable: KAG_PROJECT_ID") 52 | 53 | project_id = int(project_id) 54 | splitter = LengthSplitter(split_length=800, window_length=100) 55 | 56 | # 3、定义知识提取 提取知识 57 | extractor = KAGExtractor(project_id=project_id) 58 | 59 | # 4、定义批量生成文本向量表示 将文本转化为向量 60 | vectorizer = BatchVectorizer() 61 | 62 | # 5、定义知识写入 将最终的知识数据写入向量数据库 63 | writer = KGWriter() 64 | 65 | # 6、将定义的各组件按顺序串联成构建索引的chain并返回 66 | chain = reader >> splitter >> extractor >> vectorizer >> writer 67 | return chain 68 | except Exception as e: 69 | logging.error(f"An error occurred during the build process: {e}") 70 | raise 71 | 72 | # 构建知识图谱 73 | def buildKG(test_file, **kwargs): 74 | try: 75 | if not os.path.exists(test_file): 76 | logging.error(f"File not found: {test_file}") 77 | raise FileNotFoundError(f"The file {test_file} does not exist.") 78 | 79 | # 创建 KagDemoBuildChain 对象,传入文件路径 80 | chain = KagDemoBuildChain(file_path=test_file) 81 | 82 | # 调用 chain.invoke 方法执行构建,设置并发任务数量为 10 83 | chain.invoke(test_file, max_workers=10) 84 | except FileNotFoundError as e: 85 | logging.error(f"File error: {e}") 86 | except Exception as e: 87 | logging.error(f"An unexpected error occurred during the knowledge graph build process: {e}") 88 | 89 | if __name__ == "__main__": 90 | try: 91 | # 文件输入 92 | test_file = os.path.join(file_path, "./data/jay.txt") 93 | 94 | # 构建知识图谱 95 | buildKG(test_file) 96 | except Exception as e: 97 | logging.critical(f"Critical error: {e}") 98 | -------------------------------------------------------------------------------- /JayChouProject/builder/prompt/__pycache__/ner.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanGePlus/KagTest/f4ecd3247c681e74f19649e442b029d9eb2ee71f/JayChouProject/builder/prompt/__pycache__/ner.cpython-311.pyc -------------------------------------------------------------------------------- /JayChouProject/builder/prompt/__pycache__/std.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanGePlus/KagTest/f4ecd3247c681e74f19649e442b029d9eb2ee71f/JayChouProject/builder/prompt/__pycache__/std.cpython-311.pyc -------------------------------------------------------------------------------- /JayChouProject/builder/prompt/__pycache__/triple.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanGePlus/KagTest/f4ecd3247c681e74f19649e442b029d9eb2ee71f/JayChouProject/builder/prompt/__pycache__/triple.cpython-311.pyc -------------------------------------------------------------------------------- /JayChouProject/builder/prompt/ner.py: -------------------------------------------------------------------------------- 1 | # NER:命名实体识别(Named Entity Recognition,简称 NER)是自然语言处理(NLP)领域中的一项关键任务 2 | # 它的目标是从文本中识别和分类具有特定意义的实体,例如人名、地名、组织名、日期、时间、货币金额等 3 | # 自定义ner的prompt 4 | # 源码在 KAG/kag/builder/prompt/default 5 | 6 | import json 7 | from string import Template 8 | from typing import List, Optional 9 | 10 | from kag.common.base.prompt_op import PromptOp 11 | from knext.schema.client import SchemaClient 12 | 13 | 14 | class OpenIENERPrompt(PromptOp): 15 | 16 | template_zh = """ 17 | { 18 | "instruction": "你是命名实体识别的专家。请从输入中提取与模式定义匹配的实体。如果不存在该类型的实体,请返回一个空列表。请以JSON字符串格式回应。你可以参照example进行抽取。", 19 | "schema": $schema, 20 | "example": [ 21 | { 22 | "input": "周杰伦(Jay Chou),1979年1月18日出生于台湾省新北市,祖籍福建省永春县,华语流行乐男歌手、音乐人、演员、导演、编剧,毕业于淡江中学。\n2000年,发行个人首张音乐专辑《Jay》。2001年,凭借专辑《范特西》奠定其融合中西方音乐的风格。2002年,举行“The One”世界巡回演唱会。2005年,主演个人首部电影《头文字D》,并凭借该片获得第25届香港电影金像奖和第42届台湾电影金马奖的最佳新演员奖。2006年起,连续三年获得世界音乐大奖中国区最畅销艺人奖。", 23 | "output": [ 24 | {"entity": "周杰伦","category": "Person","description": "周杰伦(Jay Chou)是一位华语流行乐男歌手、音乐人、演员、导演和编剧。"}, 25 | {"entity": "音乐人","category": "Roles","description": "周杰伦(Jay Chou)是一位音乐人。"}, 26 | {"entity": "1979年1月18日","category": "Date","description": "周杰伦(Jay Chou)在1979年1月18日出生。"}, 27 | {"entity": "台湾省新北市","category": "GeographicLocation","description": "周杰伦出生在台湾省新北市。"}, 28 | {"entity": "福建省永春县","category": "GeographicLocation","description": "周杰伦的祖籍在福建省永春县。"}, 29 | {"entity": "淡江中学","category": "Organization","description": "周杰伦毕业于淡江中学。"}, 30 | {"entity": "专辑《Jay》","category": "Albums","description": "专辑《Jay》是周杰伦2000年发行个人首张音乐专辑。"}, 31 | {"entity": "《头文字D》","category": "Works","description": "《头文字D》是周杰伦2005年主演个人首部电影。"}, 32 | {"entity": "金像奖","category": "Awards","description": "2005年,凭借《头文字D》获得第25届香港电影金像奖。"}, 33 | {"entity": "金马奖","category": "Awards","description": "2005年,凭借《头文字D》获得第42届台湾电影金马奖。"}, 34 | ] 35 | } 36 | ], 37 | "input": "$input" 38 | } 39 | """ 40 | 41 | 42 | template_en = template_zh 43 | 44 | def __init__( 45 | self, language: Optional[str] = "en", **kwargs 46 | ): 47 | super().__init__(language, **kwargs) 48 | self.schema = SchemaClient(project_id=self.project_id).extract_types() 49 | self.template = Template(self.template).safe_substitute(schema=self.schema) 50 | 51 | @property 52 | def template_variables(self) -> List[str]: 53 | return ["input"] 54 | 55 | def parse_response(self, response: str, **kwargs): 56 | rsp = response 57 | if isinstance(rsp, str): 58 | rsp = json.loads(rsp) 59 | if isinstance(rsp, dict) and "output" in rsp: 60 | rsp = rsp["output"] 61 | if isinstance(rsp, dict) and "named_entities" in rsp: 62 | entities = rsp["named_entities"] 63 | else: 64 | entities = rsp 65 | 66 | return entities 67 | -------------------------------------------------------------------------------- /JayChouProject/builder/prompt/std.py: -------------------------------------------------------------------------------- 1 | # Entity Standardization(实体标准化) 2 | # Entity Standardization(实体标准化)是将提取的实体规范化、统一化的过程。这一过程非常重要,因为同一实体可能在不同的上下文中以多种形式出现 3 | # 源码在 KAG/kag/builder/prompt/default 4 | 5 | 6 | import json 7 | from typing import Optional, List 8 | 9 | from kag.common.base.prompt_op import PromptOp 10 | 11 | 12 | class OpenIEEntitystandardizationdPrompt(PromptOp): 13 | 14 | template_zh = """ 15 | { 16 | "instruction": "input字段包含用户提供的上下文。命名实体字段包含从上下文中提取的命名实体,这些可能是含义不明的缩写、别名或俚语。为了消除歧义,请尝试根据上下文和您自己的知识提供这些实体的官方名称。请注意,具有相同含义的实体只能有一个官方名称。请按照提供的示例中的输出字段格式,以单个JSONArray字符串形式回复,无需任何解释。", 17 | "example": { 18 | "input": "周杰伦(Jay Chou),1979年1月18日出生于台湾省新北市,祖籍福建省永春县,华语流行乐男歌手、音乐人、演员、导演、编剧,毕业于淡江中学。\n2000年,发行个人首张音乐专辑《Jay》。2001年,凭借专辑《范特西》奠定其融合中西方音乐的风格。2002年,举行“The One”世界巡回演唱会。2005年,主演个人首部电影《头文字D》,并凭借该片获得第25届香港电影金像奖和第42届台湾电影金马奖的最佳新演员奖。2006年起,连续三年获得世界音乐大奖中国区最畅销艺人奖。", 19 | "named_entities": [ 20 | {"entity": "周杰伦","category": "Person","description": "周杰伦(Jay Chou)是一位华语流行乐男歌手、音乐人、演员、导演和编剧。"}, 21 | {"entity": "音乐人","category": "Roles","description": "周杰伦(Jay Chou)是一位音乐人。"}, 22 | {"entity": "1979年1月18日","category": "Date","description": "周杰伦(Jay Chou)在1979年1月18日出生。"}, 23 | {"entity": "台湾省新北市","category": "GeographicLocation","description": "周杰伦出生在台湾省新北市。"}, 24 | {"entity": "福建省永春县","category": "GeographicLocation","description": "周杰伦的祖籍在福建省永春县。"}, 25 | {"entity": "淡江中学","category": "Organization","description": "周杰伦毕业于淡江中学。"}, 26 | {"entity": "专辑《Jay》","category": "Albums","description": "专辑《Jay》是周杰伦2000年发行个人首张音乐专辑。"}, 27 | {"entity": "《头文字D》","category": "Works","description": "《头文字D》是周杰伦2005年主演个人首部电影。"}, 28 | {"entity": "金像奖","category": "Awards","description": "2005年,凭借《头文字D》获得第25届香港电影金像奖。"}, 29 | {"entity": "金马奖","category": "Awards","description": "2005年,凭借《头文字D》获得第42届台湾电影金马奖。"}, 30 | ], 31 | "output": [ 32 | {"entity": "周杰伦","category": "Person","description": "周杰伦(Jay Chou)是一位华语流行乐男歌手、音乐人、演员、导演和编剧。","official_name": "Jay Chou"}, 33 | {"entity": "音乐人","category": "Roles","description": "周杰伦(Jay Chou)是一位音乐人。","official_name": "音乐从业者"}, 34 | {"entity": "1979年1月18日","category": "Date","description": "周杰伦(Jay Chou)在1979年1月18日出生。","official_name": "1979-01-18"}, 35 | {"entity": "台湾省新北市","category": "GeographicLocation","description": "周杰伦出生在台湾省新北市。","official_name": "台湾新北"}, 36 | {"entity": "福建省永春县","category": "GeographicLocation","description": "周杰伦的祖籍在福建省永春县。","official_name": "福建永春"}, 37 | {"entity": "淡江中学","category": "Organization","description": "周杰伦毕业于淡江中学。","official_name": "淡江高级中学"}, 38 | {"entity": "专辑《Jay》","category": "Albums","description": "专辑《Jay》是周杰伦2000年发行个人首张音乐专辑。","official_name": "专辑Jay"}, 39 | {"entity": "《头文字D》","category": "Works","description": "《头文字D》是周杰伦2005年主演个人首部电影。","official_name": "头文字D"}, 40 | {"entity": "金像奖","category": "Awards","description": "2005年,凭借《头文字D》获得第25届香港电影金像奖。","official_name": "香港电影金像奖"}, 41 | {"entity": "金马奖","category": "Awards","description": "2005年,凭借《头文字D》获得第42届台湾电影金马奖。","official_name": "台湾电影金马奖"}, 42 | ] 43 | }, 44 | "input": $input, 45 | "named_entities": $named_entities, 46 | } 47 | """ 48 | 49 | template_en = template_zh 50 | 51 | def __init__(self, language: Optional[str] = "en"): 52 | super().__init__(language) 53 | 54 | @property 55 | def template_variables(self) -> List[str]: 56 | return ["input", "named_entities"] 57 | 58 | def parse_response(self, response: str, **kwargs): 59 | 60 | rsp = response 61 | if isinstance(rsp, str): 62 | rsp = json.loads(rsp) 63 | if isinstance(rsp, dict) and "output" in rsp: 64 | rsp = rsp["output"] 65 | if isinstance(rsp, dict) and "named_entities" in rsp: 66 | standardized_entity = rsp["named_entities"] 67 | else: 68 | standardized_entity = rsp 69 | entities_with_offical_name = set() 70 | merged = [] 71 | entities = kwargs.get("named_entities", []) 72 | for entity in standardized_entity: 73 | merged.append(entity) 74 | entities_with_offical_name.add(entity["entity"]) 75 | # in case llm ignores some entities 76 | for entity in entities: 77 | if entity["entity"] not in entities_with_offical_name: 78 | entity["official_name"] = entity["entity"] 79 | merged.append(entity) 80 | return merged 81 | -------------------------------------------------------------------------------- /JayChouProject/builder/prompt/triple.py: -------------------------------------------------------------------------------- 1 | # OpenIE(Open Information Extraction) 2 | # OpenIE(开放信息抽取)是一种从自然语言文本中自动提取语义关系的方法 3 | # 它不需要依赖预定义的关系模式(如数据库模式),可以直接从非结构化的自然语言中提取出主语-谓语-宾语(SPO)三元组,或更复杂的多元组信息 4 | # 输入文本:"张三在北京参加了人工智能大会。" 5 | # 抽取结果:(张三, 参加, 人工智能大会) 6 | # 或者:(张三, 在北京参加, 人工智能大会) 7 | 8 | # 源码在 KAG/kag/builder/prompt/default 9 | 10 | 11 | 12 | 13 | import json 14 | from typing import Optional, List 15 | 16 | from kag.common.base.prompt_op import PromptOp 17 | 18 | 19 | class OpenIETriplePrompt(PromptOp): 20 | 21 | template_zh = """ 22 | { 23 | "instruction": "您是一位专门从事开放信息提取(OpenIE)的专家。请从input字段的文本中提取任何可能的关系(包括主语、谓语、宾语),并按照JSON格式列出它们,须遵循example字段的示例格式。请注意以下要求:1. 每个三元组应至少包含entity_list实体列表中的一个,但最好是两个命名实体。2. 明确地将代词解析为特定名称,以保持清晰度。", 24 | "entity_list": $entity_list, 25 | "input": "$input", 26 | "example": { 27 | "input": "周杰伦(Jay Chou),1979年1月18日出生于台湾省新北市,祖籍福建省永春县,华语流行乐男歌手、音乐人、演员、导演、编剧,毕业于淡江中学。\n2000年,发行个人首张音乐专辑《Jay》。2001年,凭借专辑《范特西》奠定其融合中西方音乐的风格。2002年,举行“The One”世界巡回演唱会。2005年,主演个人首部电影《头文字D》,并凭借该片获得第25届香港电影金像奖和第42届台湾电影金马奖的最佳新演员奖。2006年起,连续三年获得世界音乐大奖中国区最畅销艺人奖。", 28 | "entity_list": [ 29 | {"entity": "周杰伦","category": "Person","description": "周杰伦(Jay Chou)是一位华语流行乐男歌手、音乐人、演员、导演和编剧。"}, 30 | {"entity": "音乐人","category": "Roles","description": "周杰伦(Jay Chou)是一位音乐人。"}, 31 | {"entity": "1979年1月18日","category": "Date","description": "周杰伦(Jay Chou)在1979年1月18日出生。"}, 32 | {"entity": "台湾省新北市","category": "GeographicLocation","description": "周杰伦出生在台湾省新北市。"}, 33 | {"entity": "福建省永春县","category": "GeographicLocation","description": "周杰伦的祖籍在福建省永春县。"}, 34 | {"entity": "淡江中学","category": "Organization","description": "周杰伦毕业于淡江中学。"}, 35 | {"entity": "专辑《Jay》","category": "Albums","description": "专辑《Jay》是周杰伦2000年发行个人首张音乐专辑。"}, 36 | {"entity": "《头文字D》","category": "Works","description": "《头文字D》是周杰伦2005年主演个人首部电影。"}, 37 | {"entity": "金像奖","category": "Awards","description": "2005年,凭借《头文字D》获得第25届香港电影金像奖。"}, 38 | {"entity": "金马奖","category": "Awards","description": "2005年,凭借《头文字D》获得第42届台湾电影金马奖。"}, 39 | ], 40 | "output":[ 41 | ["周杰伦", "出生于", "1979年1月18日"], 42 | ["周杰伦", "毕业于", "淡江中学"], 43 | ["周杰伦", "出生在", "台湾省新北市"], 44 | ["周杰伦", "发行", "专辑《Jay》"], 45 | ["周杰伦", "主演", "《头文字D》"], 46 | ["周杰伦", "获得", "金像奖"], 47 | ] 48 | } 49 | } 50 | """ 51 | 52 | template_en = template_zh 53 | 54 | def __init__(self, language: Optional[str] = "en"): 55 | super().__init__(language) 56 | 57 | @property 58 | def template_variables(self) -> List[str]: 59 | return ["entity_list", "input"] 60 | 61 | def parse_response(self, response: str, **kwargs): 62 | rsp = response 63 | if isinstance(rsp, str): 64 | rsp = json.loads(rsp) 65 | if isinstance(rsp, dict) and "output" in rsp: 66 | rsp = rsp["output"] 67 | if isinstance(rsp, dict) and "triples" in rsp: 68 | triples = rsp["triples"] 69 | else: 70 | triples = rsp 71 | 72 | standardized_triples = [] 73 | for triple in triples: 74 | if isinstance(triple, list): 75 | standardized_triples.append(triple) 76 | elif isinstance(triple, dict): 77 | s = triple.get("subject") 78 | p = triple.get("predicate") 79 | o = triple.get("object") 80 | if s and p and o: 81 | standardized_triples.append([s, p, o]) 82 | 83 | return standardized_triples 84 | -------------------------------------------------------------------------------- /JayChouProject/kag_config.cfg: -------------------------------------------------------------------------------- 1 | [project] 2 | namespace = JayChouProject 3 | host_addr = http://localhost:8887 4 | id = 5 5 | 6 | [vectorizer] 7 | vectorizer = kag.common.vectorizer.OpenAIVectorizer 8 | model = text-embedding-3-small 9 | api_key = sk-DK45tcPWxP9azYfMTXAltpR3vTxdZcG205zdLndwK9CoAUe0 10 | base_url = https://yunwu.ai/v1 11 | vector_dimensions = 1536 12 | 13 | [llm] 14 | client_type = maas 15 | base_url = https://yunwu.ai/v1 16 | api_key = sk-DK45tcPWxP9azYfMTXAltpR3vTxdZcG205zdLndwK9CoAUe0 17 | model = gpt-4o-mini 18 | 19 | [prompt] 20 | biz_scene = default 21 | language = zh 22 | 23 | [log] 24 | level = INFO 25 | 26 | -------------------------------------------------------------------------------- /JayChouProject/reasoner/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 OpenSPG Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 4 | # in compliance with the License. You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software distributed under the License 9 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 10 | # or implied. 11 | 12 | """ 13 | Place the DSL file for graph reasoning in this directory. 14 | For example: 15 | 16 | ```company.dsl 17 | MATCH (s:DEFAULT.Company) 18 | RETURN s.id, s.address 19 | ``` 20 | """ -------------------------------------------------------------------------------- /JayChouProject/schema/JayChouProject.schema: -------------------------------------------------------------------------------- 1 | namespace JayChouProject 2 | 3 | Chunk(文本块): EntityType 4 | properties: 5 | content(内容): Text 6 | index: TextAndVector 7 | 8 | Date(日期): EntityType 9 | properties: 10 | desc(描述): Text 11 | index: TextAndVector 12 | semanticType(语义类型): Text 13 | index: Text 14 | 15 | GeographicLocation(地理位置): EntityType 16 | properties: 17 | desc(描述): Text 18 | index: TextAndVector 19 | semanticType(语义类型): Text 20 | index: Text 21 | 22 | Organization(组织机构): EntityType 23 | properties: 24 | desc(描述): Text 25 | index: TextAndVector 26 | semanticType(语义类型): Text 27 | index: Text 28 | 29 | Person(人物): EntityType 30 | properties: 31 | desc(描述): Text 32 | index: TextAndVector 33 | semanticType(语义类型): Text 34 | index: Text 35 | 36 | Works(作品): EntityType 37 | properties: 38 | desc(描述): Text 39 | index: TextAndVector 40 | semanticType(语义类型): Text 41 | index: Text 42 | 43 | Albums(专辑): EntityType 44 | properties: 45 | desc(描述): Text 46 | index: TextAndVector 47 | semanticType(语义类型): Text 48 | index: Text 49 | 50 | Roles(角色): EntityType 51 | properties: 52 | desc(描述): Text 53 | index: TextAndVector 54 | semanticType(语义类型): Text 55 | index: Text 56 | 57 | Awards(获奖): EntityType 58 | properties: 59 | desc(描述): Text 60 | index: TextAndVector 61 | semanticType(语义类型): Text 62 | index: Text 63 | 64 | Others(其他): EntityType 65 | properties: 66 | desc(描述): Text 67 | index: TextAndVector 68 | semanticType(语义类型): Text 69 | index: Text 70 | -------------------------------------------------------------------------------- /JayChouProject/solver/prompt/__pycache__/logic_form_plan.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanGePlus/KagTest/f4ecd3247c681e74f19649e442b029d9eb2ee71f/JayChouProject/solver/prompt/__pycache__/logic_form_plan.cpython-311.pyc -------------------------------------------------------------------------------- /JayChouProject/solver/prompt/__pycache__/question_ner.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanGePlus/KagTest/f4ecd3247c681e74f19649e442b029d9eb2ee71f/JayChouProject/solver/prompt/__pycache__/question_ner.cpython-311.pyc -------------------------------------------------------------------------------- /JayChouProject/solver/prompt/__pycache__/resp_generator.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanGePlus/KagTest/f4ecd3247c681e74f19649e442b029d9eb2ee71f/JayChouProject/solver/prompt/__pycache__/resp_generator.cpython-311.pyc -------------------------------------------------------------------------------- /JayChouProject/solver/prompt/logic_form_plan.py: -------------------------------------------------------------------------------- 1 | # 实现了将自然语言转换成logic form 2 | # 源码在 KAG/kag/solver/prompt/default 3 | 4 | 5 | import logging 6 | import re 7 | from string import Template 8 | from typing import List 9 | 10 | from kag.common.base.prompt_op import PromptOp 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | from kag.common.base.prompt_op import PromptOp 15 | 16 | 17 | class LogicFormPlanPrompt(PromptOp): 18 | instruct_zh = """"instruction": "", 19 | "function_description": "functionName为算子名;基本格式为 functionName(arg_name1=arg_value1,[args_name2=arg_value2, args_name3=arg_value3]),括号中为参数,被[]包含的参数为可选参数,未被[]包含的为必选参数", 20 | "function": [ 21 | { 22 | "functionName": "get_spo", 23 | "function_declaration": "get_spo(s=s_alias:entity_type[entity_name], p=p_alias:edge_type, o=o_alias:entity_type[entity_name], p.edge_type=value)", 24 | "description": "查找spo信息,s代表主体,o代表客体,表示为变量名:实体类型[实体名称],实体名称作为可选参数,当有明确的查询实体时需要给出;p代表谓词,即关系或属性,表示为变量名:边类型或属性类型;这里为每个变量都分配一个变量名,作为后续提及时的指代;注意,s、p、o不能在同一表达式中反复多次出现;当变量为前文指代的变量名是,变量名必须和指代的变量名一致,且只需给出变量名,实体类型仅在首次引入时给定" 25 | }, 26 | { 27 | "functionName": "count", 28 | "function_declaration": "count_alias=count(alias)", 29 | "description": "统计节点个数,参数为指定待统计的节点集合,只能是get_spo中出现的变量名;count_alias作为变量名表示计算结果,只能是int类型,变量名可作为下文的指代" 30 | }, 31 | { 32 | "functionName": "sum", 33 | "function_declaration": "sum(alias, num1, num2, ...)->sum_alias", 34 | "description": "数据求和,参数为指定待求和的集合,可以是数字也可以是前文中出现的变量名,其内容只能是数值类型;sum_alias作为变量名表示计算结果,只能是数值类型,变量名可作为下文的指代" 35 | }, 36 | { 37 | "functionName": "sort", 38 | "function_declaration": "sort(set=alias, orderby=o_alias or count_alias or sum_alias, direction=min or max, limit=N)", 39 | "description": "对节点集合排序,set指定待排序的节点集合,只能是get_spo中出现的变量名;orderby指定排序的依据,为节点的关系或属性名称,若是前文提及过的,则用别名指代;direction指定排序的方向,只能是min(正序)或max(倒序)排列;limit为输出个数限制,为int类型;可作为最后的输出结果" 40 | }, 41 | { 42 | "functionName": "get", 43 | "function_declaration": "get(alias)", 44 | "description": "返回指定的别名代表的信息,可以是实体、关系路径或get_spo中获取到的属性值;可作为最后的输出结果" 45 | } 46 | ], 47 | """ 48 | default_case_zh = """"cases": [ 49 | { 50 | "Action": "吴京是谁", 51 | "answer": "Step1:查询吴京\nAction1:get_spo(s=s1:公众人物[吴京], p=p1, o=o1)\nOutput:输出s1\nAction2:get(s1)" 52 | }, 53 | { 54 | "query": "30+6加上华为创始人在2024年的年龄是多少", 55 | "answer": "Step1:30+6 等于多少?\nAction1:sum(30,6)->sum1\nStep2:华为创始人是谁?\nAction2:get_spo(s=s2:企业[华为],p=p2:创始人,o=o2)\nStep3:华为创始人出生在什么年份?\nAction3:get_spo(s=o2,p=p3:出生年份,o=o3)\nStep4:华为创始人在2024年的年龄是多少?\nAction4:sum(2024,-o3)->sum4\nStep5:30+6的结果与华为创始人在2024年的年龄相加是多少?\nAction5:sum(sum1,sum4)->sum5\nStep6:输出sum5\nAction6:get(sum5)" 56 | } 57 | ],""" 58 | 59 | template_zh = f""" 60 | {{ 61 | {instruct_zh} 62 | {default_case_zh} 63 | "output_format": "only output `Step`, `Action` and `Output` content", 64 | "query": "$question" 65 | }} 66 | """ 67 | 68 | template_en = template_zh 69 | 70 | def __init__(self, language: str): 71 | super().__init__(language) 72 | 73 | @property 74 | def template_variables(self) -> List[str]: 75 | return ["question"] 76 | 77 | 78 | def parse_response(self, response: str, **kwargs): 79 | try: 80 | logger.debug(f"logic form:{response}") 81 | _output_string = response.replace(":", ":") 82 | _output_string = response.strip() 83 | sub_querys = [] 84 | logic_forms = [] 85 | current_sub_query = '' 86 | for line in _output_string.split('\n'): 87 | if line.startswith('Step'): 88 | sub_querys_regex = re.search('Step\d+:(.*)', line) 89 | if sub_querys_regex is not None: 90 | sub_querys.append(sub_querys_regex.group(1)) 91 | current_sub_query = sub_querys_regex.group(1) 92 | elif line.startswith('Output'): 93 | sub_querys.append("output") 94 | elif line.startswith('Action'): 95 | logic_forms_regex = re.search('Action\d+:(.*)', line) 96 | if logic_forms_regex: 97 | logic_forms.append(logic_forms_regex.group(1)) 98 | if len(logic_forms) - len(sub_querys) == 1: 99 | sub_querys.append(current_sub_query) 100 | return sub_querys, logic_forms 101 | except Exception as e: 102 | logger.warning(f"{response} parse logic form faied {e}", exc_info=True) 103 | return [], [] 104 | -------------------------------------------------------------------------------- /JayChouProject/solver/prompt/question_ner.py: -------------------------------------------------------------------------------- 1 | # question_ner.py 对问题进行ner识别 作用范围 chunk检索 2 | # 源码在 KAG/kag/solver/prompt/default 3 | 4 | 5 | 6 | import json 7 | from string import Template 8 | from typing import List, Optional 9 | 10 | from kag.common.base.prompt_op import PromptOp 11 | from knext.schema.client import SchemaClient 12 | 13 | 14 | class QuestionNER(PromptOp): 15 | 16 | template_zh = """ 17 | { 18 | "instruction": "你是命名实体识别的专家。请从输入中提取与模式定义匹配的实体。如果不存在该类型的实体,请返回一个空列表。请以JSON字符串格式回应。你可以参照example进行抽取。", 19 | "schema": $schema, 20 | "example": [ 21 | { 22 | "input": "周杰伦(Jay Chou),1979年1月18日出生于台湾省新北市,祖籍福建省永春县,华语流行乐男歌手、音乐人、演员、导演、编剧,毕业于淡江中学。\n2000年,发行个人首张音乐专辑《Jay》。2001年,凭借专辑《范特西》奠定其融合中西方音乐的风格。2002年,举行“The One”世界巡回演唱会。2005年,主演个人首部电影《头文字D》,并凭借该片获得第25届香港电影金像奖和第42届台湾电影金马奖的最佳新演员奖。2006年起,连续三年获得世界音乐大奖中国区最畅销艺人奖。", 23 | "output": [ 24 | {"entity": "周杰伦","category": "Person","description": "周杰伦(Jay Chou)是一位华语流行乐男歌手、音乐人、演员、导演和编剧。"}, 25 | {"entity": "音乐人","category": "Roles","description": "周杰伦(Jay Chou)是一位音乐人。"}, 26 | {"entity": "1979年1月18日","category": "Date","description": "周杰伦(Jay Chou)在1979年1月18日出生。"}, 27 | {"entity": "台湾省新北市","category": "GeographicLocation","description": "周杰伦出生在台湾省新北市。"}, 28 | {"entity": "福建省永春县","category": "GeographicLocation","description": "周杰伦的祖籍在福建省永春县。"}, 29 | {"entity": "淡江中学","category": "Organization","description": "周杰伦毕业于淡江中学。"}, 30 | {"entity": "专辑《Jay》","category": "Albums","description": "专辑《Jay》是周杰伦2000年发行个人首张音乐专辑。"}, 31 | {"entity": "《头文字D》","category": "Works","description": "《头文字D》是周杰伦2005年主演个人首部电影。"}, 32 | {"entity": "金像奖","category": "Awards","description": "2005年,凭借《头文字D》获得第25届香港电影金像奖。"}, 33 | {"entity": "金马奖","category": "Awards","description": "2005年,凭借《头文字D》获得第42届台湾电影金马奖。"}, 34 | ] 35 | } 36 | ], 37 | "input": "$input" 38 | } 39 | """ 40 | 41 | template_en = template_zh 42 | 43 | def __init__( 44 | self, language: Optional[str] = "en", **kwargs 45 | ): 46 | super().__init__(language, **kwargs) 47 | self.schema = SchemaClient(project_id=self.project_id).extract_types() 48 | self.template = Template(self.template).safe_substitute(schema=self.schema) 49 | 50 | @property 51 | def template_variables(self) -> List[str]: 52 | return ["input"] 53 | 54 | def parse_response(self, response: str, **kwargs): 55 | rsp = response 56 | if isinstance(rsp, str): 57 | rsp = json.loads(rsp) 58 | if isinstance(rsp, dict) and "output" in rsp: 59 | rsp = rsp["output"] 60 | if isinstance(rsp, dict) and "named_entities" in rsp: 61 | entities = rsp["named_entities"] 62 | else: 63 | entities = rsp 64 | 65 | return entities 66 | -------------------------------------------------------------------------------- /JayChouProject/solver/prompt/resp_generator.py: -------------------------------------------------------------------------------- 1 | # 根据已有信息生成最终答案 2 | # 源码在 KAG/kag/solver/prompt/default 3 | 4 | 5 | import re 6 | from string import Template 7 | from typing import List 8 | import logging 9 | 10 | from kag.common.base.prompt_op import PromptOp 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | class RespGenerator(PromptOp): 16 | template_zh = "基于给定的引用信息完整回答问题。" \ 17 | "\n给定的引用信息:'$memory'\n问题:'$instruction'" 18 | template_en = template_zh 19 | 20 | def __init__(self, language: str): 21 | super().__init__(language) 22 | 23 | @property 24 | def template_variables(self) -> List[str]: 25 | return ["memory", "instruction"] 26 | 27 | def parse_response(self, response: str, **kwargs): 28 | logger.debug('推理器判别:{}'.format(response)) 29 | return response 30 | -------------------------------------------------------------------------------- /JayChouProject/solver/query.py: -------------------------------------------------------------------------------- 1 | import os 2 | from kag.common.env import init_kag_config 3 | from kag.solver.logic.solver_pipeline import SolverPipeline 4 | 5 | 6 | 7 | # 定义了一个封装问答系统功能的类 提供问答功能的接口 8 | class SolverDemo: 9 | # 确保问答系统在使用前正确初始化 10 | def __init__(self, configFilePath): 11 | self.configFilePath = configFilePath 12 | init_kag_config(self.configFilePath) 13 | 14 | 15 | # 执行问答,使用 SolverPipeline 类的实例 resp 处理查询,并返回答案和跟踪日志 16 | def qa(self, query): 17 | resp = SolverPipeline(max_run=2) 18 | answer, trace_log = resp.run(query) 19 | # print(f"\n\nso the answer for '{query}' is: {answer}\n\n") 20 | return answer, trace_log 21 | 22 | 23 | 24 | if __name__ == "__main__": 25 | configFilePath = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../kag_config.cfg") 26 | 27 | demo = SolverDemo(configFilePath=configFilePath) 28 | query = "周杰伦出生日期?" 29 | 30 | answer, trace_log = demo.qa(query) 31 | 32 | print(f"answer:\n{answer}\n\ntraceLog:\n{trace_log}") 33 | -------------------------------------------------------------------------------- /KagV6Test/JayChouTest_KAG_V6/builder/data/jay.txt: -------------------------------------------------------------------------------- 1 | 周杰伦(Jay Chou),1979年1月18日出生于台湾省新北市,祖籍福建省永春县,华语流行乐男歌手、音乐人、演员、导演、编剧,毕业于淡江中学。 2 | 2000年,发行个人首张音乐专辑《Jay》。2001年,凭借专辑《范特西》奠定其融合中西方音乐的风格。2002年,举行“The One”世界巡回演唱会。2003年,成为美国《时代周刊》封面人物;同年,发行音乐专辑《叶惠美》,该专辑获得第15届台湾金曲奖最佳流行音乐演唱专辑奖。2004年,发行音乐专辑《七里香》,该专辑在亚洲的首月销量达到300万张;同年,获得世界音乐大奖中国区最畅销艺人奖。2005年,主演个人首部电影《头文字D》,并凭借该片获得第25届香港电影金像奖和第42届台湾电影金马奖的最佳新演员奖。2006年起,连续三年获得世界音乐大奖中国区最畅销艺人奖。 3 | 2007年,自编自导爱情电影《不能说的秘密》,同年,成立杰威尔音乐有限公司。2007年,凭借歌曲《青花瓷》获得第19届台湾金曲奖最佳作曲人奖。2007年,入选美国CNN“25位亚洲最具影响力人物”;同年,凭借专辑《魔杰座》获得第20届台湾金曲奖最佳国语男歌手奖。2010年,入选美国《Fast Company》评出的“全球百大创意人物”。2011年,凭借专辑《跨时代》获得第22届台湾金曲奖最佳国语男歌手奖。2012年,登上福布斯中国名人榜榜首。2014年,发行个人首张数字音乐专辑《哎呦,不错哦》。2023年,凭借专辑《最伟大的作品》成为首位获得国际唱片业协会“全球畅销专辑榜”冠军的华语歌手。 -------------------------------------------------------------------------------- /KagV6Test/JayChouTest_KAG_V6/builder/indexer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | from kag.common.registry import import_modules_from_path 4 | from kag.builder.runner import BuilderChainRunner 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | def buildKB(dir_path): 10 | from kag.common.conf import KAG_CONFIG 11 | 12 | runner = BuilderChainRunner.from_config( 13 | KAG_CONFIG.all_config["kag_builder_pipeline"] 14 | ) 15 | runner.invoke(dir_path) 16 | 17 | logger.info(f"\n\nbuildKB successfully for {dir_path}\n\n") 18 | 19 | 20 | if __name__ == "__main__": 21 | dir_path = os.path.dirname(os.path.abspath(__file__)) 22 | import_modules_from_path(dir_path) 23 | 24 | data_dir_path = os.path.join(dir_path, "data/jay.txt") 25 | buildKB(data_dir_path) -------------------------------------------------------------------------------- /KagV6Test/JayChouTest_KAG_V6/builder/prompt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanGePlus/KagTest/f4ecd3247c681e74f19649e442b029d9eb2ee71f/KagV6Test/JayChouTest_KAG_V6/builder/prompt/__init__.py -------------------------------------------------------------------------------- /KagV6Test/JayChouTest_KAG_V6/builder/prompt/ner.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2023 OpenSPG Authors 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | # in compliance with the License. You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software distributed under the License 10 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | # or implied. 12 | 13 | import json 14 | from string import Template 15 | from typing import List 16 | from kag.common.conf import KAG_PROJECT_CONF 17 | from kag.interface import PromptABC 18 | from knext.schema.client import SchemaClient 19 | 20 | 21 | @PromptABC.register("jaychou_ner") 22 | class OpenIENERPrompt(PromptABC): 23 | 24 | template_zh = """ 25 | { 26 | "instruction": "你是命名实体识别的专家。请从输入中提取与模式定义匹配的实体。如果不存在该类型的实体,请返回一个空列表。请以JSON字符串格式回应。你可以参照example进行抽取。", 27 | "schema": $schema, 28 | "example": [ 29 | { 30 | "input": "周杰伦(Jay Chou),1979年1月18日出生于台湾省新北市,祖籍福建省永春县,华语流行乐男歌手、音乐人、演员、导演、编剧,毕业于淡江中学。\n2000年,发行个人首张音乐专辑《Jay》。2001年,凭借专辑《范特西》奠定其融合中西方音乐的风格。2002年,举行“The One”世界巡回演唱会。2005年,主演个人首部电影《头文字D》,并凭借该片获得第25届香港电影金像奖和第42届台湾电影金马奖的最佳新演员奖。2006年起,连续三年获得世界音乐大奖中国区最畅销艺人奖。", 31 | "output": [ 32 | {"name": "周杰伦","category": "Person","description": "周杰伦(Jay Chou)是一位华语流行乐男歌手、音乐人、演员、导演和编剧。"}, 33 | {"name": "音乐人","category": "Roles","description": "周杰伦(Jay Chou)是一位音乐人。"}, 34 | {"name": "1979年1月18日","category": "Date","description": "周杰伦(Jay Chou)在1979年1月18日出生。"}, 35 | {"name": "台湾省新北市","category": "GeographicLocation","description": "周杰伦出生在台湾省新北市。"}, 36 | {"name": "福建省永春县","category": "GeographicLocation","description": "周杰伦的祖籍在福建省永春县。"}, 37 | {"name": "淡江中学","category": "Organization","description": "周杰伦毕业于淡江中学。"}, 38 | {"name": "专辑《Jay》","category": "Albums","description": "专辑《Jay》是周杰伦2000年发行个人首张音乐专辑。"}, 39 | {"name": "《头文字D》","category": "Works","description": "《头文字D》是周杰伦2005年主演个人首部电影。"}, 40 | {"name": "金像奖","category": "Awards","description": "2005年,凭借《头文字D》获得第25届香港电影金像奖。"}, 41 | {"name": "金马奖","category": "Awards","description": "2005年,凭借《头文字D》获得第42届台湾电影金马奖。"}, 42 | ] 43 | } 44 | ], 45 | "input": "$input" 46 | } 47 | """ 48 | 49 | 50 | template_en = template_zh 51 | 52 | def __init__(self, language: str = "", **kwargs): 53 | super().__init__(language, **kwargs) 54 | self.schema = SchemaClient( 55 | project_id=KAG_PROJECT_CONF.project_id 56 | ).extract_types() 57 | self.template = Template(self.template).safe_substitute( 58 | schema=json.dumps(self.schema) 59 | ) 60 | 61 | @property 62 | def template_variables(self) -> List[str]: 63 | return ["input"] 64 | 65 | def parse_response(self, response: str, **kwargs): 66 | rsp = response 67 | if isinstance(rsp, str): 68 | rsp = json.loads(rsp) 69 | if isinstance(rsp, dict) and "output" in rsp: 70 | rsp = rsp["output"] 71 | if isinstance(rsp, dict) and "named_entities" in rsp: 72 | entities = rsp["named_entities"] 73 | else: 74 | entities = rsp 75 | 76 | return entities 77 | -------------------------------------------------------------------------------- /KagV6Test/JayChouTest_KAG_V6/builder/prompt/std.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2023 OpenSPG Authors 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | # in compliance with the License. You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software distributed under the License 10 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | # or implied. 12 | 13 | import json 14 | from typing import List 15 | 16 | from kag.interface import PromptABC 17 | 18 | 19 | @PromptABC.register("jaychou_std") 20 | class OpenIEEntitystandardizationdPrompt(PromptABC): 21 | template_zh = """ 22 | { 23 | "instruction": "input字段包含用户提供的上下文。命名实体字段包含从上下文中提取的命名实体,这些可能是含义不明的缩写、别名或俚语。为了消除歧义,请尝试根据上下文和您自己的知识提供这些实体的官方名称。请注意,具有相同含义的实体只能有一个官方名称。请按照提供的示例中的输出字段格式,以单个JSONArray字符串形式回复,无需任何解释。", 24 | "example": { 25 | "input": "周杰伦(Jay Chou),1979年1月18日出生于台湾省新北市,祖籍福建省永春县,华语流行乐男歌手、音乐人、演员、导演、编剧,毕业于淡江中学。\n2000年,发行个人首张音乐专辑《Jay》。2001年,凭借专辑《范特西》奠定其融合中西方音乐的风格。2002年,举行“The One”世界巡回演唱会。2005年,主演个人首部电影《头文字D》,并凭借该片获得第25届香港电影金像奖和第42届台湾电影金马奖的最佳新演员奖。2006年起,连续三年获得世界音乐大奖中国区最畅销艺人奖。", 26 | "named_entities": [ 27 | {"name": "周杰伦","category": "Person","description": "周杰伦(Jay Chou)是一位华语流行乐男歌手、音乐人、演员、导演和编剧。"}, 28 | {"name": "音乐人","category": "Roles","description": "周杰伦(Jay Chou)是一位音乐人。"}, 29 | {"name": "1979年1月18日","category": "Date","description": "周杰伦(Jay Chou)在1979年1月18日出生。"}, 30 | {"name": "台湾省新北市","category": "GeographicLocation","description": "周杰伦出生在台湾省新北市。"}, 31 | {"name": "福建省永春县","category": "GeographicLocation","description": "周杰伦的祖籍在福建省永春县。"}, 32 | {"name": "淡江中学","category": "Organization","description": "周杰伦毕业于淡江中学。"}, 33 | {"name": "专辑《Jay》","category": "Albums","description": "专辑《Jay》是周杰伦2000年发行个人首张音乐专辑。"}, 34 | {"name": "《头文字D》","category": "Works","description": "《头文字D》是周杰伦2005年主演个人首部电影。"}, 35 | {"name": "金像奖","category": "Awards","description": "2005年,凭借《头文字D》获得第25届香港电影金像奖。"}, 36 | {"name": "金马奖","category": "Awards","description": "2005年,凭借《头文字D》获得第42届台湾电影金马奖。"}, 37 | ], 38 | "output": [ 39 | {"name": "周杰伦","category": "Person","description": "周杰伦(Jay Chou)是一位华语流行乐男歌手、音乐人、演员、导演和编剧。","official_name": "Jay Chou"}, 40 | {"name": "音乐人","category": "Roles","description": "周杰伦(Jay Chou)是一位音乐人。","official_name": "音乐从业者"}, 41 | {"name": "1979年1月18日","category": "Date","description": "周杰伦(Jay Chou)在1979年1月18日出生。","official_name": "1979-01-18"}, 42 | {"name": "台湾省新北市","category": "GeographicLocation","description": "周杰伦出生在台湾省新北市。","official_name": "台湾新北"}, 43 | {"name": "福建省永春县","category": "GeographicLocation","description": "周杰伦的祖籍在福建省永春县。","official_name": "福建永春"}, 44 | {"name": "淡江中学","category": "Organization","description": "周杰伦毕业于淡江中学。","official_name": "淡江高级中学"}, 45 | {"name": "专辑《Jay》","category": "Albums","description": "专辑《Jay》是周杰伦2000年发行个人首张音乐专辑。","official_name": "专辑Jay"}, 46 | {"name": "《头文字D》","category": "Works","description": "《头文字D》是周杰伦2005年主演个人首部电影。","official_name": "头文字D"}, 47 | {"name": "金像奖","category": "Awards","description": "2005年,凭借《头文字D》获得第25届香港电影金像奖。","official_name": "香港电影金像奖"}, 48 | {"name": "金马奖","category": "Awards","description": "2005年,凭借《头文字D》获得第42届台湾电影金马奖。","official_name": "台湾电影金马奖"}, 49 | ] 50 | }, 51 | "input": $input, 52 | "named_entities": $named_entities, 53 | } 54 | """ 55 | 56 | template_en = template_zh 57 | 58 | @property 59 | def template_variables(self) -> List[str]: 60 | return ["input", "named_entities"] 61 | 62 | def parse_response(self, response: str, **kwargs): 63 | rsp = response 64 | if isinstance(rsp, str): 65 | rsp = json.loads(rsp) 66 | if isinstance(rsp, dict) and "output" in rsp: 67 | rsp = rsp["output"] 68 | if isinstance(rsp, dict) and "named_entities" in rsp: 69 | standardized_entity = rsp["named_entities"] 70 | else: 71 | standardized_entity = rsp 72 | entities_with_offical_name = set() 73 | merged = [] 74 | entities = kwargs.get("named_entities", []) 75 | for entity in standardized_entity: 76 | merged.append(entity) 77 | entities_with_offical_name.add(entity["name"]) 78 | # in case llm ignores some entities 79 | for entity in entities: 80 | if entity["name"] not in entities_with_offical_name: 81 | entity["official_name"] = entity["name"] 82 | merged.append(entity) 83 | return merged 84 | -------------------------------------------------------------------------------- /KagV6Test/JayChouTest_KAG_V6/builder/prompt/triple.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2023 OpenSPG Authors 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | # in compliance with the License. You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software distributed under the License 10 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | # or implied. 12 | 13 | import json 14 | from typing import List 15 | 16 | from kag.interface import PromptABC 17 | 18 | 19 | @PromptABC.register("jaychou_triple") 20 | class OpenIETriplePrompt(PromptABC): 21 | template_zh = """ 22 | { 23 | "instruction": "您是一位专门从事开放信息提取(OpenIE)的专家。请从input字段的文本中提取任何可能的关系(包括主语、谓语、宾语),并按照JSON格式列出它们,须遵循example字段的示例格式。请注意以下要求:1. 每个三元组应至少包含entity_list实体列表中的一个,但最好是两个命名实体。2. 明确地将代词解析为特定名称,以保持清晰度。", 24 | "entity_list": $entity_list, 25 | "input": "$input", 26 | "example": { 27 | "input": "周杰伦(Jay Chou),1979年1月18日出生于台湾省新北市,祖籍福建省永春县,华语流行乐男歌手、音乐人、演员、导演、编剧,毕业于淡江中学。\n2000年,发行个人首张音乐专辑《Jay》。2001年,凭借专辑《范特西》奠定其融合中西方音乐的风格。2002年,举行“The One”世界巡回演唱会。2005年,主演个人首部电影《头文字D》,并凭借该片获得第25届香港电影金像奖和第42届台湾电影金马奖的最佳新演员奖。2006年起,连续三年获得世界音乐大奖中国区最畅销艺人奖。", 28 | "entity_list": [ 29 | {"name": "周杰伦","category": "Person","description": "周杰伦(Jay Chou)是一位华语流行乐男歌手、音乐人、演员、导演和编剧。"}, 30 | {"name": "音乐人","category": "Roles","description": "周杰伦(Jay Chou)是一位音乐人。"}, 31 | {"name": "1979年1月18日","category": "Date","description": "周杰伦(Jay Chou)在1979年1月18日出生。"}, 32 | {"name": "台湾省新北市","category": "GeographicLocation","description": "周杰伦出生在台湾省新北市。"}, 33 | {"name": "福建省永春县","category": "GeographicLocation","description": "周杰伦的祖籍在福建省永春县。"}, 34 | {"name": "淡江中学","category": "Organization","description": "周杰伦毕业于淡江中学。"}, 35 | {"name": "专辑《Jay》","category": "Albums","description": "专辑《Jay》是周杰伦2000年发行个人首张音乐专辑。"}, 36 | {"name": "《头文字D》","category": "Works","description": "《头文字D》是周杰伦2005年主演个人首部电影。"}, 37 | {"name": "金像奖","category": "Awards","description": "2005年,凭借《头文字D》获得第25届香港电影金像奖。"}, 38 | {"name": "金马奖","category": "Awards","description": "2005年,凭借《头文字D》获得第42届台湾电影金马奖。"}, 39 | ], 40 | "output":[ 41 | ["周杰伦", "出生于", "1979年1月18日"], 42 | ["周杰伦", "毕业于", "淡江中学"], 43 | ["周杰伦", "出生在", "台湾省新北市"], 44 | ["周杰伦", "发行", "专辑《Jay》"], 45 | ["周杰伦", "主演", "《头文字D》"], 46 | ["周杰伦", "获得", "金像奖"], 47 | ] 48 | } 49 | } 50 | """ 51 | 52 | template_en = template_zh 53 | 54 | @property 55 | def template_variables(self) -> List[str]: 56 | return ["entity_list", "input"] 57 | 58 | def parse_response(self, response: str, **kwargs): 59 | rsp = response 60 | if isinstance(rsp, str): 61 | rsp = json.loads(rsp) 62 | if isinstance(rsp, dict) and "output" in rsp: 63 | rsp = rsp["output"] 64 | if isinstance(rsp, dict) and "triples" in rsp: 65 | triples = rsp["triples"] 66 | else: 67 | triples = rsp 68 | 69 | standardized_triples = [] 70 | for triple in triples: 71 | if isinstance(triple, list): 72 | standardized_triples.append(triple) 73 | elif isinstance(triple, dict): 74 | s = triple.get("subject") 75 | p = triple.get("predicate") 76 | o = triple.get("object") 77 | if s and p and o: 78 | standardized_triples.append([s, p, o]) 79 | 80 | return standardized_triples 81 | -------------------------------------------------------------------------------- /KagV6Test/JayChouTest_KAG_V6/schema/JayChouTest.schema: -------------------------------------------------------------------------------- 1 | namespace JayChouTest 2 | 3 | Chunk(文本块): EntityType 4 | properties: 5 | content(内容): Text 6 | index: TextAndVector 7 | 8 | Date(日期): EntityType 9 | properties: 10 | desc(描述): Text 11 | index: TextAndVector 12 | semanticType(语义类型): Text 13 | index: Text 14 | 15 | GeographicLocation(地理位置): EntityType 16 | properties: 17 | desc(描述): Text 18 | index: TextAndVector 19 | semanticType(语义类型): Text 20 | index: Text 21 | 22 | Organization(组织机构): EntityType 23 | properties: 24 | desc(描述): Text 25 | index: TextAndVector 26 | semanticType(语义类型): Text 27 | index: Text 28 | 29 | Person(人物): EntityType 30 | properties: 31 | desc(描述): Text 32 | index: TextAndVector 33 | semanticType(语义类型): Text 34 | index: Text 35 | 36 | Works(作品): EntityType 37 | properties: 38 | desc(描述): Text 39 | index: TextAndVector 40 | semanticType(语义类型): Text 41 | index: Text 42 | 43 | Albums(专辑): EntityType 44 | properties: 45 | desc(描述): Text 46 | index: TextAndVector 47 | semanticType(语义类型): Text 48 | index: Text 49 | 50 | Roles(角色): EntityType 51 | properties: 52 | desc(描述): Text 53 | index: TextAndVector 54 | semanticType(语义类型): Text 55 | index: Text 56 | 57 | Awards(获奖): EntityType 58 | properties: 59 | desc(描述): Text 60 | index: TextAndVector 61 | semanticType(语义类型): Text 62 | index: Text 63 | 64 | Others(其他): EntityType 65 | properties: 66 | desc(描述): Text 67 | index: TextAndVector 68 | semanticType(语义类型): Text 69 | index: Text -------------------------------------------------------------------------------- /KagV6Test/JayChouTest_KAG_V6/solver/prompt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanGePlus/KagTest/f4ecd3247c681e74f19649e442b029d9eb2ee71f/KagV6Test/JayChouTest_KAG_V6/solver/prompt/__init__.py -------------------------------------------------------------------------------- /KagV6Test/JayChouTest_KAG_V6/solver/prompt/__pycache__/resp_generator.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanGePlus/KagTest/f4ecd3247c681e74f19649e442b029d9eb2ee71f/KagV6Test/JayChouTest_KAG_V6/solver/prompt/__pycache__/resp_generator.cpython-311.pyc -------------------------------------------------------------------------------- /KagV6Test/JayChouTest_KAG_V6/solver/prompt/resp_generator.py: -------------------------------------------------------------------------------- 1 | import re 2 | from string import Template 3 | from typing import List 4 | import logging 5 | 6 | from kag.interface import PromptABC 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | @PromptABC.register("resp_simple") 12 | class RespGenerator(PromptABC): 13 | template_zh = ( 14 | "基于给定的引用信息回答问题。" "\n只输出答案,不需要输出额外的信息。" "\n给定的引用信息:'$memory'\n问题:'$instruction'" 15 | ) 16 | template_en = ( 17 | "Answer the question based on the given reference." 18 | "\nOnly give me the answer and do not output any other words." 19 | "\nThe following are given reference:'$memory'\nQuestion: '$instruction'" 20 | ) 21 | 22 | @property 23 | def template_variables(self) -> List[str]: 24 | return ["memory", "instruction"] 25 | 26 | def parse_response(self, response: str, **kwargs): 27 | logger.debug("推理器判别:{}".format(response)) 28 | return response 29 | -------------------------------------------------------------------------------- /KagV6Test/JayChouTest_KAG_V6/solver/query.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import time 5 | from concurrent.futures import ThreadPoolExecutor, as_completed 6 | 7 | from tqdm import tqdm 8 | 9 | from kag.common.benchmarks.evaluate import Evaluate 10 | from kag.solver.logic.solver_pipeline import SolverPipeline 11 | from kag.common.conf import KAG_CONFIG 12 | from kag.common.registry import import_modules_from_path 13 | 14 | from kag.common.checkpointer import CheckpointerManager 15 | 16 | 17 | def qa(query): 18 | resp = SolverPipeline.from_config(KAG_CONFIG.all_config["kag_solver_pipeline"]) 19 | answer, traceLog = resp.run(query) 20 | 21 | print(f"\n\nso the answer for '{query}' is: {answer}\n\n") # 22 | print(traceLog) 23 | return answer, traceLog 24 | 25 | 26 | if __name__ == "__main__": 27 | import_modules_from_path("./prompt") 28 | queries = [ 29 | "周杰伦哪一年出生?", 30 | ] 31 | for q in queries: 32 | qa(q) 33 | -------------------------------------------------------------------------------- /KagV6Test/README.md: -------------------------------------------------------------------------------- 1 | # 1、介绍 2 | ## 1.1 主要内容 3 | **KAG新版本V0.6功能测试** 4 | 主要内容:测试版本升级后的产品模式、开发者模式使用 5 | 6 | ## 1.2 KAG框架 7 | **(1)KAG是什么** 8 | KAG是OpenSPG发布v0.5版本中推出的知识增强生成(KAG)的专业领域知识服务框架,旨在充分利用知识图谱和向量检索的优势,增强大型语言模型和知识图谱,以解决 RAG 挑战 9 | OpenSPG是蚂蚁集团结合多年金融领域多元场景知识图谱构建与应用业务经验的总结,并与OpenKG联合推出的基于SPG(Semantic-enhanced Programmable Graph)框架研发的知识图谱引擎 10 | 检索增强生成(RAG)技术推动了领域应用与大模型结合。然而,RAG 存在着向量相似度与知识推理相关性差距大、对知识逻辑(如数值、时间关系、专家规则等)不敏感等问题,这些缺陷阻碍了专业知识服务的落地 11 | 官方网址:https://openspg.yuque.com/r/organizations/homepage 12 | Github地址:https://github.com/OpenSPG/KAG 13 | **(2)KAGV6.0版本更新** 14 | https://github.com/OpenSPG/KAG/releases/tag/v0.6 15 | 16 | # 2、前期准备工作 17 | ## 2.1 开发环境搭建:anaconda、pycharm 18 | anaconda:提供python虚拟环境,官网下载对应系统版本的安装包安装即可 19 | pycharm:提供集成开发环境,官网下载社区版本安装包安装即可 20 | **可参考如下视频:** 21 | 集成开发环境搭建Anaconda+PyCharm 22 | https://www.bilibili.com/video/BV1q9HxeEEtT/?vd_source=30acb5331e4f5739ebbad50f7cc6b949 23 | https://youtu.be/myVgyitFzrA 24 | 25 | ## 2.2 大模型相关配置 26 | (1)GPT大模型使用方案(第三方代理方式) 27 | (2)非GPT大模型(阿里通义千问、讯飞星火、智谱等大模型)使用方案(OneAPI方式) 28 | (3)本地开源大模型使用方案(Ollama方式) 29 | **可参考如下视频:** 30 | 提供一种LLM集成解决方案,一份代码支持快速同时支持gpt大模型、国产大模型(通义千问、文心一言、百度千帆、讯飞星火等)、本地开源大模型(Ollama) 31 | https://www.bilibili.com/video/BV12PCmYZEDt/?vd_source=30acb5331e4f5739ebbad50f7cc6b949 32 | https://youtu.be/CgZsdK43tcY 33 | 34 | 35 | # 3、项目初始化 36 | ## 3.1 下载源码 37 | GitHub或Gitee中下载工程文件到本地,下载地址如下: 38 | https://github.com/NanGePlus/KagTest 39 | https://gitee.com/NanGePlus/KagTest 40 | 41 | ## 3.2 构建项目 42 | 使用pycharm构建一个项目,为项目配置虚拟python环境 43 | 项目名称:KagV6Test 44 | 虚拟环境名称保持与项目名称一致 45 | 46 | ## 3.3 将相关代码拷贝到项目工程中 47 | 将下载的代码文件夹中的文件全部拷贝到新建的项目根目录下 48 | 49 | 50 | # 4、功能测试 51 | ## 4.1 OpenSPG-Server部署 52 | 首先,下载官方提供的最新版本的OpenSPG-Server的docker-compose.yml文件 53 | 链接:https://github.com/OpenSPG/openspg/blob/master/dev/release/docker-compose.yml 54 | 然后,进入到配置文件所在目录使用docker部署和启动OpenSPG-Server,运行的指令为: 55 | docker compose -f docker-compose.yml up -d 56 | 启动成功后,对应的服务查看方式如下: 57 | **neo4j:** 浏览器输入 http://127.0.0.1:7474/browser/ , 访问neo4j图数据库,默认用户名和密码:neo4j neo4j@openspg 58 | **Minio:** 浏览器输入 http://127.0.0.1:9000 , 访问Minio存储,默认用户名和密码:minio minio@openspg 59 | **mysql:** 打开mysql客户端软件,远程访问数据库,默认用户名和密码:root openspg 60 | 对于docker的使用,这里不做详细的赘述了,大家可以去看我这期视频,里面有对于docker非常详细的讲解,从安装部署到使用 61 | https://www.bilibili.com/video/BV1LhUAYFEku/?vd_source=30acb5331e4f5739ebbad50f7cc6b949 62 | https://youtu.be/hD09V7jaXSo 63 | ## 4.2 产品模式测试 64 | ### (1) 访问WEB端 65 | 浏览器输入 http://127.0.0.1:8887, 可访问openspg-kag产品模式的WEB端,默认用户名和密码:openspg openspg@kag 66 | 首次登录会要求修改密码 67 | ### (2) 全局配置 68 | **图存储配置参数:** 69 | database:JayChou 自定义 70 | password:neo4j@openspg 71 | uri:neo4j://release-openspg-neo4j:7687 72 | user:neo4j 73 | **向量配置参数:** 74 | type:openai 75 | model:text-embedding-3-small 76 | base_url:https://yunwu.ai/v1 77 | api_key:sk-MqUugKDFN7cgWzmX0XM1reUb6I3rm5WgA2LdHl6WhDqlz2fp 78 | **提示词中英文配置参数:** 79 | biz_scene:default 80 | language:zh 81 | **模型配置参数-maas:** 82 | model:gpt-4o-mini 83 | api_key:sk-MqUugKDFN7cgWzmX0XM1reUb6I3rm5WgA2LdHl6WhDqlz2fp 84 | base_url:https://yunwu.ai/v1 85 | ### (3) 按照如下流程测试 86 | 创建知识库-编辑知识模型-创建任务-构建知识库、查看日志、抽取结果知识图谱-知识探查-知识库配置权限配置-推理问答 87 | ### (4) HTTP API接口测试 88 | 使用Apifox工具进行接口验证测试,并将提供的接口文档KagTest.apifox.json导入到Apifox 89 | 90 | ## 4.3 开发者模式测试 91 | ### (1) 安装依赖 92 | 下载KAG源码 https://github.com/OpenSPG/KAG 解压后将源码工程拷贝到项目根目录,截止2025-01-10,最新版本是v0.6.0 93 | 新建命令行终端,按照如下指令进行依赖安装 94 | cd KAG 95 | pip install -e . 96 | 安装完成之后可以运行如下指令验证是否安装成功 97 | knext --version 98 | ### (2)调整配置文件 99 | 将根目录下的other/config目录下的example_config.yaml文件拷贝一份到根目录,根据自己的业务修改配置参数 100 | KAG支持txt、pdf、markdown、docx、json、csv、语雀等,根据自己要处理的文本类型进行相关设置 101 | ### (3)使用配置文件初始化项目 102 | 新建命令行终端,运行如下命令进行项目创建和初始化 103 | knext project create --config_path ./example_config.yaml 104 | 若项目创建完成,修改了配置文件,需要运行如下命令进行更新 105 | knext project update --proj_path . 106 | ### (3)提交schema 107 | 项目初始化完成后,进入到对应的项目文件夹下,根据实际业务需求调整schema,调整完成后再执行提交schema 108 | knext schema commit 109 | ### (4)构建索引 110 | 首先将文档拷贝到新建项目文件夹中的builder/data下,支持txt、pdf、markdown、docx、json、csv等 111 | 并可以根据自身业务需求,在builder/prompt目录下新增:ner.py、std.py、triple.py 112 | **注意:** 代码中是通过注解的方式配置到配置文件中 113 | 打开命令行终端,进入脚本所在目录cd builder,运行 python indexer.py 命令 114 | 构建脚本启动后,会在当前工作目录下生成任务的 checkpoint 目录,记录了构建链路的 checkpoint 和统计信息 115 | KAG 框架基于 checkpoint 文件提供了断点续跑的功能。如果由于程序出错或其他外部原因(如 LLM 余额不足)导致任务中断,可以重新执行 indexer.py,KAG 会自动加载 checkpoint 文件并复用已有结果 116 | 索引构建成功后,可登录到 http://127.0.0.1:8887/或 http://127.0.0.1:7474/browser/ 查看知识图谱 117 | ### (5)检索 118 | 打开命令行终端,进入脚本所在目录solver,运行 python query.py 命令 119 | 根据自身业务需求,可设置相关prompt内容:如resp_generator.py 120 | 也可以在产品端进行测试 http://127.0.0.1:8887/ 121 | -------------------------------------------------------------------------------- /KagV6Test/XiYouJiTest_KAG_V6/builder/data/incremental_inputs/file4.md: -------------------------------------------------------------------------------- 1 | # 第4回 五行山从师取真经 2 | 3 |   五百年以后,观音菩萨奉了如来佛的法旨,带着锦袈裟等五件宝贝,跟惠岸行者一块儿来到东土大唐,寻找去西天求取三藏真经的人。师徒二人在半空中驾着云,来到大唐京城长安的上空。这时已是贞观十三年。 4 |   这一天,正是唐太宗李世民命令高僧陈玄奘在化生寺设坛宣讲佛法的日子。陈玄奘是如来佛二弟子金蝉子转世的,观音暗中选定他为取经人,自己与惠岸行者变成了游方和尚,捧着袈裟等宝贝到皇宫门外,要求拜见唐太宗,给他献宝。 5 |   唐太宗一向喜欢佛经,立即叫他们上殿,问那些宝贝一共要多少钱。观音说∶“如来佛祖那儿有三藏真经,你如果派陈玄奘去西天求取真经,这些宝贝就送给你了。”说完,跟惠岸行者变成原来的样子,驾起云走了。太宗一见是观音菩萨,连忙带领满朝文武官员向天朝拜。 6 |   唐太宗十分高兴,和陈玄奘结成了兄弟,要他去西天取经,将护身袈裟等宝物送给了他,并将他的名字改为“唐三藏”。过了几天,三藏要出发了,唐太宗便率领文武百官一路送到长安城外,和三藏依依惜别。 7 |   唐三藏别名唐僧。他和两个仆人赶了两天路,来到法门寺,寺里的和尚赶忙出来迎接。晚上,和尚们坐在一起议论去西天取经的路途艰险,唐僧用手指着心口说∶“只要有坚定的信念,那么任何危险都算不了什么!”和尚们连声称赞。 8 |   第二天,唐僧主仆含泪辞别了和尚,又骑着马继续向西走去。不几天,就来到了大唐的边界河州,镇边总兵和本地的和尚道士把唐僧主仆接到福原寺休息。 9 |   第二天天还没亮,唐僧就把两个仆人叫了起来,三人借着月光赶路。走了十几里就开始上山了,道路起伏不平,杂草丛生,十分难走。他们只好一边拔草一边走。忽然一脚踏空,三人和马还一起摔进了深坑。主仆三人正在惊慌之时,忽然听见“抓起来!抓起来!”的叫喊声。 10 |   随着一阵狂风,出现了一群妖怪,抓住了主仆三人。唐僧偷偷看了看,上面坐着一个长相凶恶的魔王,那魔王一声令下,妖怪们把唐僧主仆绑了起来。这时一个小妖来报∶“熊山君和特处士到!” 11 |   魔王赶忙出去迎接,那两人称魔王为寅将军。寅将军打算用唐僧等人招待他的客人。熊山君说∶“今天,就选吃两个算了。”于是,寅将军把唐僧的两个仆人剖腹挖心,活活地吃掉了。唐僧差点被吓昏过去。 12 |   天快亮了,妖怪们都躲了起来。唐僧吓傻了,昏昏沉沉地睡着。忽然一个柱拐杖的老人慢慢向他走来,把手一挥,捆绑唐僧的绳子都断了,又向他吹一口气,唐僧醒了过来,连忙躬身施礼感谢老人,老人说∶“这个地方叫双叉岭,是个危险的地方。” 13 |   老人让唐僧拿上包袱,牵着马,把他领到大路上来。唐僧连忙拴好马,准备感谢,抬头一看,老人已乘着一只红顶白鹤飞走了,从空中掉下一张纸条,唐僧接过一看,才知老人就是太白金星,于是赶忙向空中不停地施礼。 14 |   唐僧骑着马,沿着山路往前走,走了半天,也不见一个人。他又渴又饿,想找点水喝。忽然看见前面有两只凶恶的老虎,张开了血盆大嘴,又往四周看看,发现身后是吐着红信的毒蛇,左边是有毒的虫子,右边又是些从未见过的野兽。唐僧被困在中间,急得不知如何是好,只好听天由命了。 15 |   就在这危急关头,野兽忽然都逃跑了。唐僧惊奇地四处观看,只见一个手拿钢叉,腰挂弓箭的大汉从山坡上走了过来。唐僧连忙跪下,合掌高叫∶“大王救命!”那大汉挽起唐僧说∶“我哪里是什么大王,只不过是一个猎户,叫刘伯钦。” 16 |   刘伯钦请唐僧到家中作客,唐僧非常高兴,牵着马,来到了刘伯钦的家。第二天,唐僧要上路了,刘伯钦按照母亲的意思,带了几个人,拿着捕猎的工具,要送一送唐僧。走了半天,他们来到一座大山前。 17 |   他们走到半山腰,刘伯钦等人站住说∶“长老,前面就要到两界山了,山东边归大唐管,山西边是鞑靼的疆域,我们是不能过去的,您自己走吧,一路上可要多多小心啊!”唐僧只好和他们道别,忽听山脚下有人大喊∶“师父快过来,师父快过来!” 18 |   唐僧吓得胆战心惊。刘伯钦赶忙说∶“长老莫怕,听老人说,当年王莽造反的时候,这座山从天而降,山下还压着一个饿不死,冻不坏的神猴,刚才肯定是那个神猴在叫喊,长老不妨过去看看。” 19 |   这神猴正是当年被如来压在山下的孙悟空,他一见唐僧就喊道∶“师父快救我出去,我保护你到西天取经。几天前观音菩萨来劝过我,让我给您当徒弟。”唐僧听了非常高兴,可是又很发愁,没有办法把孙悟空救出来。 20 |   孙悟空说只要把山顶上如来佛的金字压帖拿掉就行了。唐僧拿掉了金字压帖后,按照悟空的要求和刘伯钦等人退到十里之外的地方等着。忽然一声天崩地裂般的巨响,五行山裂成两半,顿时飞沙走石,满天灰尘,让人睁不开眼睛。 21 |   等到唐僧睁开眼睛时,悟空已经跪在地上,给他叩头。唐僧见他赤身裸体,就从包袱里拿出一双鞋和一条裤子让他穿上。刘伯钦见唐僧收了徒弟,非常高兴,告别了唐僧师徒回家去了。悟空立刻收拾行李,和师父一道出发。 22 |   没过多久,师徒二人出了大唐边界。忽然从草丛中跳出一只大老虎。孙悟空赶忙放下行李,从耳朵中取出金箍棒,高兴地说∶“老孙已经五百多年没有用过这宝贝了,今天用它弄件衣服穿穿!”说完抡起金箍棒对着老虎狠命一击,老虎当场就死了。 23 |   唐僧见了,惊得连嘴都合不住。悟空拔了根毫毛,变成一把尖刀,剥了虎皮,做了条皮裙围在腰间,然后,恭恭敬敬地扶唐僧上马,师徒继续赶路。忽然一声口哨声,跳出六个强盗,要抢他们的马和行李。 24 |   悟空放下行李,笑着说∶“我原来也是做山大王的,把你们抢的金银珠宝分我一半吧!”强盗们一听,气得头发都竖了起来,拿着刀枪就往悟空头上砍,可是乒乒乓乓砍了七、八十下,也没伤着悟空半根毫毛。 25 |   悟空见他们打累了,高喊一声∶“该俺老孙玩玩了!”他取出金箍棒一个个打,六个强盗就变成了肉酱。唐僧见了很不高兴地说∶“他们虽然是强盗,但也不至于都要打死,你这样残忍,怎能去西天取经呢?阿弥陀佛。” 26 |   孙悟空最受不了别人的气,他听师父这样一说,压不住心中的怒火,高声说到∶“既然师父这样说,那我就不去西天取经了,你自己去吧!老孙我可要回花果山了!”说完纵身一跳,驾上筋斗云,往东飞去了,等到唐僧抬起头,已经看不见孙悟空了。 27 |   唐僧没有办法,只好把行李放在马背上,一手拄着锡杖,一手牵着马,慢慢地往西走去,不久,就见对面来了位老妇人,手里捧着一件衣服和一顶花帽。唐僧赶忙牵住马,双手合掌,让路给老妇人过。 28 |   那老妇人走到唐僧跟前说道∶“你从哪里来呀,怎么一个人在山中走呢?”唐僧就把悟空不听话的事告诉了老妇人,老妇人听后微微一笑,说∶“我送你一件衣服和一顶花帽,给你那不听话的徒弟穿上吧!” 29 |   唐僧苦笑着说∶“唉,徒弟已经走了!要这些还有什么用呢?”老妇人笑着说∶“别急,徒弟我会帮你找回来的。我这儿呀,还有一篇咒语,叫做紧箍咒,你要牢牢记在心里,你让你的徒弟穿上这衣服,戴上帽子,他如果再不听话,你就念咒,他就不敢不听了!” 30 |   唐僧学会了紧箍咒,低头拜谢老妇人。这时老妇人已经变成一道金光,向东飞去。唐僧抬头一看,原来是观音菩萨,赶忙跪下叩头,然后把衣帽收到包袱里,坐在路边,加紧背诵紧箍咒,直到背得滚瓜烂熟。 31 |   观音菩萨驾着祥云,没走多远,碰上了从东边走过来的孙悟空。原来悟空离开唐僧之后,在东海龙王那儿吃了顿饭,在龙王的苦苦劝告之下,已回心转意。观音菩萨让他赶快回到唐僧身边,悟空二话不说,告别观音菩萨去追赶唐僧了。 32 |   见到唐僧,悟空把去龙王那儿吃饭的事情说了一遍,又问∶“师父,你也饿了吧!我去化些斋饭来。”唐僧摇摇头说∶“不用了,包袱里还有些干粮,你给师父拿来吧!”悟空打开包袱,发现观音菩萨给的衣帽十分漂亮,便向唐僧讨取。 33 |   唐僧点头答应了。悟空高兴得抓耳挠腮,忙穿上了衣服,戴上了帽子。 34 |   唐僧要试试紧箍咒灵不灵,就小声念了起来,悟空马上痛得满地打滚,拼命去扯那帽子,可帽子却像长在肉里一样,取也取不下来,扯也扯不烂。 35 |   悟空发现头痛是因为师父在念咒,嘴里喊着“师父别念了!别念了!” 36 |   暗地里取出金箍棒,想把唐僧一棒打死。唐僧见了,紧箍咒越念越快,悟空的头越来越疼,没有办法,只好跪地求饶∶“师父,是我错了,徒儿知道错了,不要再念咒了吧!” 37 |   唐僧见他已经知错,就住了口。悟空的头马上就不痛了,他想这咒语一定是观音菩萨教的,就吵着要去南海找观音菩萨算帐。唐僧说∶“她既然能教我这紧箍咒,肯定也会念咒!”悟空猛吸了一口气,不再胡来,发誓以后一定听师父的话,保护唐僧西天取经。 38 | 39 | 40 | -------------------------------------------------------------------------------- /KagV6Test/XiYouJiTest_KAG_V6/builder/data/incremental_inputs/file5.md: -------------------------------------------------------------------------------- 1 | # 第5回 应愁涧白龙马收缰 2 | 3 |   师徒俩继续向西行。一天,他们来到蛇盘山鹰愁涧,突然从涧中钻出一条白龙来,张着爪子向唐僧冲了过来,悟空慌忙背起唐僧,驾云就跑。那龙追不上悟空,就张开大嘴把白马给吞吃了,然后又钻进深涧了。 4 |   悟空把师父安顿在一个安全地方。转身回到涧边去牵马拿行李,发现马不见了,想着一定是被白龙吃了,就在涧边破口大骂∶“烂泥鳅,把我的马吐出来!”白龙听见有人骂他,气得眼睛都红了,跳出水面,张牙舞爪地向悟空扑来。 5 |   那龙根本不是悟空的对手,几个回合就累得浑身是汗,转身就逃到水里。悟空又骂了一阵,不见白龙出来,便使了个翻江倒海的本领,把这个清澈的涧水弄得泥沙翻滚,浑浊不清。 6 |   那龙在水里待不住了,就硬着头皮跳出来,和悟空打了起来,双方战了几十个回合,白龙实在打不过,摇身变成一条水蛇,钻进了草丛。悟空赶忙追过去,可是连蛇的影子都找不到,气得他把牙咬得乱响。 7 |   于是,悟空念咒语,把山神和土地都叫了出来,问他们白龙从哪里来的。山神和土地小心翼翼地说∶“这白龙是观音菩萨放在这儿等候你们,和你们一起取经的。”悟空一听,气得要找观音菩萨讲道理。 8 |   观音菩萨料事如神,驾云来到鹰愁涧,告诉悟空∶“这白龙原是西海龙王的儿子,犯了死罪,是我讲了个人情,让他给唐僧当马骑的。如果没这匹龙马,你们就去不了西天。”悟空急着说∶“他藏在水里不出来,怎么办? 9 |   ” 10 |   观音菩萨面带微笑,朝涧中喊了一声,那白龙立刻变成一个英俊的公子,来到菩萨跟前。菩萨说∶“小白龙,你师父已经来了!”边说边解下白龙脖上的夜明珠,用柳条蘸些甘露向他身上一挥,吹了口仙气,喊声“变”,白龙就变成了一匹白马。 11 |   观音菩萨叫悟空牵着白马去见唐僧,自己回南海落伽山去了。悟空牵着马,兴高采烈地来到唐僧跟前。唐僧一边用手摸着马头,一边说∶“好马,好马,你是在哪儿找的马?”悟空把经过说了一遍,唐僧连忙向南磕头,感谢观音菩萨。 12 | 13 | 14 | -------------------------------------------------------------------------------- /KagV6Test/XiYouJiTest_KAG_V6/builder/data/incremental_inputs/file6.md: -------------------------------------------------------------------------------- 1 | # 第6回 观音院斗宝失袈裟 2 | 3 |   唐僧骑上白龙马,走起路来就轻松了许多。一天傍晚,师徒二人来到山谷里的一座观音院。门口的和尚一听是大唐来的高僧,要到西天去取经,连忙施礼,恭恭敬敬地请他们进院子休息。 4 |   唐僧师徒刚刚坐好,两名小和尚搀扶着一个驼背的和尚,慢慢地走了进来。唐僧连忙起身,双手合掌,施礼相迎。老和尚一边还礼,一边叫人端茶来。不一会儿,两个小童端着精美的茶具进来了。 5 |   唐僧喝了一口茶,夸了几句这茶具。老和尚很高兴,然后卖弄地讲起了茶经,接着又问唐僧有什么东土大唐带来的宝贝,拿出来看一看。悟空见老和尚这般卖弄,心中早有一百个不服气了,不等师父说话,便抢着说∶“师父,把你的袈裟让他们见识见识!” 6 |   老和尚一听袈裟,更是得意,大笑起来,让人拿出十二只箱子,将里面的袈裟全部抖了出来,竟有上百件,而且每一件都很漂亮。悟空见了,也不言语,拿出了唐僧的袈裟抖开,顿时满屋金光四射,让人睁不开眼睛。 7 |   老和尚看呆了,一条毒计爬上心头,找了个借口,请求唐僧把袈裟借给他仔细看上一晚,明早奉还。唐僧还未开口,悟空抢先说∶“就借给他看一晚吧!不会有事的!”唐僧想要阻止已经来不及了,只好很不情愿地把袈裟借给老和尚。 8 |   晚上,老和尚偷偷让小和尚搬来许多木柴,想把唐僧师徒烧死。悟空听到院子里很吵,觉得奇怪,害怕师父被惊醒,就变成一个小蜜蜂,飞到院中,看到眼前的情景,觉得很可笑,眼珠一转,想出了一条妙计。 9 |   悟空驾起筋斗云,来到南天门,守门的天兵天将见是大闹天宫的齐天大圣来了,吓得乱成一团。悟空高叫∶“别怕!别怕!我不是来打架的,是来找广目天王借避火罩,去救师父的!”广目天王只好将宝贝借给悟空。 10 |   悟空拿着避火罩回到观音院,把师父的禅房罩住,然后悠闲地坐在屋顶,看和尚们放火。刹那间,大火熊熊燃烧。悟空想,这些和尚也太狠了,就吹了一口气,立刻刮起一阵狂风,火借风势,整个观音院顿时变成了一片火海。 11 |   这场大火引来了一个妖怪。原来这座观音院的南面有座黑风山,山中黑风洞里住着一个黑风怪。他远远地看见寺庙起火,就想着趁火打劫偷点东西,于是驾云飘进方丈房中,看见桌上的包袱放出金光,打开一看,竟是件价值连城的袈裟。 12 |   黑风怪偷了那件袈裟,驾云回到洞中。悟空只管坐在屋顶吹火,却没注意到黑风怪。天快亮时,悟空见火已经快灭了,才收起避火罩,还给了广目天王。回到禅房,见师父还在熟睡,就轻轻地叫醒了师父。 13 |   唐僧打开房门,见院中四处都是乌黑烧焦的木头,好端端的观音院已经不存在了,感到非常吃惊,悟空就把昨晚发生的事说了一遍。唐僧心中想着袈裟,就和悟空一块去找,寺里的和尚看见他们,还以为是冤魂来了,吓得连连跪地求饶。 14 |   那驼背老和尚看见寺院被烧,又不见了袈裟,正生气,又听唐僧没有烧死,来取袈裟了,吓得不知怎么办才好。最后一狠心,一头往墙上撞去,顿时血流如注,当场就死了。唐僧知道后,埋怨悟空说“唉!徒儿,你何苦要和别人斗气比阔呢?现在可怎么办呀!” 15 |   悟空手拿金箍棒,追问那些和尚袈裟在哪里,和尚都说不知道。悟空想了又想问道∶“这附近可有妖怪?”和尚都说黑风山上有个黑风怪。悟空板着脸说∶“好好侍候我师父,如有不周,小心脑袋!”说着一棒打断了一堵墙。 16 |   悟空一个筋斗来到黑风山,按落云头,往林中走去。忽听坡前有人在说笑,悟空侧身躲在岩石后面,偷偷望去,见地上坐着三个妖魔,为首的一个黑脸大汉说∶“昨天晚上有缘得到了一件佛衣,特地请二位来,开个佛衣盛会!” 17 |   悟空听得一清二楚,一边骂着∶“偷东西的坏家伙!”一边跳上前去,“呼”的就是一捧。黑脸大汉就是黑风怪,变成一股风逃走了;还有个道士也跑了,只有那个白衣秀才没来得及逃走,被悟空一棒打死,现出原形,原来是条大白花蛇。 18 |   悟空紧跟那股风来到一座山峰上,远远地看见对面山崖上有一座洞府,门前有一石碑,上面写着∶“黑风山黑风洞”几个大字。悟空来到洞前,用棒子敲着门,高声叫到∶“坏家伙,还我袈裟来!”小妖怪看到悟空气势汹汹,连忙跑进去报告黑风怪。 19 |   黑风怪刚才在山坡逃走是因为没带武器,现在是在他的地盘上,他可不怕。他穿上乌金甲,提着黑缨枪,出洞和悟空打了起来。打到中午,黑风怪说要吃饭,饭后再打。悟空也不说话,只是打,黑风怪只得再变成一股清风逃回洞中。 20 |   不管悟空在洞外骂得有多难听,黑风怪就是不出来。悟空急得没有办法,只得先回观音院去看师父了。回到院中,随便吃了些东西,又驾云来到黑风山,看见一个小妖拿着一个装请柬的木匣急急忙忙向前走,就一棒把它打死了。 21 |   悟空打开木匣一看,里面装的竟是黑风怪邀请观音院那老和尚的请柬,这才明白,老和尚早就和妖怪有来往,悟空眼珠一转,心生一条妙计,马上变成了老和尚的模样,摇摇摆摆地走到洞口,小妖一见是熟人,连忙开门相迎。 22 |   黑风怪没有看出什么破绽,扶着老和尚走进中厅,还没说几句话,在外面巡逻的小妖进来报告说送信的小妖已经被打死了。黑风怪立刻就明白了是怎么回事,拿出枪来狠狠刺向悟空,悟空侧身躲开,嘿嘿笑了几声,露出了本来面目,和妖怪打了起来。 23 |   两人你一枪,我一棒,打得难分难解,一直到太阳落山。那妖怪说∶“现在天快要黑了,明天再和你打!”悟空知道这家伙又要逃跑,哪肯放过,当头一棒打去,那妖怪化作一股清风,溜回洞中去了。 24 |   悟空没有办法,只好回到观音院。唐僧看到袈裟还没有夺回来,心中非常着急。晚上怎么也睡不着。第二天天刚亮,悟空对唐僧说∶“师父请放心,老孙今天要是夺不回袈裟,就不回来见你!”原来他已决定找观音菩萨想办法。 25 |   悟空驾云来到南海落伽山,见到观音菩萨,上前深深鞠了一躬,说明来意。观音菩萨听后叹了口气说∶“你这猴子,不该当众卖弄宝衣,更不该放火烧了寺院弄成现在这个样子。”说完,嘱咐了童子几句,和悟空驾着云,飞往黑风山。 26 |   他们很快来到黑风山,远远看见那天在山坡前的道士端着玉盘走了过来。悟空上前一棒打死了道士,现出了原形,原来是只大灰狼。悟空捡起盘子,看见里面有两粒仙丹,原来他是去参加佛衣盛会的。 27 |   悟空灵机一动,想出一条妙计,他让观音菩萨变成那道士,自己则变成一颗仙丹,只不过比原来的大一些。观音菩萨把他放在盘中,向洞中走去,按悟空说的计策,要让黑风怪吃下那颗仙丹。 28 |   观音菩萨来到洞中,把仙丹送到黑风怪手中,说∶“小道献上一颗仙丹,祝大王健康长寿!”黑风怪十分高兴,接过仙丹刚送到嘴边,没想到仙丹自动滑了下去。 29 |   悟空一到黑风怪的肚子里,就恢复了原形,在里面打起了猴拳,黑风怪痛得在地上直打滚。观音菩萨也恢复了原形,命令他交出佛衣,黑风怪痛得受不了了,让小妖拿来袈裟。观音菩萨接过佛衣,拿出一个小金圈儿,套在黑风怪头上。 30 |   观音这才让悟空出来。悟空刚从黑风怪的鼻孔里跳出来,黑风怪就摆出一副凶相,拿着黑缨枪向观音刺去。观音浮在空中,念动咒语,黑风怪马上头痛了起来,只好跪在地上,求观音饶命,并说自己愿意出家。 31 |   观音菩萨把佛衣交给悟空,带着黑风怪回南海去了。悟空见黑风洞中的小妖早已逃离,就放了一把火把洞烧了,然后驾云赶回观音院。唐僧和寺里的和尚们看见悟空取回了袈裟,都很高兴。第二天,唐僧师徒离开了观音院,又向西出发 32 | 33 | 34 | -------------------------------------------------------------------------------- /KagV6Test/XiYouJiTest_KAG_V6/builder/data/incremental_inputs/file7.md: -------------------------------------------------------------------------------- 1 | # 第7回 高老庄唐僧收八戒 2 | 3 |   这一天天快黑了,他们来到一个叫做高老庄的村子。碰巧,庄主高太公正在到处寻找能捉妖怪的法师。悟空一听非常高兴地说∶“不用找了,我就是专门捉妖怪的。” 4 |   原来,高太公有三个女儿,前两个女儿已经出嫁,到了三女儿,就想找个上门女婿来支撑门户。三年前来了个又黑又壮的青年,自称是福陵山人,姓猪,想到高家当女婿。三女儿对他还算满意,高太公就让他们成了家。 5 |   开始这个女婿很勤快,耕田下地,收割粮食,样样都行。没想到过了一阵,他突然变成一个猪头猪脑的妖怪,一顿饭要吃三五斗米,来去都腾云驾雾。这半年来,竟然把三女儿锁在后院,不让人进去。 6 |   悟空听了高太公的话,拍拍胸脯说∶“这个妖怪我捉定了,今天晚上就让他写退婚书,永远不再碰你女儿。”高太公问他要几个帮手,悟空说∶“一个也不要,只要把我师父照顾好就行了。”高太公连忙照办。 7 |   安顿好了师父,悟空让高太公带路来到后院。他打掉铁锁,走进院中一间黑洞洞的屋子。高太公和女儿见面,忍不住抱在一起痛哭起来。三女儿告诉他们∶“那妖怪知道我爹要请法师捉拿他,每天天一亮就走,晚上才回来。” 8 |   悟空让高太公父女离开,自己变成三女儿的模样。没过多久,院外一阵狂风刮来,那妖怪出现在半空中。悟空连忙向床上一靠,装出有病的样子,那妖怪摸进房中,口中喊着∶“姐姐,姐姐,你在哪儿呀?” 9 |   悟空故意叹口气说∶“我听爹今天在外面骂你,还说请了法师来抓你! 10 |   ”那妖怪说∶“不怕,不怕,咱们上床睡吧!”悟空说∶“我爹请的可是那五百年前大闹天宫的齐天大圣,你不害怕吗?”那妖怪倒吸了口凉气说∶“咱们做不成夫妻了。” 11 |   猪精打开门就往外跑,悟空从后面一把扯住他的后领子,把脸一抹,现出原形大叫道∶“泼怪,你看我是谁?”那妖怪一见是悟空,吓得手脚发麻,“呼”地一下化成一阵狂风跑了。 12 |   悟空跟着这股妖风一路追到高山上,只见那股妖风钻进了一个洞里。悟空刚落下云头,那妖怪已现形从洞中出来了,手里拿着一柄九齿钉耙骂道∶“你这个弼马温!当年大闹天宫,不知连累了我们多少人。今天又来欺负我,让你尝尝我的厉害,看耙!” 13 |   悟空举起棒架住了钉耙,问∶“怎么,你认识俺老孙!”那妖怪说出了自己的来历∶原来他是天上的天蓬元师,在王母娘娘的蟠桃会上喝得酩酊大醉,闯进了广寒宫,见嫦娥长得十分美丽,就上去调戏嫦娥。 14 |   玉皇大帝知道这件事后,要根据天条将他处死。多亏太白金星求情,才保住了性命,但要重打二千铜锤,并打入凡间投胎。没想到他急于投胎转世,竟错投了猪胎,落得如此模样。这时他和悟空打了一会儿,就觉得抵挡不住,拔腿就往洞中逃。 15 |   悟空站在洞口骂,那妖怪也不出来。悟空一见,气得乱蹦乱跳,拿起金箍棒打碎了洞门,那妖怪听见洞门被打碎的声音,只好跳出来骂道∶“我在高老庄招亲,跟你有什么关系,你欺人太甚,我这把钉耙绝不饶你!” 16 |   悟空想跟他玩玩,就站着不动,不管那妖怪怎么打,悟空的头皮连红都不红。那妖怪使劲一打,溅得火星乱飞,这下可把他吓坏了,说∶“好头! 17 |   好头!你原来不是在花果山水帘洞,怎么跑到这儿来了,是不是我丈人到那儿把你请来的?” 18 |   悟空说∶“不是,是我自己改邪归正了,保护唐僧西天取经路过这…… 19 |   ”妖怪一听“取经”二字,“啪”地一声一丢钉耙,拱了拱手说∶“麻烦你引见一下,我受观音菩萨劝导,她叫我在这里等你们,我愿意跟唐僧西天取经,也好将功折罪。” 20 |   两个人放火烧了云栈洞,悟空将妖怪的双手反绑上,押回高老庄。那妖怪“扑通”一声跪在唐僧面前,把观音菩萨劝他行善的事说了一遍。唐僧十分高兴,叫悟空给他松绑,又请高太公抬出香炉烛台,拜谢了观音,还给他取了个法号叫猪悟能,别名猪八戒。 21 |   高太公又给猪八戒准备了一套僧衣、僧鞋、僧帽等穿上。临走的时候,八戒一再叮嘱说∶“丈人啊!你好好照看我老婆,如果取不成经,我还是要还俗的。你不要把我的老婆再许给别人呀!”悟空听了笑骂他胡说八道,八戒却说∶“我这是给自己留条后路呢!” 22 | 23 | 24 | -------------------------------------------------------------------------------- /KagV6Test/XiYouJiTest_KAG_V6/builder/data/incremental_inputs/file8.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanGePlus/KagTest/f4ecd3247c681e74f19649e442b029d9eb2ee71f/KagV6Test/XiYouJiTest_KAG_V6/builder/data/incremental_inputs/file8.docx -------------------------------------------------------------------------------- /KagV6Test/XiYouJiTest_KAG_V6/builder/data/incremental_inputs/file9.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanGePlus/KagTest/f4ecd3247c681e74f19649e442b029d9eb2ee71f/KagV6Test/XiYouJiTest_KAG_V6/builder/data/incremental_inputs/file9.pdf -------------------------------------------------------------------------------- /KagV6Test/XiYouJiTest_KAG_V6/builder/data/inputs/file1.md: -------------------------------------------------------------------------------- 1 | # 第1回 惊天地美猴王出世 2 | 3 |   这是一个神话故事,传说在很久很久以前,天下分为东胜神洲、西牛贺洲、南赡部洲、北俱芦洲。在东胜神洲傲来国,有一座花果山,山上有一块仙石,一天仙石崩裂,从石头中滚出一个卵,这个卵一见风就变成一个石猴,猴眼射出一道道金光,向四方朝拜。 4 |   那猴能走、能跑,渴了就喝些山涧中的泉水,饿了就吃些山上的果子。 5 |   整天和山中的动物一起玩乐,过得十分快活。一天,天气特别热,猴子们为了躲避炎热的天气,跑到山涧里洗澡。它们看见这泉水哗哗地流,就顺着涧往前走,去寻找它的源头。 6 |   猴子们爬呀、爬呀,走到了尽头,却看见一股瀑布,像是从天而降一样。猴子们觉得惊奇,商量说∶“哪个敢钻进瀑布,把泉水的源头找出来,又不伤身体,就拜他为王。”连喊了三遍,那石猴呼地跳了出来,高声喊道∶“我进去,我进去!” 7 |   那石猴闭眼纵身跳入瀑布,觉得不像是在水中,这才睁开眼,四处打量,发现自己站在一座铁板桥上,桥下的水冲贯于石窍之间,倒挂着流出来,将桥门遮住,使外面的人看不到里面。石猴走过桥,发现这真是个好地方,石椅、石床、石盆、石碗,样样都有。 8 |   这里就像不久以前有人住过一样,天然的房子,安静整洁,锅、碗、瓢、盆,整齐地放在炉灶上。正当中有一块石碑,上面刻着∶花果山福地,水帘洞洞天。石猴高兴得不得了,忙转身向外走去,嗖的一下跳出了洞。 9 |   猴子们见石猴出来了,身上又一点伤也没有,又惊又喜,把他团团围住,争著问他里面的情况。石猴抓抓腮,挠挠痒,笑嘻嘻地对大家说∶“里面没有水,是一个安身的好地方,刮大风我们有地方躲,下大雨我们也不怕淋。”猴子们一听,一个个高兴得又蹦又跳。 10 |   猴子们随着石猴穿过了瀑布,进入水帘洞中,看见了这么多的好东西,一个个你争我夺,拿盆的拿盆,拿碗的拿碗,占灶的占灶,争床的争床,搬过来,移过去,直到精疲力尽为止。猴子们都遵照诺言,拜石猴为王,石猴从此登上王位,将石字省去,自称“美猴王”。 11 |   美猴王每天带着猴子们游山玩水,很快三、五百年过去了。一天正在玩乐时,美猴王想到自己将来难免一死,不由悲伤得掉下眼泪来,这时猴群中跳出个通背猿猴来,说∶“大王想要长生不老,只有去学佛、学仙、学神之术。” 12 |   美猴王决定走遍天涯海角,也要找到神仙,学那长生不老的本领。第二天,猴子们为他做了一个木筏,又准备了一些野果,于是美猴王告别了群猴们,一个人撑着木筏,奔向汪洋大海。 13 |   大概是美猴王的运气好,连日的东南风,将他送到西北岸边。他下了木筏,登上了岸,看见岸边有许多人都在干活,有的捉鱼,有的打天上的大雁,有的挖蛤蜊,有的淘盐,他悄悄地走过去,没想到,吓得那些人将东西一扔,四处逃命。 14 |   这一天,他来到一座高山前,突然从半山腰的树林里传出一阵美妙的歌声,唱的是一些关于成仙的话。猴王想∶这个唱歌的人一定是神仙,就顺着歌声找去。 15 |   唱歌的是一个正在树林里砍柴的青年人,猴王从这青年人的口中了解到,这座山叫灵台方寸山,离这儿七八里路,有个斜月三星洞,洞中住着一个称为菩提祖师的神仙。 16 |   美猴王告别打柴的青年人,出了树林,走过山坡,果然远远地看见一座洞府,只见洞门紧紧地闭着,洞门对面的山岗上立着一块石碑,大约有三丈多高,八尺多宽,上面写着十个大字∶“灵台方寸山斜月三星洞”。正在看时,门却忽然打开了,走出来一个仙童。 17 |   美猴王赶快走上前,深深地鞠了一个躬,说明来意,那仙童说∶“我师父刚才正要讲道,忽然叫我出来开门,说外面来了个拜师学艺的,原来就是你呀!跟我来吧!”美猴王赶紧整整衣服,恭恭敬敬地跟着仙童进到洞内,来到祖师讲道的法台跟前。 18 |   猴王看见菩提祖师端端正正地坐在台上,台下两边站着三十多个仙童,就赶紧跪下叩头。祖师问清楚他的来意,很高兴,见他没有姓名,便说∶“你就叫悟空吧!” 19 |   祖师叫孙悟空又拜见了各位师兄,并给悟空找了间空房住下。从此悟空跟着师兄学习生活常识,讲究经典,写字烧香,空时做些扫地挑水的活。 20 |   很快七年过去了,一天,祖师讲道结束后,问悟空想学什么本领。孙悟空不管祖师讲什么求神拜佛、打坐修行,只要一听不能长生不老,就不愿意学,菩提祖师对此非常生气。 21 |   祖师从高台上跳了下来,手里拿着戒尺指着孙悟空说∶“你这猴子,这也不学,那也不学,你要学些什么?”说完走过去在悟空头上打了三下,倒背着手走到里间,关上了门。师兄们看到师父生气了,感到很害怕,纷纷责怪孙悟空。 22 |   孙悟空既不怕,又不生气,心里反而十分高兴。当天晚上,悟空假装睡着了,可是一到半夜,就悄悄起来,从前门出去,等到三更,绕到后门口,看见门半开半闭,高兴地不得了,心想∶“哈哈,我没有猜错师父的意思。” 23 |   孙悟空走了进去,看见祖师面朝里睡着,就跪在床前说∶“师父,我跪在这里等着您呢!”祖师听见声音就起来了,盘着腿坐好后,严厉地问孙悟空来做什么,悟空说∶“师父白天当着大家的面不是答应我,让我三更时从后门进来,教我长生不老的法术吗?” 24 |   菩提祖师听到这话心里很高兴。心想∶“这个猴子果然是天地生成的,不然,怎么能猜透我的暗谜。”于是,让孙悟空跪在床前,教给他长生不老的法术。孙悟空洗耳恭听,用心理解,牢牢记住口诀,并叩头拜谢了祖师的恩情。 25 |   很快三年又过去了,祖师又教了孙悟空七十二般变化的法术和驾筋斗云的本领,学会了这个本领,一个筋斗便能翻出十万八千里路程。孙悟空是个猴子,本来就喜欢蹦蹦跳跳的,所以学起筋斗云来很容易。 26 |   有一个夏天,孙悟空和师兄们在洞门前玩耍,大家要孙悟空变个东西看看,孙悟空心里感到很高兴,得意地念起咒语,摇身一变变成了一棵大树。 27 |   师兄们见了,鼓着掌称赞他。 28 |   大家的吵闹声,让菩提祖师听到了,他拄着拐杖出来,问∶“是谁在吵闹?你们这样大吵大叫的,哪里像个出家修行的人呢?”大家都赶紧停住了笑,孙悟空也恢复了原样,给师父解释,请求原谅。 29 |   菩提祖师看见孙悟空刚刚学会了一些本领就卖弄起来,十分生气。祖师叫其他人离开,把悟空狠狠地教训了一顿,并且要把孙悟空赶走。孙悟空着急了,哀求祖师不要赶他走,祖师却不肯留下他,并要他立下誓言∶任何时候都不能说孙悟空是菩提祖师的徒弟。 30 | -------------------------------------------------------------------------------- /KagV6Test/XiYouJiTest_KAG_V6/builder/data/inputs/file2.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanGePlus/KagTest/f4ecd3247c681e74f19649e442b029d9eb2ee71f/KagV6Test/XiYouJiTest_KAG_V6/builder/data/inputs/file2.docx -------------------------------------------------------------------------------- /KagV6Test/XiYouJiTest_KAG_V6/builder/data/inputs/file3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanGePlus/KagTest/f4ecd3247c681e74f19649e442b029d9eb2ee71f/KagV6Test/XiYouJiTest_KAG_V6/builder/data/inputs/file3.pdf -------------------------------------------------------------------------------- /KagV6Test/XiYouJiTest_KAG_V6/builder/docxIndexer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from kag.builder.runner import BuilderChainRunner 4 | from kag.common.conf import KAG_CONFIG 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | # 索引构建 9 | def buildKB(files_dir): 10 | runner = BuilderChainRunner.from_config( 11 | KAG_CONFIG.all_config["docx_kag_builder_pipeline"] 12 | ) 13 | runner.invoke(files_dir) 14 | logger.info(f"\n\nbuildKB successfully for {files_dir}\n\n") 15 | 16 | 17 | 18 | if __name__ == "__main__": 19 | # 获取当前位置路径 并指定文件目录 20 | current_dir = Path(__file__).parent 21 | files_dir = current_dir / "data/inputs/file2.docx" 22 | 23 | buildKB(str(files_dir)) 24 | 25 | -------------------------------------------------------------------------------- /KagV6Test/XiYouJiTest_KAG_V6/builder/mdIndexer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from kag.builder.runner import BuilderChainRunner 4 | from kag.common.conf import KAG_CONFIG 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | # 索引构建 9 | def buildKB(files_dir): 10 | runner = BuilderChainRunner.from_config( 11 | KAG_CONFIG.all_config["md_kag_builder_pipeline"] 12 | ) 13 | runner.invoke(files_dir) 14 | logger.info(f"\n\nbuildKB successfully for {files_dir}\n\n") 15 | 16 | 17 | 18 | if __name__ == "__main__": 19 | # 获取当前位置路径 并指定文件目录 20 | current_dir = Path(__file__).parent 21 | files_dir = current_dir / "data/inputs/file1.md" 22 | 23 | buildKB(str(files_dir)) 24 | 25 | -------------------------------------------------------------------------------- /KagV6Test/XiYouJiTest_KAG_V6/builder/mixIndexer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | from pathlib import Path 4 | from kag.builder.runner import BuilderChainRunner 5 | from kag.common.conf import KAG_CONFIG 6 | 7 | 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | # 针对TXT文件进行索引构建 12 | def buildMdKB(dir_path): 13 | try: 14 | runner = BuilderChainRunner.from_config( 15 | KAG_CONFIG.all_config.get("md_kag_builder_pipeline") 16 | ) 17 | if runner is None: 18 | raise ValueError("Missing 'md_kag_builder_pipeline' configuration.") 19 | runner.invoke(dir_path) 20 | logger.info(f"\n\nbuildMdKB successfully for {dir_path}\n\n") 21 | except Exception as e: 22 | logger.error(f"Error building MD KB for {dir_path}: {e}") 23 | 24 | # 针对DOCX文件进行索引构建 25 | def buildDocxKB(dir_path): 26 | try: 27 | runner = BuilderChainRunner.from_config( 28 | KAG_CONFIG.all_config.get("docx_kag_builder_pipeline") 29 | ) 30 | if runner is None: 31 | raise ValueError("Missing 'docx_kag_builder_pipeline' configuration.") 32 | runner.invoke(dir_path) 33 | logger.info(f"\n\nbuildDocxKB successfully for {dir_path}\n\n") 34 | except Exception as e: 35 | logger.error(f"Error building DOCX KB for {dir_path}: {e}") 36 | 37 | # 针对PDF文件进行索引构建 38 | def buildPdfKB(dir_path): 39 | try: 40 | runner = BuilderChainRunner.from_config( 41 | KAG_CONFIG.all_config.get("pdf_kag_builder_pipeline") 42 | ) 43 | if runner is None: 44 | raise ValueError("Missing 'pdf_kag_builder_pipeline' configuration.") 45 | runner.invoke(dir_path) 46 | logger.info(f"\n\nbuildPdfKB successfully for {dir_path}\n\n") 47 | except Exception as e: 48 | logger.error(f"Error building PDF KB for {dir_path}: {e}") 49 | 50 | # 索引构建分诊 51 | def process_files(files_dir): 52 | if not isinstance(files_dir, Path): 53 | logger.error("Invalid files_dir parameter. Expected a Path object.") 54 | return 55 | 56 | if not files_dir.exists() or not files_dir.is_dir(): 57 | logger.error(f"Directory {files_dir} does not exist or is not a directory.") 58 | return 59 | 60 | for file_path in files_dir.iterdir(): # 遍历文件夹中的所有文件和子目录 61 | if file_path.is_file(): # 确保是文件 62 | try: 63 | logger.info(f"Processing file: {file_path}") 64 | # 根据扩展名和文件名前缀判断类型 65 | if file_path.suffix == ".md" and file_path.name.startswith("file"): 66 | buildMdKB(str(file_path)) 67 | elif file_path.suffix == ".docx" and file_path.name.startswith("file"): 68 | buildDocxKB(str(file_path)) 69 | elif file_path.suffix == ".pdf" and file_path.name.startswith("file"): 70 | buildPdfKB(str(file_path)) 71 | else: 72 | logger.warning(f"Skipped unsupported file type or invalid file: {file_path}") 73 | except Exception as e: 74 | logger.error(f"Error processing file {file_path}: {e}") 75 | 76 | if __name__ == "__main__": 77 | logging.basicConfig(level=logging.INFO) 78 | try: 79 | # 获取当前位置路径 并指定文件目录 80 | current_dir = Path(__file__).parent 81 | files_dir = current_dir / "data/incremental_inputs" 82 | 83 | # 检查目录是否存在并进行索引构建 84 | process_files(files_dir) 85 | except Exception as e: 86 | logger.critical(f"Unhandled exception in main: {e}") 87 | -------------------------------------------------------------------------------- /KagV6Test/XiYouJiTest_KAG_V6/builder/pdfIndexer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from kag.builder.runner import BuilderChainRunner 4 | from kag.common.conf import KAG_CONFIG 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | # 索引构建 9 | def buildKB(files_dir): 10 | runner = BuilderChainRunner.from_config( 11 | KAG_CONFIG.all_config["pdf_kag_builder_pipeline"] 12 | ) 13 | runner.invoke(files_dir) 14 | logger.info(f"\n\nbuildKB successfully for {files_dir}\n\n") 15 | 16 | 17 | 18 | if __name__ == "__main__": 19 | # 获取当前位置路径 并指定文件目录 20 | current_dir = Path(__file__).parent 21 | files_dir = current_dir / "data/inputs/file3.pdf" 22 | 23 | buildKB(str(files_dir)) 24 | 25 | -------------------------------------------------------------------------------- /KagV6Test/XiYouJiTest_KAG_V6/config/example_config.yaml: -------------------------------------------------------------------------------- 1 | #------------project configuration start----------------# 2 | openie_llm: &openie_llm 3 | api_key: sk-V4rpB2LLM5sC6CGxmUWs1LRqhZpddDEIVmsGBmwH9rm67y6F 4 | base_url: https://yunwu.ai/v1 5 | model: gpt-4o-mini 6 | type: maas 7 | 8 | chat_llm: &chat_llm 9 | api_key: sk-V4rpB2LLM5sC6CGxmUWs1LRqhZpddDEIVmsGBmwH9rm67y6F 10 | base_url: https://yunwu.ai/v1 11 | model: gpt-4o-mini 12 | type: maas 13 | 14 | vectorize_model: &vectorize_model 15 | api_key: sk-V4rpB2LLM5sC6CGxmUWs1LRqhZpddDEIVmsGBmwH9rm67y6F 16 | base_url: https://yunwu.ai/v1 17 | model: text-embedding-3-small 18 | type: openai 19 | vector_dimensions: 1536 20 | vectorizer: *vectorize_model 21 | 22 | log: 23 | level: INFO 24 | 25 | project: 26 | biz_scene: default 27 | host_addr: http://127.0.0.1:8887 28 | id: '1' 29 | language: zh 30 | namespace: XiYouJiTest 31 | checkpoint_path: ./runner-ckpt 32 | #------------project configuration end----------------# 33 | 34 | #------------txt-kag-builder configuration start----------------# 35 | txt_kag_builder_pipeline: 36 | chain: 37 | type: unstructured_builder_chain # kag.builder.default_chain.DefaultUnstructuredBuilderChain 38 | extractor: 39 | type: schema_constraint_extractor # kag.builder.component.extractor.schema_free_extractor.SchemaFreeExtractor 40 | llm: *openie_llm 41 | ner_prompt: 42 | type: default_ner # kag.builder.prompt.default.ner.OpenIENERPrompt 43 | std_prompt: 44 | type: default_std # kag.builder.prompt.default.std.OpenIEEntitystandardizationdPrompt 45 | triple_prompt: 46 | type: default_triple # kag.builder.prompt.default.triple.OpenIETriplePrompt 47 | reader: 48 | type: txt_reader # kag.builder.component.reader.dict_reader.txt_reader 49 | post_processor: 50 | type: kag_post_processor # kag.builder.component.postprocessor.kag_postprocessor.KAGPostProcessor 51 | similarity_threshold: 0.9 52 | splitter: 53 | type: length_splitter # kag.builder.component.splitter.length_splitter.LengthSplitter 54 | split_length: 800 55 | window_length: 100 56 | vectorizer: 57 | type: batch_vectorizer # kag.builder.component.vectorizer.batch_vectorizer.BatchVectorizer 58 | vectorize_model: *vectorize_model 59 | writer: 60 | type: kg_writer # kag.builder.component.writer.kg_writer.KGWriter 61 | num_threads_per_chain: 1 62 | num_chains: 16 63 | scanner: 64 | type: file_scanner # kag.builder.component.scanner.dataset_scanner.file_scanner 65 | #------------txt-kag-builder configuration end----------------# 66 | 67 | #------------md-kag-builder configuration start----------------# 68 | md_kag_builder_pipeline: 69 | chain: 70 | type: unstructured_builder_chain # kag.builder.default_chain.DefaultUnstructuredBuilderChain 71 | extractor: 72 | type: schema_constraint_extractor # kag.builder.component.extractor.schema_free_extractor.SchemaFreeExtractor 73 | llm: *openie_llm 74 | ner_prompt: 75 | type: default_ner # kag.builder.prompt.default.ner.OpenIENERPrompt 76 | std_prompt: 77 | type: default_std # kag.builder.prompt.default.std.OpenIEEntitystandardizationdPrompt 78 | triple_prompt: 79 | type: default_triple # kag.builder.prompt.default.triple.OpenIETriplePrompt 80 | reader: 81 | type: md_reader # kag.builder.component.reader.dict_reader.txt_reader 82 | post_processor: 83 | type: kag_post_processor # kag.builder.component.postprocessor.kag_postprocessor.KAGPostProcessor 84 | similarity_threshold: 0.9 85 | splitter: 86 | type: length_splitter # kag.builder.component.splitter.length_splitter.LengthSplitter 87 | split_length: 800 88 | window_length: 100 89 | vectorizer: 90 | type: batch_vectorizer # kag.builder.component.vectorizer.batch_vectorizer.BatchVectorizer 91 | vectorize_model: *vectorize_model 92 | writer: 93 | type: kg_writer # kag.builder.component.writer.kg_writer.KGWriter 94 | num_threads_per_chain: 1 95 | num_chains: 16 96 | scanner: 97 | type: file_scanner # kag.builder.component.scanner.dataset_scanner.file_scanner 98 | #------------md-kag-builder configuration end----------------# 99 | 100 | #------------pdf-kag-builder configuration start----------------# 101 | pdf_kag_builder_pipeline: 102 | chain: 103 | type: unstructured_builder_chain # kag.builder.default_chain.DefaultUnstructuredBuilderChain 104 | extractor: 105 | type: schema_constraint_extractor # kag.builder.component.extractor.schema_free_extractor.SchemaFreeExtractor 106 | llm: *openie_llm 107 | ner_prompt: 108 | type: default_ner # kag.builder.prompt.default.ner.OpenIENERPrompt 109 | std_prompt: 110 | type: default_std # kag.builder.prompt.default.std.OpenIEEntitystandardizationdPrompt 111 | triple_prompt: 112 | type: default_triple # kag.builder.prompt.default.triple.OpenIETriplePrompt 113 | reader: 114 | type: pdf_reader # kag.builder.component.reader.dict_reader.txt_reader 115 | post_processor: 116 | type: kag_post_processor # kag.builder.component.postprocessor.kag_postprocessor.KAGPostProcessor 117 | similarity_threshold: 0.9 118 | splitter: 119 | type: length_splitter # kag.builder.component.splitter.length_splitter.LengthSplitter 120 | split_length: 800 121 | window_length: 100 122 | vectorizer: 123 | type: batch_vectorizer # kag.builder.component.vectorizer.batch_vectorizer.BatchVectorizer 124 | vectorize_model: *vectorize_model 125 | writer: 126 | type: kg_writer # kag.builder.component.writer.kg_writer.KGWriter 127 | num_threads_per_chain: 1 128 | num_chains: 16 129 | scanner: 130 | type: file_scanner # kag.builder.component.scanner.dataset_scanner.file_scanner 131 | #------------pdf-kag-builder configuration end----------------# 132 | 133 | #------------docx-kag-builder configuration start----------------# 134 | docx_kag_builder_pipeline: 135 | chain: 136 | type: unstructured_builder_chain # kag.builder.default_chain.DefaultUnstructuredBuilderChain 137 | extractor: 138 | type: schema_constraint_extractor # kag.builder.component.extractor.schema_free_extractor.SchemaFreeExtractor 139 | llm: *openie_llm 140 | ner_prompt: 141 | type: default_ner # kag.builder.prompt.default.ner.OpenIENERPrompt 142 | std_prompt: 143 | type: default_std # kag.builder.prompt.default.std.OpenIEEntitystandardizationdPrompt 144 | triple_prompt: 145 | type: default_triple # kag.builder.prompt.default.triple.OpenIETriplePrompt 146 | reader: 147 | type: docx_reader # kag.builder.component.reader.dict_reader.txt_reader 148 | post_processor: 149 | type: kag_post_processor # kag.builder.component.postprocessor.kag_postprocessor.KAGPostProcessor 150 | similarity_threshold: 0.9 151 | splitter: 152 | type: length_splitter # kag.builder.component.splitter.length_splitter.LengthSplitter 153 | split_length: 800 154 | window_length: 100 155 | vectorizer: 156 | type: batch_vectorizer # kag.builder.component.vectorizer.batch_vectorizer.BatchVectorizer 157 | vectorize_model: *vectorize_model 158 | writer: 159 | type: kg_writer # kag.builder.component.writer.kg_writer.KGWriter 160 | num_threads_per_chain: 1 161 | num_chains: 16 162 | scanner: 163 | type: file_scanner # kag.builder.component.scanner.dataset_scanner.file_scanner 164 | #------------docx-kag-builder configuration end----------------# 165 | 166 | #------------kag-solver configuration start----------------# 167 | search_api: &search_api 168 | type: openspg_search_api #kag.solver.tools.search_api.impl.openspg_search_api.OpenSPGSearchAPI 169 | 170 | graph_api: &graph_api 171 | type: openspg_graph_api #kag.solver.tools.graph_api.impl.openspg_graph_api.OpenSPGGraphApi 172 | 173 | exact_kg_retriever: &exact_kg_retriever 174 | type: default_exact_kg_retriever # kag.solver.retriever.impl.default_exact_kg_retriever.DefaultExactKgRetriever 175 | el_num: 5 176 | llm_client: *chat_llm 177 | search_api: *search_api 178 | graph_api: *graph_api 179 | 180 | fuzzy_kg_retriever: &fuzzy_kg_retriever 181 | type: default_fuzzy_kg_retriever # kag.solver.retriever.impl.default_fuzzy_kg_retriever.DefaultFuzzyKgRetriever 182 | el_num: 5 183 | vectorize_model: *vectorize_model 184 | llm_client: *chat_llm 185 | search_api: *search_api 186 | graph_api: *graph_api 187 | 188 | chunk_retriever: &chunk_retriever 189 | type: default_chunk_retriever # kag.solver.retriever.impl.default_fuzzy_kg_retriever.DefaultFuzzyKgRetriever 190 | llm_client: *chat_llm 191 | recall_num: 10 192 | rerank_topk: 10 193 | 194 | kag_solver_pipeline: 195 | memory: 196 | type: default_memory # kag.solver.implementation.default_memory.DefaultMemory 197 | llm_client: *chat_llm 198 | max_iterations: 3 199 | reasoner: 200 | type: default_reasoner # kag.solver.implementation.default_reasoner.DefaultReasoner 201 | llm_client: *chat_llm 202 | lf_planner: 203 | type: default_lf_planner # kag.solver.plan.default_lf_planner.DefaultLFPlanner 204 | llm_client: *chat_llm 205 | vectorize_model: *vectorize_model 206 | lf_executor: 207 | type: default_lf_executor # kag.solver.execute.default_lf_executor.DefaultLFExecutor 208 | llm_client: *chat_llm 209 | force_chunk_retriever: true 210 | exact_kg_retriever: *exact_kg_retriever 211 | fuzzy_kg_retriever: *fuzzy_kg_retriever 212 | chunk_retriever: *chunk_retriever 213 | merger: 214 | type: default_lf_sub_query_res_merger # kag.solver.execute.default_sub_query_merger.DefaultLFSubQueryResMerger 215 | vectorize_model: *vectorize_model 216 | chunk_retriever: *chunk_retriever 217 | generator: 218 | type: default_generator # kag.solver.implementation.default_generator.DefaultGenerator 219 | llm_client: *chat_llm 220 | generate_prompt: 221 | type: default_resp_generator # kag.solver.prompt.default.resp_generator.RespGenerator 222 | reflector: 223 | type: default_reflector # kag.solver.implementation.default_reflector.DefaultReflector 224 | llm_client: *chat_llm 225 | 226 | #------------kag-solver configuration end----------------# 227 | -------------------------------------------------------------------------------- /KagV6Test/XiYouJiTest_KAG_V6/kag_config.yaml: -------------------------------------------------------------------------------- 1 | #------------project configuration start----------------# 2 | openie_llm: &openie_llm 3 | api_key: sk-V4rpB2LLM5sC6CGxmUWs1LRqhZpddDEIVmsGBmwH9rm67y6F 4 | base_url: https://yunwu.ai/v1 5 | model: gpt-4o-mini 6 | type: maas 7 | 8 | chat_llm: &chat_llm 9 | api_key: sk-V4rpB2LLM5sC6CGxmUWs1LRqhZpddDEIVmsGBmwH9rm67y6F 10 | base_url: https://yunwu.ai/v1 11 | model: gpt-4o-mini 12 | type: maas 13 | 14 | vectorize_model: &vectorize_model 15 | api_key: sk-V4rpB2LLM5sC6CGxmUWs1LRqhZpddDEIVmsGBmwH9rm67y6F 16 | base_url: https://yunwu.ai/v1 17 | model: text-embedding-3-small 18 | type: openai 19 | vector_dimensions: 1536 20 | vectorizer: *vectorize_model 21 | 22 | log: 23 | level: INFO 24 | 25 | project: 26 | biz_scene: default 27 | host_addr: http://127.0.0.1:8887 28 | id: '3' 29 | language: zh 30 | namespace: XiYouJiTest 31 | checkpoint_path: ./runner-ckpt 32 | #------------project configuration end----------------# 33 | 34 | #------------txt-kag-builder configuration start----------------# 35 | txt_kag_builder_pipeline: 36 | chain: 37 | type: unstructured_builder_chain # kag.builder.default_chain.DefaultUnstructuredBuilderChain 38 | extractor: 39 | type: schema_constraint_extractor # kag.builder.component.extractor.schema_free_extractor.SchemaFreeExtractor 40 | llm: *openie_llm 41 | ner_prompt: 42 | type: default_ner # kag.builder.prompt.default.ner.OpenIENERPrompt 43 | std_prompt: 44 | type: default_std # kag.builder.prompt.default.std.OpenIEEntitystandardizationdPrompt 45 | triple_prompt: 46 | type: default_triple # kag.builder.prompt.default.triple.OpenIETriplePrompt 47 | reader: 48 | type: txt_reader # kag.builder.component.reader.dict_reader.txt_reader 49 | post_processor: 50 | type: kag_post_processor # kag.builder.component.postprocessor.kag_postprocessor.KAGPostProcessor 51 | similarity_threshold: 0.9 52 | splitter: 53 | type: length_splitter # kag.builder.component.splitter.length_splitter.LengthSplitter 54 | split_length: 800 55 | window_length: 100 56 | vectorizer: 57 | type: batch_vectorizer # kag.builder.component.vectorizer.batch_vectorizer.BatchVectorizer 58 | vectorize_model: *vectorize_model 59 | writer: 60 | type: kg_writer # kag.builder.component.writer.kg_writer.KGWriter 61 | num_threads_per_chain: 1 62 | num_chains: 16 63 | scanner: 64 | type: file_scanner # kag.builder.component.scanner.dataset_scanner.file_scanner 65 | #------------txt-kag-builder configuration end----------------# 66 | 67 | #------------md-kag-builder configuration start----------------# 68 | md_kag_builder_pipeline: 69 | chain: 70 | type: unstructured_builder_chain # kag.builder.default_chain.DefaultUnstructuredBuilderChain 71 | extractor: 72 | type: schema_constraint_extractor # kag.builder.component.extractor.schema_free_extractor.SchemaFreeExtractor 73 | llm: *openie_llm 74 | ner_prompt: 75 | type: default_ner # kag.builder.prompt.default.ner.OpenIENERPrompt 76 | std_prompt: 77 | type: default_std # kag.builder.prompt.default.std.OpenIEEntitystandardizationdPrompt 78 | triple_prompt: 79 | type: default_triple # kag.builder.prompt.default.triple.OpenIETriplePrompt 80 | reader: 81 | type: md_reader # kag.builder.component.reader.dict_reader.txt_reader 82 | post_processor: 83 | type: kag_post_processor # kag.builder.component.postprocessor.kag_postprocessor.KAGPostProcessor 84 | similarity_threshold: 0.9 85 | splitter: 86 | type: length_splitter # kag.builder.component.splitter.length_splitter.LengthSplitter 87 | split_length: 800 88 | window_length: 100 89 | vectorizer: 90 | type: batch_vectorizer # kag.builder.component.vectorizer.batch_vectorizer.BatchVectorizer 91 | vectorize_model: *vectorize_model 92 | writer: 93 | type: kg_writer # kag.builder.component.writer.kg_writer.KGWriter 94 | num_threads_per_chain: 1 95 | num_chains: 16 96 | scanner: 97 | type: file_scanner # kag.builder.component.scanner.dataset_scanner.file_scanner 98 | #------------md-kag-builder configuration end----------------# 99 | 100 | #------------pdf-kag-builder configuration start----------------# 101 | pdf_kag_builder_pipeline: 102 | chain: 103 | type: unstructured_builder_chain # kag.builder.default_chain.DefaultUnstructuredBuilderChain 104 | extractor: 105 | type: schema_constraint_extractor # kag.builder.component.extractor.schema_free_extractor.SchemaFreeExtractor 106 | llm: *openie_llm 107 | ner_prompt: 108 | type: default_ner # kag.builder.prompt.default.ner.OpenIENERPrompt 109 | std_prompt: 110 | type: default_std # kag.builder.prompt.default.std.OpenIEEntitystandardizationdPrompt 111 | triple_prompt: 112 | type: default_triple # kag.builder.prompt.default.triple.OpenIETriplePrompt 113 | reader: 114 | type: pdf_reader # kag.builder.component.reader.dict_reader.txt_reader 115 | post_processor: 116 | type: kag_post_processor # kag.builder.component.postprocessor.kag_postprocessor.KAGPostProcessor 117 | similarity_threshold: 0.9 118 | splitter: 119 | type: length_splitter # kag.builder.component.splitter.length_splitter.LengthSplitter 120 | split_length: 800 121 | window_length: 100 122 | vectorizer: 123 | type: batch_vectorizer # kag.builder.component.vectorizer.batch_vectorizer.BatchVectorizer 124 | vectorize_model: *vectorize_model 125 | writer: 126 | type: kg_writer # kag.builder.component.writer.kg_writer.KGWriter 127 | num_threads_per_chain: 1 128 | num_chains: 16 129 | scanner: 130 | type: file_scanner # kag.builder.component.scanner.dataset_scanner.file_scanner 131 | #------------pdf-kag-builder configuration end----------------# 132 | 133 | #------------docx-kag-builder configuration start----------------# 134 | docx_kag_builder_pipeline: 135 | chain: 136 | type: unstructured_builder_chain # kag.builder.default_chain.DefaultUnstructuredBuilderChain 137 | extractor: 138 | type: schema_constraint_extractor # kag.builder.component.extractor.schema_free_extractor.SchemaFreeExtractor 139 | llm: *openie_llm 140 | ner_prompt: 141 | type: default_ner # kag.builder.prompt.default.ner.OpenIENERPrompt 142 | std_prompt: 143 | type: default_std # kag.builder.prompt.default.std.OpenIEEntitystandardizationdPrompt 144 | triple_prompt: 145 | type: default_triple # kag.builder.prompt.default.triple.OpenIETriplePrompt 146 | reader: 147 | type: docx_reader # kag.builder.component.reader.dict_reader.txt_reader 148 | post_processor: 149 | type: kag_post_processor # kag.builder.component.postprocessor.kag_postprocessor.KAGPostProcessor 150 | similarity_threshold: 0.9 151 | splitter: 152 | type: length_splitter # kag.builder.component.splitter.length_splitter.LengthSplitter 153 | split_length: 800 154 | window_length: 100 155 | vectorizer: 156 | type: batch_vectorizer # kag.builder.component.vectorizer.batch_vectorizer.BatchVectorizer 157 | vectorize_model: *vectorize_model 158 | writer: 159 | type: kg_writer # kag.builder.component.writer.kg_writer.KGWriter 160 | num_threads_per_chain: 1 161 | num_chains: 16 162 | scanner: 163 | type: file_scanner # kag.builder.component.scanner.dataset_scanner.file_scanner 164 | #------------docx-kag-builder configuration end----------------# 165 | 166 | #------------kag-solver configuration start----------------# 167 | search_api: &search_api 168 | type: openspg_search_api #kag.solver.tools.search_api.impl.openspg_search_api.OpenSPGSearchAPI 169 | 170 | graph_api: &graph_api 171 | type: openspg_graph_api #kag.solver.tools.graph_api.impl.openspg_graph_api.OpenSPGGraphApi 172 | 173 | exact_kg_retriever: &exact_kg_retriever 174 | type: default_exact_kg_retriever # kag.solver.retriever.impl.default_exact_kg_retriever.DefaultExactKgRetriever 175 | el_num: 5 176 | llm_client: *chat_llm 177 | search_api: *search_api 178 | graph_api: *graph_api 179 | 180 | fuzzy_kg_retriever: &fuzzy_kg_retriever 181 | type: default_fuzzy_kg_retriever # kag.solver.retriever.impl.default_fuzzy_kg_retriever.DefaultFuzzyKgRetriever 182 | el_num: 5 183 | vectorize_model: *vectorize_model 184 | llm_client: *chat_llm 185 | search_api: *search_api 186 | graph_api: *graph_api 187 | 188 | chunk_retriever: &chunk_retriever 189 | type: default_chunk_retriever # kag.solver.retriever.impl.default_fuzzy_kg_retriever.DefaultFuzzyKgRetriever 190 | llm_client: *chat_llm 191 | recall_num: 10 192 | rerank_topk: 10 193 | 194 | kag_solver_pipeline: 195 | memory: 196 | type: default_memory # kag.solver.implementation.default_memory.DefaultMemory 197 | llm_client: *chat_llm 198 | max_iterations: 3 199 | reasoner: 200 | type: default_reasoner # kag.solver.implementation.default_reasoner.DefaultReasoner 201 | llm_client: *chat_llm 202 | lf_planner: 203 | type: default_lf_planner # kag.solver.plan.default_lf_planner.DefaultLFPlanner 204 | llm_client: *chat_llm 205 | vectorize_model: *vectorize_model 206 | lf_executor: 207 | type: default_lf_executor # kag.solver.execute.default_lf_executor.DefaultLFExecutor 208 | llm_client: *chat_llm 209 | force_chunk_retriever: true 210 | exact_kg_retriever: *exact_kg_retriever 211 | fuzzy_kg_retriever: *fuzzy_kg_retriever 212 | chunk_retriever: *chunk_retriever 213 | merger: 214 | type: default_lf_sub_query_res_merger # kag.solver.execute.default_sub_query_merger.DefaultLFSubQueryResMerger 215 | vectorize_model: *vectorize_model 216 | chunk_retriever: *chunk_retriever 217 | generator: 218 | type: default_generator # kag.solver.implementation.default_generator.DefaultGenerator 219 | llm_client: *chat_llm 220 | generate_prompt: 221 | type: default_resp_generator # kag.solver.prompt.default.resp_generator.RespGenerator 222 | reflector: 223 | type: default_reflector # kag.solver.implementation.default_reflector.DefaultReflector 224 | llm_client: *chat_llm 225 | 226 | #------------kag-solver configuration end----------------# 227 | -------------------------------------------------------------------------------- /KagV6Test/XiYouJiTest_KAG_V6/reasoner/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanGePlus/KagTest/f4ecd3247c681e74f19649e442b029d9eb2ee71f/KagV6Test/XiYouJiTest_KAG_V6/reasoner/__init__.py -------------------------------------------------------------------------------- /KagV6Test/XiYouJiTest_KAG_V6/schema/XiYouJiTest.schema: -------------------------------------------------------------------------------- 1 | namespace XiYouJiTest 2 | 3 | Chunk(文本块): EntityType 4 | properties: 5 | content(内容): Text 6 | index: TextAndVector 7 | 8 | ArtificialObject(人造物体): EntityType 9 | properties: 10 | desc(描述): Text 11 | index: TextAndVector 12 | semanticType(语义类型): Text 13 | index: Text 14 | 15 | Astronomy(天文学): EntityType 16 | properties: 17 | desc(描述): Text 18 | index: TextAndVector 19 | semanticType(语义类型): Text 20 | index: Text 21 | 22 | Building(建筑): EntityType 23 | properties: 24 | desc(描述): Text 25 | index: TextAndVector 26 | semanticType(语义类型): Text 27 | index: Text 28 | 29 | Creature(生物): EntityType 30 | properties: 31 | desc(描述): Text 32 | index: TextAndVector 33 | semanticType(语义类型): Text 34 | index: Text 35 | 36 | Concept(概念): EntityType 37 | properties: 38 | desc(描述): Text 39 | index: TextAndVector 40 | semanticType(语义类型): Text 41 | index: Text 42 | 43 | Date(日期): EntityType 44 | properties: 45 | desc(描述): Text 46 | index: TextAndVector 47 | semanticType(语义类型): Text 48 | index: Text 49 | 50 | GeographicLocation(地理位置): EntityType 51 | properties: 52 | desc(描述): Text 53 | index: TextAndVector 54 | semanticType(语义类型): Text 55 | index: Text 56 | 57 | Keyword(关键词): EntityType 58 | properties: 59 | desc(描述): Text 60 | index: TextAndVector 61 | semanticType(语义类型): Text 62 | index: Text 63 | 64 | Medicine(药物): EntityType 65 | properties: 66 | desc(描述): Text 67 | index: TextAndVector 68 | semanticType(语义类型): Text 69 | index: Text 70 | 71 | 72 | NaturalScience(自然科学): EntityType 73 | properties: 74 | desc(描述): Text 75 | index: TextAndVector 76 | semanticType(语义类型): Text 77 | index: Text 78 | 79 | Organization(组织机构): EntityType 80 | properties: 81 | desc(描述): Text 82 | index: TextAndVector 83 | semanticType(语义类型): Text 84 | index: Text 85 | 86 | Person(人物): EntityType 87 | properties: 88 | desc(描述): Text 89 | index: TextAndVector 90 | semanticType(语义类型): Text 91 | index: Text 92 | 93 | Transport(运输): EntityType 94 | properties: 95 | desc(描述): Text 96 | index: TextAndVector 97 | semanticType(语义类型): Text 98 | index: Text 99 | 100 | Works(作品): EntityType 101 | properties: 102 | desc(描述): Text 103 | index: TextAndVector 104 | semanticType(语义类型): Text 105 | index: Text 106 | 107 | Others(其他): EntityType 108 | properties: 109 | desc(描述): Text 110 | index: TextAndVector 111 | semanticType(语义类型): Text 112 | index: Text 113 | 114 | Event(事件): EventType 115 | properties: 116 | subject(主体): Person 117 | participants(参与者): Person 118 | constraint: MultiValue 119 | time(时间): Date 120 | location(地点): GeographicLocation 121 | abstract(摘要): Text 122 | index: TextAndVector 123 | type(事件类型): Text 124 | index: Text 125 | 126 | 127 | SemanticConcept(语义概念): EntityType 128 | properties: 129 | desc(内容): Text 130 | index: Text -------------------------------------------------------------------------------- /KagV6Test/XiYouJiTest_KAG_V6/schema/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanGePlus/KagTest/f4ecd3247c681e74f19649e442b029d9eb2ee71f/KagV6Test/XiYouJiTest_KAG_V6/schema/__init__.py -------------------------------------------------------------------------------- /KagV6Test/XiYouJiTest_KAG_V6/solver/prompt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NanGePlus/KagTest/f4ecd3247c681e74f19649e442b029d9eb2ee71f/KagV6Test/XiYouJiTest_KAG_V6/solver/prompt/__init__.py -------------------------------------------------------------------------------- /KagV6Test/XiYouJiTest_KAG_V6/solver/prompt/resp_generator.py: -------------------------------------------------------------------------------- 1 | import re 2 | from string import Template 3 | from typing import List 4 | import logging 5 | 6 | from kag.interface import PromptABC 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | @PromptABC.register("resp_simple") 12 | class RespGenerator(PromptABC): 13 | template_zh = ( 14 | "基于给定的引用信息回答问题。" "\n只输出答案,不需要输出额外的信息。" "\n给定的引用信息:'$memory'\n问题:'$instruction'" 15 | ) 16 | template_en = ( 17 | "Answer the question based on the given reference." 18 | "\nOnly give me the answer and do not output any other words." 19 | "\nThe following are given reference:'$memory'\nQuestion: '$instruction'" 20 | ) 21 | 22 | @property 23 | def template_variables(self) -> List[str]: 24 | return ["memory", "instruction"] 25 | 26 | def parse_response(self, response: str, **kwargs): 27 | logger.debug("推理器判别:{}".format(response)) 28 | return response 29 | -------------------------------------------------------------------------------- /KagV6Test/XiYouJiTest_KAG_V6/solver/query.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import time 5 | from concurrent.futures import ThreadPoolExecutor, as_completed 6 | 7 | from tqdm import tqdm 8 | 9 | from kag.common.benchmarks.evaluate import Evaluate 10 | from kag.solver.logic.solver_pipeline import SolverPipeline 11 | from kag.common.conf import KAG_CONFIG 12 | from kag.common.registry import import_modules_from_path 13 | 14 | from kag.common.checkpointer import CheckpointerManager 15 | 16 | 17 | def qa(query): 18 | resp = SolverPipeline.from_config(KAG_CONFIG.all_config["kag_solver_pipeline"]) 19 | answer, traceLog = resp.run(query) 20 | 21 | print(f"\n\nso the answer for '{query}' is: {answer}\n\n") # 22 | print(traceLog) 23 | return answer, traceLog 24 | 25 | 26 | if __name__ == "__main__": 27 | import_modules_from_path("./prompt") 28 | queries = [ 29 | "唐僧的人物关系有哪些?", 30 | ] 31 | for q in queries: 32 | qa(q) 33 | -------------------------------------------------------------------------------- /KagV6Test/other/apiFile/KagTest.apifox.json: -------------------------------------------------------------------------------- 1 | {"apifoxProject":"1.0.0","$schema":{"app":"apifox","type":"project","version":"1.2.0"},"info":{"name":"KagTest","description":"","mockRule":{"rules":[],"enableSystemRule":true}},"apiCollection":[{"name":"根目录","id":48785278,"auth":{},"parentId":0,"serverId":"","description":"","identityPattern":{"httpApi":{"type":"methodAndPath","bodyType":"","fields":[]}},"shareSettings":{},"visibility":"SHARED","preProcessors":[{"id":"inheritProcessors","type":"inheritProcessors","data":{}}],"postProcessors":[{"id":"inheritProcessors","type":"inheritProcessors","data":{}}],"inheritPostProcessors":{},"inheritPreProcessors":{},"items":[{"name":"1、搜索实体","api":{"id":"253148745","method":"get","path":"/v1/datas/search","parameters":{"query":[{"id":"SN9o4d3oiH","name":"projectId","example":["1"],"required":false,"description":"","enable":true,"type":"array"},{"id":"QIwfT8T3I6","name":"label","example":["all"],"required":false,"description":"","enable":true,"type":"array"},{"id":"vhrfwDOCOZ","name":"queryStr","example":["周杰伦"],"required":false,"description":"","enable":true,"type":"array"},{"id":"7h9f9RveZl","name":"size","example":["10"],"required":false,"description":"","enable":true,"type":"array"},{"id":"OnfM2cuhKU","name":"page","example":["1"],"required":false,"description":"","enable":true,"type":"array"}]},"auth":{},"commonParameters":{"query":[],"body":[],"cookie":[],"header":[]},"responses":[{"id":"600580841","code":200,"name":"成功","headers":[],"jsonSchema":{"type":"object","properties":{}},"description":"","contentType":"json","mediaType":""}],"responseExamples":[],"requestBody":{"type":"multipart/form-data","parameters":[],"jsonSchema":{"type":"object","properties":{}},"example":""},"description":"搜索项目知识库符合条件的知识","tags":[],"status":"developing","serverId":"","operationId":"","sourceUrl":"","ordering":10,"cases":[{"id":226722669,"type":"http","path":null,"name":"成功","responseId":600580841,"parameters":{"query":[{"id":"iZOXdJBCFp","relatedName":"projectId","relatedId":"SN9o4d3oiH","value":["1"],"enable":true,"isDelete":false},{"id":"MFzW6CuPEA","relatedName":"label","relatedId":"QIwfT8T3I6","value":["all"],"enable":true,"isDelete":false},{"id":"mDpNDPb7Rq","relatedName":"queryStr","relatedId":"vhrfwDOCOZ","value":["周杰伦"],"enable":true,"isDelete":false},{"id":"wnmAMyPq4T","relatedName":"size","relatedId":"7h9f9RveZl","value":["10"],"enable":true,"isDelete":false},{"id":"71SGemGgrA","relatedName":"page","relatedId":"OnfM2cuhKU","value":["1"],"enable":true,"isDelete":false}]},"commonParameters":{"query":[],"body":[],"header":[],"cookie":[]},"requestBody":{"parameters":[],"data":"","type":"multipart/form-data"},"auth":{},"advancedSettings":{"disabledSystemHeaders":{}},"requestResult":null,"visibility":"INHERITED","preProcessors":[],"postProcessors":[],"inheritPostProcessors":{},"inheritPreProcessors":{}}],"mocks":[],"customApiFields":"{}","advancedSettings":{"disabledSystemHeaders":{}},"mockScript":{},"codeSamples":[],"commonResponseStatus":{},"responseChildren":["BLANK.600580841"],"visibility":"INHERITED","preProcessors":[],"postProcessors":[],"inheritPostProcessors":{},"inheritPreProcessors":{}}},{"name":"2、实体详情","api":{"id":"253161158","method":"post","path":"/v1/datas/getEntityDetail","parameters":{},"auth":{},"commonParameters":{"query":[],"body":[],"cookie":[],"header":[]},"responses":[{"id":"600603549","code":200,"name":"成功","headers":[],"jsonSchema":{"type":"object","properties":{"result":{"type":"object","properties":{"projectId":{"type":"integer"},"dsl":{"type":"string"},"params":{"type":"object","properties":{"id":{"type":"string"}},"required":["id"]},"status":{"type":"string"},"resultTable":{"type":"object","properties":{"total":{"type":"integer"},"header":{"type":"array","items":{"type":"string"}},"rows":{"type":"array","items":{}}},"required":["total","header","rows"]},"resultNodes":{"type":"array","items":{"type":"object","properties":{"id":{"type":"string"},"name":{"type":"string"},"label":{"type":"string"},"properties":{"type":"object","properties":{"semanticType":{"type":"string"},"name":{"type":"string"},"desc":{"type":"string"},"id":{"type":"string"},"content":{"type":"string"}},"required":["name","id","content"]}},"required":["id","name","label","properties"]}},"resultEdges":{"type":"array","items":{}}},"required":["projectId","dsl","params","status","resultTable","resultNodes","resultEdges"]},"success":{"type":"boolean"},"remote":{"type":"string"}},"required":["result","success","remote"]},"description":"","contentType":"json","mediaType":""}],"responseExamples":[],"requestBody":{"type":"application/json","parameters":[],"jsonSchema":{"type":"object","properties":{}},"example":"{\n \"label\":\"JayChou.Albums\",\n \"projectId\":1,\n \"params\":{\n \"id\":\"七里香\"\n }\n}"},"description":"","tags":[],"status":"developing","serverId":"","operationId":"","sourceUrl":"","ordering":20,"cases":[{"id":226725087,"type":"http","path":null,"name":"成功","responseId":600603549,"parameters":{},"commonParameters":{"query":[],"body":[],"header":[],"cookie":[]},"requestBody":{"parameters":[],"data":"{\n \"label\":\"JayChou.Albums\",\n \"projectId\":1,\n \"params\":{\n \"id\":\"七里香\"\n }\n}","type":"application/json"},"auth":{},"advancedSettings":{"disabledSystemHeaders":{},"isDefaultUrlEncoding":1},"requestResult":null,"visibility":"INHERITED","preProcessors":[],"postProcessors":[],"inheritPostProcessors":{},"inheritPreProcessors":{}}],"mocks":[],"customApiFields":"{}","advancedSettings":{"disabledSystemHeaders":{},"isDefaultUrlEncoding":1},"mockScript":{},"codeSamples":[],"commonResponseStatus":{},"responseChildren":["BLANK.600603549"],"visibility":"INHERITED","preProcessors":[],"postProcessors":[],"inheritPostProcessors":{},"inheritPreProcessors":{}}},{"name":"3、实体一度邻居子图","api":{"id":"253164371","method":"post","path":"/v1/datas/getOneHopGraph","parameters":{},"auth":{},"commonParameters":{"query":[],"body":[],"cookie":[],"header":[]},"responses":[{"id":"600604548","code":200,"name":"成功","headers":[],"jsonSchema":{"type":"object","properties":{"result":{"type":"object","properties":{"projectId":{"type":"integer"},"dsl":{"type":"string"},"params":{"type":"object","properties":{"id":{"type":"string"}},"required":["id"]},"status":{"type":"string"},"resultTable":{"type":"object","properties":{"total":{"type":"integer"},"header":{"type":"array","items":{"type":"string"}},"rows":{"type":"array","items":{}}},"required":["total","header","rows"]},"resultNodes":{"type":"array","items":{"type":"object","properties":{"id":{"type":"string"},"name":{"type":"string"},"label":{"type":"string"},"properties":{"type":"object","properties":{"semanticType":{"type":"string"},"name":{"type":"string"},"id":{"type":"string"},"desc":{"type":"string"},"content":{"type":"string"}},"required":["name","id","content"]}},"required":["id","name","label","properties"]}},"resultEdges":{"type":"array","items":{"type":"object","properties":{"docId":{"type":"string"},"id":{"type":"string"},"from":{"type":"string"},"fromId":{"type":"string"},"fromType":{"type":"string"},"to":{"type":"string"},"toId":{"type":"string"},"toType":{"type":"string"},"label":{"type":"string"},"properties":{"type":"object","properties":{}}},"required":["docId","id","from","fromId","fromType","to","toId","toType","label","properties"]}}},"required":["projectId","dsl","params","status","resultTable","resultNodes","resultEdges"]},"success":{"type":"boolean"},"remote":{"type":"string"}},"required":["result","success","remote"]},"description":"","contentType":"json","mediaType":""}],"responseExamples":[],"requestBody":{"type":"application/json","parameters":[],"jsonSchema":{"type":"object","properties":{}},"example":"{\n \"label\":\"JayChou.Albums\",\n \"projectId\":1,\n \"params\":{\n \"id\":\"七里香\"\n }\n}"},"description":"","tags":[],"status":"developing","serverId":"","operationId":"","sourceUrl":"","ordering":30,"cases":[{"id":226727277,"type":"http","path":null,"name":"成功","responseId":600604548,"parameters":{},"commonParameters":{"query":[],"body":[],"header":[],"cookie":[]},"requestBody":{"parameters":[],"data":"{\n \"label\":\"JayChou.Albums\",\n \"projectId\":1,\n \"params\":{\n \"id\":\"七里香\"\n }\n}","type":"application/json"},"auth":{},"advancedSettings":{"disabledSystemHeaders":{},"isDefaultUrlEncoding":1},"requestResult":null,"visibility":"INHERITED","preProcessors":[],"postProcessors":[],"inheritPostProcessors":{},"inheritPreProcessors":{}}],"mocks":[],"customApiFields":"{}","advancedSettings":{"disabledSystemHeaders":{},"isDefaultUrlEncoding":1},"mockScript":{},"codeSamples":[],"commonResponseStatus":{},"responseChildren":["BLANK.600604548"],"visibility":"INHERITED","preProcessors":[],"postProcessors":[],"inheritPostProcessors":{},"inheritPreProcessors":{}}},{"name":"4、智能问答","api":{"id":"253170901","method":"post","path":"/v1/datas/asyncSubmit","parameters":{},"auth":{},"commonParameters":{"query":[],"body":[],"cookie":[],"header":[]},"responses":[{"id":"600614561","code":200,"name":"成功","headers":[],"jsonSchema":{"type":"object","properties":{"success":{"type":"boolean"},"errorCode":{"type":"string"},"errorMsg":{"type":"string"},"remote":{"type":"string"}},"required":["success","errorCode","errorMsg","remote"]},"description":"","contentType":"json","mediaType":""}],"responseExamples":[],"requestBody":{"type":"application/json","parameters":[],"jsonSchema":{"type":"object","properties":{}},"example":"{\n \"sessionId\": 1,\n \"projectId\": 1,\n \"instruction\": \"周杰伦是谁\",\n \"type\": \"NL\"\n}"},"description":"","tags":[],"status":"developing","serverId":"","operationId":"","sourceUrl":"","ordering":40,"cases":[{"id":226735577,"type":"http","path":null,"name":"成功","responseId":600614561,"parameters":{},"commonParameters":{"query":[],"body":[],"header":[],"cookie":[]},"requestBody":{"parameters":[],"data":"{\n \"sessionId\": 1,\n \"projectId\": 1,\n \"instruction\": \"周杰伦是谁\",\n \"type\": \"NL\"\n}","type":"application/json"},"auth":{},"advancedSettings":{"disabledSystemHeaders":{},"isDefaultUrlEncoding":1},"requestResult":null,"visibility":"INHERITED","preProcessors":[],"postProcessors":[],"inheritPostProcessors":{},"inheritPreProcessors":{}}],"mocks":[],"customApiFields":"{}","advancedSettings":{"disabledSystemHeaders":{},"isDefaultUrlEncoding":1},"mockScript":{},"codeSamples":[],"commonResponseStatus":{},"responseChildren":["BLANK.600614561"],"visibility":"INHERITED","preProcessors":[],"postProcessors":[],"inheritPostProcessors":{},"inheritPreProcessors":{}}},{"name":"5、问答详情","api":{"id":"253178776","method":"get","path":"/v1/datas/query/5","parameters":{"path":[]},"auth":{},"commonParameters":{"query":[],"body":[],"cookie":[],"header":[]},"responses":[{"id":"600630609","code":200,"name":"成功","headers":[],"jsonSchema":{"type":"object","properties":{"result":{"type":"object","properties":{"id":{"type":"integer"},"projectId":{"type":"integer"},"userId":{"type":"integer"},"sessionId":{"type":"integer"},"dsl":{"type":"string"},"nl":{"type":"string"},"params":{"type":"object","properties":{}},"mark":{"type":"string"},"status":{"type":"string"},"resultMessage":{"type":"string"}},"required":["id","projectId","userId","sessionId","dsl","nl","params","mark","status","resultMessage"]},"success":{"type":"boolean"},"remote":{"type":"string"}},"required":["result","success","remote"]},"description":"","contentType":"json","mediaType":""}],"responseExamples":[],"requestBody":{"type":"none","parameters":[]},"description":"","tags":[],"status":"developing","serverId":"","operationId":"","sourceUrl":"","ordering":50,"cases":[{"id":226736371,"type":"http","path":null,"name":"成功","responseId":600630609,"parameters":{"path":[]},"commonParameters":{"query":[],"body":[],"header":[],"cookie":[]},"requestBody":{"parameters":[],"type":"none"},"auth":{},"advancedSettings":{"disabledSystemHeaders":{},"isDefaultUrlEncoding":1},"requestResult":null,"visibility":"INHERITED","preProcessors":[],"postProcessors":[],"inheritPostProcessors":{},"inheritPreProcessors":{}}],"mocks":[],"customApiFields":"{}","advancedSettings":{"disabledSystemHeaders":{},"isDefaultUrlEncoding":1},"mockScript":{},"codeSamples":[],"commonResponseStatus":{},"responseChildren":["BLANK.600630609"],"visibility":"INHERITED","preProcessors":[],"postProcessors":[],"inheritPostProcessors":{},"inheritPreProcessors":{}}}]}],"socketCollection":[],"docCollection":[],"responseCollection":[{"_databaseId":5995222,"updatedAt":"2025-01-10T01:51:57.000Z","name":"根目录","type":"root","children":[],"parentId":0,"id":5995222,"ordering":[],"items":[]}],"schemaCollection":[{"id":12557363,"name":"根目录","visibility":"SHARED","items":[],"ordering":[]}],"requestCollection":[{"name":"根目录","children":[],"ordering":["requestFolder.6043615"],"items":[]}],"apiTestCaseCollection":[{"name":"根目录","children":[],"items":[]}],"testCaseReferences":[],"environments":[{"name":"KagTest","requestProxyAgentSettings":{},"variables":[],"parameters":{"cookie":[],"query":[],"header":[],"body":[]},"type":"normal","visibility":"protected","ordering":30,"tags":[{"name":"","color":"#9373EE"}],"id":"28682709","baseUrl":"http://127.0.0.1:8887","baseUrls":{"default":"http://127.0.0.1:8887"}}],"commonScripts":[],"databaseConnections":[],"globalVariables":[],"commonParameters":null,"projectSetting":{"id":"5769310","auth":{},"servers":[{"id":"default","name":"默认服务"}],"gateway":[],"language":"zh-CN","apiStatuses":["developing","testing","released","deprecated"],"mockSettings":{},"preProcessors":[],"postProcessors":[],"advancedSettings":{"enableJsonc":false,"enableBigint":false,"responseValidate":true,"enableTestScenarioSetting":false,"enableYAPICompatScript":false,"isDefaultUrlEncoding":2,"publishedDocUrlRules":{"defaultRule":"RESOURCE_KEY_ONLY","resourceKeyStandard":"NEW"}},"initialDisabledMockIds":[],"cloudMock":{"security":"free","enable":false,"tokenKey":"apifoxToken"}},"customFunctions":[],"projectAssociations":[]} -------------------------------------------------------------------------------- /KagV6Test/other/config/example_config.yaml: -------------------------------------------------------------------------------- 1 | #------------project configuration start----------------# 2 | openie_llm: &openie_llm 3 | api_key: sk-MqUugKDFN7cgWzmX0XM1reUb6I3rm5WgA2LdHl6WhDqlz2fp 4 | base_url: https://yunwu.ai/v1 5 | model: gpt-4o-mini 6 | type: maas 7 | 8 | chat_llm: &chat_llm 9 | api_key: sk-MqUugKDFN7cgWzmX0XM1reUb6I3rm5WgA2LdHl6WhDqlz2fp 10 | base_url: https://yunwu.ai/v1 11 | model: gpt-4o-mini 12 | type: maas 13 | 14 | vectorize_model: &vectorize_model 15 | api_key: sk-MqUugKDFN7cgWzmX0XM1reUb6I3rm5WgA2LdHl6WhDqlz2fp 16 | base_url: https://yunwu.ai/v1 17 | model: text-embedding-3-small 18 | type: openai 19 | vector_dimensions: 1536 20 | vectorizer: *vectorize_model 21 | 22 | log: 23 | level: INFO 24 | 25 | project: 26 | biz_scene: default 27 | host_addr: http://127.0.0.1:8887 28 | id: "1" 29 | language: zh 30 | namespace: JayChouTest 31 | #------------project configuration end----------------# 32 | 33 | #------------kag-builder configuration start----------------# 34 | kag_builder_pipeline: 35 | chain: 36 | type: unstructured_builder_chain # kag.builder.default_chain.DefaultUnstructuredBuilderChain 37 | extractor: 38 | type: schema_constraint_extractor # kag.builder.component.extractor.schema_free_extractor.SchemaFreeExtractor 39 | llm: *openie_llm 40 | ner_prompt: 41 | type: jaychou_ner # kag.builder.prompt.default.ner.OpenIENERPrompt 42 | std_prompt: 43 | type: jaychou_std # kag.builder.prompt.default.std.OpenIEEntitystandardizationdPrompt 44 | triple_prompt: 45 | type: jaychou_triple # kag.builder.prompt.default.triple.OpenIETriplePrompt 46 | reader: 47 | type: txt_reader # kag.builder.component.reader.dict_reader.txt_reader 48 | post_processor: 49 | type: kag_post_processor # kag.builder.component.postprocessor.kag_postprocessor.KAGPostProcessor 50 | similarity_threshold: 0.9 51 | splitter: 52 | type: length_splitter # kag.builder.component.splitter.length_splitter.LengthSplitter 53 | split_length: 800 54 | window_length: 100 55 | vectorizer: 56 | type: batch_vectorizer # kag.builder.component.vectorizer.batch_vectorizer.BatchVectorizer 57 | vectorize_model: *vectorize_model 58 | writer: 59 | type: kg_writer # kag.builder.component.writer.kg_writer.KGWriter 60 | num_threads_per_chain: 1 61 | num_chains: 16 62 | scanner: 63 | type: file_scanner # kag.builder.component.scanner.dataset_scanner.file_scanner 64 | #------------kag-builder configuration end----------------# 65 | 66 | #------------kag-solver configuration start----------------# 67 | search_api: &search_api 68 | type: openspg_search_api #kag.solver.tools.search_api.impl.openspg_search_api.OpenSPGSearchAPI 69 | 70 | graph_api: &graph_api 71 | type: openspg_graph_api #kag.solver.tools.graph_api.impl.openspg_graph_api.OpenSPGGraphApi 72 | 73 | exact_kg_retriever: &exact_kg_retriever 74 | type: default_exact_kg_retriever # kag.solver.retriever.impl.default_exact_kg_retriever.DefaultExactKgRetriever 75 | el_num: 5 76 | llm_client: *chat_llm 77 | search_api: *search_api 78 | graph_api: *graph_api 79 | 80 | fuzzy_kg_retriever: &fuzzy_kg_retriever 81 | type: default_fuzzy_kg_retriever # kag.solver.retriever.impl.default_fuzzy_kg_retriever.DefaultFuzzyKgRetriever 82 | el_num: 5 83 | vectorize_model: *vectorize_model 84 | llm_client: *chat_llm 85 | search_api: *search_api 86 | graph_api: *graph_api 87 | 88 | chunk_retriever: &chunk_retriever 89 | type: default_chunk_retriever # kag.solver.retriever.impl.default_fuzzy_kg_retriever.DefaultFuzzyKgRetriever 90 | llm_client: *chat_llm 91 | recall_num: 10 92 | rerank_topk: 10 93 | 94 | kag_solver_pipeline: 95 | memory: 96 | type: default_memory # kag.solver.implementation.default_memory.DefaultMemory 97 | llm_client: *chat_llm 98 | max_iterations: 3 99 | reasoner: 100 | type: default_reasoner # kag.solver.implementation.default_reasoner.DefaultReasoner 101 | llm_client: *chat_llm 102 | lf_planner: 103 | type: default_lf_planner # kag.solver.plan.default_lf_planner.DefaultLFPlanner 104 | llm_client: *chat_llm 105 | vectorize_model: *vectorize_model 106 | lf_executor: 107 | type: default_lf_executor # kag.solver.execute.default_lf_executor.DefaultLFExecutor 108 | llm_client: *chat_llm 109 | force_chunk_retriever: true 110 | exact_kg_retriever: *exact_kg_retriever 111 | fuzzy_kg_retriever: *fuzzy_kg_retriever 112 | chunk_retriever: *chunk_retriever 113 | merger: 114 | type: default_lf_sub_query_res_merger # kag.solver.execute.default_sub_query_merger.DefaultLFSubQueryResMerger 115 | vectorize_model: *vectorize_model 116 | chunk_retriever: *chunk_retriever 117 | generator: 118 | type: default_generator # kag.solver.implementation.default_generator.DefaultGenerator 119 | llm_client: *chat_llm 120 | generate_prompt: 121 | type: default_resp_generator # kag.solver.prompt.default.resp_generator.RespGenerator 122 | reflector: 123 | type: default_reflector # kag.solver.implementation.default_reflector.DefaultReflector 124 | llm_client: *chat_llm 125 | 126 | #------------kag-solver configuration end----------------# 127 | -------------------------------------------------------------------------------- /KagV6Test/other/docker/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | services: 3 | server: 4 | restart: always 5 | image: spg-registry.cn-hangzhou.cr.aliyuncs.com/spg/openspg-server:latest 6 | container_name: release-openspg-server 7 | ports: 8 | - "8887:8887" 9 | depends_on: 10 | - mysql 11 | - neo4j 12 | - minio 13 | volumes: 14 | - /etc/localtime:/etc/localtime:ro 15 | environment: 16 | TZ: Asia/Shanghai 17 | LANG: C.UTF-8 18 | command: [ 19 | "java", 20 | "-Dfile.encoding=UTF-8", 21 | "-Xms2048m", 22 | "-Xmx8192m", 23 | "-jar", 24 | "arks-sofaboot-0.0.1-SNAPSHOT-executable.jar", 25 | '--server.repository.impl.jdbc.host=mysql', 26 | '--server.repository.impl.jdbc.password=openspg', 27 | '--builder.model.execute.num=5', 28 | '--cloudext.graphstore.url=neo4j://release-openspg-neo4j:7687?user=neo4j&password=neo4j@openspg&database=neo4j', 29 | '--cloudext.searchengine.url=neo4j://release-openspg-neo4j:7687?user=neo4j&password=neo4j@openspg&database=neo4j' 30 | ] 31 | 32 | mysql: 33 | restart: always 34 | image: spg-registry.cn-hangzhou.cr.aliyuncs.com/spg/openspg-mysql:latest 35 | container_name: release-openspg-mysql 36 | volumes: 37 | - /etc/localtime:/etc/localtime:ro 38 | environment: 39 | TZ: Asia/Shanghai 40 | LANG: C.UTF-8 41 | MYSQL_ROOT_PASSWORD: openspg 42 | MYSQL_DATABASE: openspg 43 | ports: 44 | - "3306:3306" 45 | command: [ 46 | '--character-set-server=utf8mb4', 47 | '--collation-server=utf8mb4_general_ci' 48 | ] 49 | 50 | neo4j: 51 | restart: always 52 | image: spg-registry.cn-hangzhou.cr.aliyuncs.com/spg/openspg-neo4j:latest 53 | container_name: release-openspg-neo4j 54 | ports: 55 | - "7474:7474" 56 | - "7687:7687" 57 | environment: 58 | - TZ=Asia/Shanghai 59 | - NEO4J_AUTH=neo4j/neo4j@openspg 60 | - NEO4J_PLUGINS=["apoc"] 61 | - NEO4J_server_memory_heap_initial__size=1G 62 | - NEO4J_server_memory_heap_max__size=4G 63 | - NEO4J_server_memory_pagecache_size=1G 64 | - NEO4J_apoc_export_file_enabled=true 65 | - NEO4J_apoc_import_file_enabled=true 66 | - NEO4J_dbms_security_procedures_unrestricted=* 67 | - NEO4J_dbms_security_procedures_allowlist=* 68 | volumes: 69 | - /etc/localtime:/etc/localtime:ro 70 | - $HOME/dozerdb/logs:/logs 71 | 72 | minio: 73 | image: spg-registry.cn-hangzhou.cr.aliyuncs.com/spg/openspg-minio:latest 74 | container_name: release-openspg-minio 75 | command: server --console-address ":9001" /data 76 | restart: always 77 | environment: 78 | MINIO_ACCESS_KEY: minio 79 | MINIO_SECRET_KEY: minio@openspg 80 | TZ: Asia/Shanghai 81 | ports: 82 | - 9000:9000 83 | - 9001:9001 84 | volumes: 85 | - /etc/localtime:/etc/localtime:ro 86 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 NanGePlus 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 1、介绍 2 | ## 1.1 主要内容 3 | **(第一期)[2024.12.10]KAG开源框架介绍及使用KAG实现知识增强生成应用** 4 | 主要内容:实现功能为产品模式测试、开发者模式测试 5 | 相关视频: 6 | https://www.bilibili.com/video/BV1qWCsY1EtZ/ 7 | https://youtu.be/uhg5l5-K6rE 8 | **(第二期)[2025.01.08]KAG知识增强生成开发模式高阶开发指南,比RAG更强大的检索和推理框架,自定义schema、构建索引、求解推理、知识图谱、GPT大模型、国产大模型、开源大模型** 9 | 主要内容:着重为大家演示开发者模式开发测试流程,自定义schema、构建索引(自定义prompt)、检索(自定义prompt),支持GPT大模型、国产大模型(阿里通义千问)、本地开源大模型(Ollama) 10 | 相关视频: 11 | https://www.bilibili.com/video/BV1SJreYpEBT/ 12 | https://youtu.be/pqSdp6yCg7U 13 | **(第三期)[2025.01.10]OpenSPG KAG新版本V0.6重大更新,开发者模式和产品模式实测,提供了一个更加强大且适应多样化应用场景的知识管理平台,极大地提升了系统的灵活性、易用性** 14 | 主要内容:测试版本升级后的产品模式、开发者模式使用 15 | 相关视频: 16 | https://www.bilibili.com/video/BV1MBcneaE7M/ 17 | https://youtu.be/-OeZkqnAGaM 18 | **(第四期)[2025.01.18]OpenSPG KAG框架V0.6版本在开发者模式下进行多种文件类型、多个文件按批次进行知识索引增量构建测试** 19 | 主要内容:提供一个测试脚本,提供一种方案思路处理多种文件类型、多个文件按批次进行知识索引增量构建 20 | https://www.bilibili.com/video/BV1YswPeFEYw/ 21 | https://youtu.be/YhwV7nIxvUI 22 | 23 | 24 | ## 1.2 KAG框架 25 | **(1)KAG是什么** 26 | KAG是OpenSPG发布v0.5版本中推出的知识增强生成(KAG)的专业领域知识服务框架,旨在充分利用知识图谱和向量检索的优势,增强大型语言模型和知识图谱,以解决 RAG 挑战 27 | OpenSPG是蚂蚁集团结合多年金融领域多元场景知识图谱构建与应用业务经验的总结,并与OpenKG联合推出的基于SPG(Semantic-enhanced Programmable Graph)框架研发的知识图谱引擎 28 | 检索增强生成(RAG)技术推动了领域应用与大模型结合。然而,RAG 存在着向量相似度与知识推理相关性差距大、对知识逻辑(如数值、时间关系、专家规则等)不敏感等问题,这些缺陷阻碍了专业知识服务的落地 29 | 官方网址:https://openspg.yuque.com/ndx6g9/0.5/figkrornp0qwelhl 30 | Github地址:https://github.com/OpenSPG/KAG 31 | **(2)KAG技术框架** 32 | kag框架包括 kag-builder、kag-solver、kag-model 三部分。v0.5版本发布只涉及前两部分,kag-model 将在后续逐步开源发布 33 | **kag-builder** 34 | 实现了一种对大型语言模型(LLM)友好的知识表示,在 DIKW(Data、Information、Knowledge和Wisdom)的层次结构基础上,升级 SPG 知识表示能力 35 | 在同一知识类型(如实体类型、事件类型)上兼容无 schema 约束的信息提取和有 schema 约束的专业知识构建,并支持图结构与原始文本块之间的互索引表示,为推理问答阶段的高效检索提供支持 36 | DIKW金字塔很好地描述了人类认识世界的规律和层次结构,分别是: 37 | 数据(Data原始的事实集合)、信息(Information可被分析测量的结构化数据)、知识(Knowledge需要洞察力和理解力进行学习)、智慧(Wisdom推断未来发生的相关性,指导行动) 38 | 数据是基础,信息是支撑,知识是核心,智慧是灵魂 39 | 自底向上每一层都比下一层增加某些特质。数据层是最基本的原始素材;信息层加入了有逻辑的数据内容;知识层提炼信息之间的联系,加入“如何去使用”;智慧层加入预测能力,能回答“为什么用” 40 | **kag-solver** 41 | 采用逻辑符号引导的混合求解和推理引擎,该引擎包括三种类型的运算符:规划、推理和检索,将自然语言问题转化为结合语言和符号的问题求解过程 42 | 在这个过程中,每一步都可以利用不同的运算符,如精确匹配检索、文本检索、数值计算或语义推理,从而实现四种不同问题求解过程的集成:检索、知识图谱推理、语言推理和数值计算 43 | 44 | 45 | # 2、前期准备工作 46 | ## 2.1 集成开发环境搭建 47 | anaconda提供python虚拟环境,pycharm提供集成开发环境 48 | **具体参考如下视频:** 49 | 【大模型应用开发-入门系列】03 集成开发环境搭建-开发前准备工作 50 | https://youtu.be/KyfGduq5d7w 51 | https://www.bilibili.com/video/BV1nvdpYCE33/ 52 | 53 | ## 2.2 大模型LLM服务接口调用方案 54 | (1)gpt大模型等国外大模型使用方案 55 | 国内无法直接访问,可以使用代理的方式,具体代理方案自己选择 56 | 这里推荐大家使用:https://nangeai.top/register?aff=Vxlp 57 | (2)非gpt大模型方案 OneAPI方式或大模型厂商原生接口 58 | (3)本地开源大模型方案(Ollama方式) 59 | **具体参考如下视频:** 60 | 【大模型应用开发-入门系列】04 大模型LLM服务接口调用方案 61 | https://youtu.be/mTrgVllUl7Y 62 | https://www.bilibili.com/video/BV1BvduYKE75/ 63 | 64 | 65 | # 3、项目初始化 66 | ## 3.1 下载源码 67 | GitHub或Gitee中下载工程文件到本地,下载地址如下: 68 | https://github.com/NanGePlus/KagTest 69 | https://gitee.com/NanGePlus/KagTest 70 | 71 | ## 3.2 构建项目 72 | 使用pycharm构建一个项目,为项目配置虚拟python环境 73 | 项目名称:KagTest 74 | 虚拟环境名称保持与项目名称一致 75 | 76 | ## 3.3 将相关代码拷贝到项目工程中 77 | 将下载的代码文件夹中的文件全部拷贝到新建的项目根目录下 78 | 79 | 80 | # 4、功能测试 81 | ## 4.1 产品模式测试 82 | ## (1) 部署OpenSPG-Server 83 | 首先,使用docker部署和启动OpenSPG-Server,运行的指令为: 84 | docker compose -f docker-compose.yml up -d 85 | 对于docker的使用,这里不做详细的赘述了,大家可以去看我这期视频,里面有对于docker非常详细的讲解,从安装部署到使用 86 | https://www.bilibili.com/video/BV1LhUAYFEku/?vd_source=30acb5331e4f5739ebbad50f7cc6b949 87 | https://youtu.be/hD09V7jaXSo 88 | ## (2) 产品访问 89 | 浏览器输入 http://127.0.0.1:8887, 可访问openspg-kag 产品界面 90 | 浏览器输入 http://127.0.0.1:7474/browser/ , 可访问neo4j图数据库,用户名和密码分为neo4j neo4j@openspg 91 | ## (3) 功能测试 92 | **图存储配置:** 93 | {"database":"test","password":"neo4j@openspg","uri":"neo4j://release-openspg-neo4j:7687","user":"neo4j"} 94 | **模型配置:** 95 | {"api_key":"sk-zL8dD8hTwv0d5GRlYC0eUPH8QvWxnXIR6XTWsx7WKzoSO1uo","base_url":"https://yunwu.ai/v1","model":"gpt-4o-mini","client_type":"maas"} 96 | **向量配置:** 97 | {"vectorizer":"kag.common.vectorizer.OpenAIVectorizer","api_key":"sk-zL8dD8hTwv0d5GRlYC0eUPH8QvWxnXIR6XTWsx7WKzoSO1uo","vector_dimensions":"1536","base_url":"https://yunwu.ai/v1","model":"text-embedding-ada-002"} 98 | **提示词中英文配置:** 99 | {"biz_scene":"default","language":"zh"} 100 | 101 | ## 4.2 开发者模式测试-默认配置参数 102 | ### (1)安装依赖 103 | 新建命令行终端,按照如下指令进行依赖安装 104 | cd KAG 105 | pip install -e . 106 | 安装完成之后可以运行如下指令验证是否安装成功 107 | knext --version 108 | ### (2)调整配置文件 109 | 先修改项目配置文件example.cfg,根据自己的实际情况,设置embedding、LLM配置参数 110 | ### (3)使用配置文件初始化项目 111 | 新建命令行终端,运行如下命令进行项目创建和初始化 112 | knext project create --config_path ./example.cfg 113 | 项目初始化完成后,进入到对应的文件夹下,根据实际业务需求调整schema,调整完成后再执行提交schema 114 | knext schema commit 115 | ### (4)脚本测试 116 | 相关代码参考根目录下Demo文件夹 117 | ### (4-1)准备测试文档 118 | 将测试文档拷贝到新建项目文件夹中的builder/data下,支持txt、pdf、markdown等 119 | ### (4-2)构建索引 120 | 打开命令行终端,进入脚本所在目录,运行 python indexer.py 命令 121 | ### (4-3)检索 122 | 打开命令行终端,进入脚本所在目录,运行 python query.py 命令 123 | 124 | ## 4.3 开发者模式测试-自定义schema、构建索引(自定义prompt)、检索(自定义prompt),支持GPT大模型、国产大模型(阿里通义千问)、本地开源大模型(Ollama) 125 | ## 4.3.1 前置工作 126 | ### (1) 部署OpenSPG-Server 127 | 部署方式两种:docker服务或源码部署,这里使用docker部署和启动OpenSPG-Server,运行的指令为: 128 | docker compose -f docker-compose.yml up -d 129 | 对于docker的使用,这里不做详细的赘述了,大家可以去看我这期视频,里面有对于docker非常详细的讲解,从安装部署到使用 130 | https://www.bilibili.com/video/BV1LhUAYFEku/?vd_source=30acb5331e4f5739ebbad50f7cc6b949 131 | https://youtu.be/hD09V7jaXSo 132 | ### (2)安装依赖 133 | 下载KAG源码 https://github.com/OpenSPG/KAG 解压后将源码工程拷贝到项目根目录,截止2025-01-07,最新版本是v0.5.1 134 | 新建命令行终端,按照如下指令进行依赖安装 135 | cd KAG 136 | pip install -e . 137 | 安装完成之后可以运行如下指令验证是否安装成功 138 | knext --version 139 | ## 4.3.2 测试案例 140 | ### (1)调整配置文件 141 | 将根目录下的other/config目录下的example.cfg文件拷贝一份到根目录,根据自己的业务修改配置参数,namespace、embedding、LLM等配置参数 142 | ### (2)使用配置文件初始化项目 143 | 新建命令行终端,运行如下命令进行项目创建和初始化 144 | knext project create --config_path ./example.cfg 145 | ### (3)提交schema 146 | 项目初始化完成后,进入到对应的项目文件夹下,根据实际业务需求调整schema,调整完成后再执行提交schema 147 | knext schema commit 148 | ### (4)构建索引 149 | 首先将文档拷贝到新建项目文件夹中的builder/data下,支持txt、pdf、markdown、docx、json、csv等 150 | 并可以根据自身业务需求,设置相关prompt内容:ner.py、std.py、triple.py 151 | 打开命令行终端,进入脚本所在目录builder,运行 python indexer.py 命令 152 | 索引构建成功后,可登录到 http://127.0.0.1:8887/或 http://127.0.0.1:7474/browser/ 查看知识图谱 153 | 图数据库账号密码:neo4j neo4j@openspg 154 | ### (5)检索 155 | 打开命令行终端,进入脚本所在目录solver,运行 python query.py 命令 156 | 根据自身业务需求,可设置相关prompt内容:logic_form_plan.py、question_ner.py、resp_generator.py 157 | 也可以在产品端进行测试 http://127.0.0.1:8887/ 158 | 159 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | services: 3 | server: 4 | restart: always 5 | image: spg-registry.cn-hangzhou.cr.aliyuncs.com/spg/openspg-server:latest 6 | container_name: release-openspg-server 7 | ports: 8 | - "8887:8887" 9 | depends_on: 10 | - mysql 11 | - neo4j 12 | volumes: 13 | - /etc/localtime:/etc/localtime:ro 14 | environment: 15 | TZ: Asia/Shanghai 16 | LANG: C.UTF-8 17 | command: [ 18 | "java", 19 | "-Dfile.encoding=UTF-8", 20 | "-Xms2048m", 21 | "-Xmx8192m", 22 | "-jar", 23 | "arks-sofaboot-0.0.1-SNAPSHOT-executable.jar", 24 | '--server.repository.impl.jdbc.host=mysql', 25 | '--server.repository.impl.jdbc.password=openspg', 26 | '--builder.model.execute.num=5', 27 | '--cloudext.graphstore.url=neo4j://release-openspg-neo4j:7687?user=neo4j&password=neo4j@openspg&database=neo4j', 28 | '--cloudext.searchengine.url=neo4j://release-openspg-neo4j:7687?user=neo4j&password=neo4j@openspg&database=neo4j' 29 | ] 30 | 31 | mysql: 32 | restart: always 33 | image: spg-registry.cn-hangzhou.cr.aliyuncs.com/spg/openspg-mysql:latest 34 | container_name: release-openspg-mysql 35 | volumes: 36 | - /etc/localtime:/etc/localtime:ro 37 | environment: 38 | TZ: Asia/Shanghai 39 | LANG: C.UTF-8 40 | MYSQL_ROOT_PASSWORD: openspg 41 | MYSQL_DATABASE: openspg 42 | ports: 43 | - "3306:3306" 44 | command: [ 45 | '--character-set-server=utf8mb4', 46 | '--collation-server=utf8mb4_general_ci' 47 | ] 48 | 49 | neo4j: 50 | image: spg-registry.cn-hangzhou.cr.aliyuncs.com/spg/openspg-neo4j:latest 51 | container_name: release-openspg-neo4j 52 | ports: 53 | - "7474:7474" 54 | - "7687:7687" 55 | environment: 56 | - TZ=Asia/Shanghai 57 | - NEO4J_AUTH=neo4j/neo4j@openspg 58 | - NEO4J_PLUGINS=["apoc"] 59 | - NEO4J_server_memory_heap_initial__size=1G 60 | - NEO4J_server_memory_heap_max__size=4G 61 | - NEO4J_server_memory_pagecache_size=1G 62 | - NEO4J_apoc_export_file_enabled=true 63 | - NEO4J_apoc_import_file_enabled=true 64 | - NEO4J_dbms_security_procedures_unrestricted=* 65 | - NEO4J_dbms_security_procedures_allowlist=* 66 | volumes: 67 | - /etc/localtime:/etc/localtime:ro 68 | - $HOME/dozerdb/logs:/logs 69 | -------------------------------------------------------------------------------- /example.cfg: -------------------------------------------------------------------------------- 1 | [project] 2 | namespace = KagDemo 3 | host_addr = http://localhost:8887 4 | 5 | # vectorizer loaded by OpenAI 6 | [vectorizer] 7 | vectorizer = kag.common.vectorizer.OpenAIVectorizer 8 | model = text-embedding-ada-002 9 | api_key = sk-zL8dD8hTwv0d5GRlYC0eUPH8QvWxnXIR6XTWsx7WKzoSO1uo 10 | base_url = https://yunwu.ai/v1 11 | vector_dimensions = 1536 12 | 13 | [llm] 14 | client_type = maas 15 | base_url = https://yunwu.ai/v1 16 | api_key = sk-zL8dD8hTwv0d5GRlYC0eUPH8QvWxnXIR6XTWsx7WKzoSO1uo 17 | model = gpt-4o-mini 18 | 19 | [prompt] 20 | biz_scene = default 21 | language = zh 22 | 23 | [log] 24 | level = INFO -------------------------------------------------------------------------------- /other/config/example.cfg: -------------------------------------------------------------------------------- 1 | [project] 2 | namespace = KagDemo 3 | host_addr = http://localhost:8887 4 | 5 | ; GPT大模型,参数替换为自己的 6 | [vectorizer] 7 | vectorizer = kag.common.vectorizer.OpenAIVectorizer 8 | model = text-embedding-3-small 9 | api_key = sk-DK45tcPWxP9azYfMTXAltpR3vTxdZcG205zdLndwK9CoAUe0 10 | base_url = https://yunwu.ai/v1 11 | vector_dimensions = 1536 12 | 13 | [llm] 14 | client_type = maas 15 | base_url = https://yunwu.ai/v1 16 | api_key = sk-DK45tcPWxP9azYfMTXAltpR3vTxdZcG205zdLndwK9CoAUe0 17 | model = gpt-4o-mini 18 | 19 | ; 国产大模型(OneAPI方案),参数替换为自己的 20 | [vectorizer] 21 | vectorizer = kag.common.vectorizer.OpenAIVectorizer 22 | model = text-embedding-v1 23 | api_key = sk-UFkSKbkCGrSQB33IAa2037E89bF34572B21d785e31Ab3194 24 | base_url = http://139.224.72.218:3000/v1 25 | vector_dimensions = 1536 26 | 27 | [llm] 28 | client_type = maas 29 | base_url = http://139.224.72.218:3000/v1 30 | api_key = sk-UFkSKbkCGrSQB33IAa2037E89bF34572B21d785e31Ab3194 31 | model = qwen-plus 32 | 33 | ; 本地开源大模型(Ollama方案),参数替换为自己的 34 | [vectorizer] 35 | vectorizer = kag.common.vectorizer.OpenAIVectorizer 36 | model = nomic-embed-text 37 | api_key = EMPTY 38 | base_url = http://IP:11434/v1 39 | vector_dimensions = 1536 40 | 41 | [llm] 42 | client_type = ollama 43 | base_url = http://IP:11434/v1 44 | api_key = EMPTY 45 | model = qwen2.5:14b 46 | 47 | ; 阿里通义千问大模型(官方),参数替换为自己的 48 | [vectorizer] 49 | vectorizer = kag.common.vectorizer.OpenAIVectorizer 50 | model = text-embedding-v1 51 | api_key = sk-045b1244d099e90 52 | base_url = https://dashscope.aliyuncs.com/compatible-mode/v1 53 | vector_dimensions = 1536 54 | 55 | [llm] 56 | client_type = maas 57 | base_url = https://dashscope.aliyuncs.com/compatible-mode/v1 58 | api_key = sk-045b124d099e90 59 | model = qwen-plus 60 | 61 | [prompt] 62 | biz_scene = default 63 | language = zh 64 | 65 | [log] 66 | level = INFO 67 | -------------------------------------------------------------------------------- /other/docker/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | services: 3 | server: 4 | restart: always 5 | image: spg-registry.cn-hangzhou.cr.aliyuncs.com/spg/openspg-server:latest 6 | container_name: release-openspg-server 7 | ports: 8 | - "8887:8887" 9 | depends_on: 10 | - mysql 11 | - neo4j 12 | volumes: 13 | - /etc/localtime:/etc/localtime:ro 14 | environment: 15 | TZ: Asia/Shanghai 16 | LANG: C.UTF-8 17 | command: [ 18 | "java", 19 | "-Dfile.encoding=UTF-8", 20 | "-Xms2048m", 21 | "-Xmx8192m", 22 | "-jar", 23 | "arks-sofaboot-0.0.1-SNAPSHOT-executable.jar", 24 | '--server.repository.impl.jdbc.host=mysql', 25 | '--server.repository.impl.jdbc.password=openspg', 26 | '--builder.model.execute.num=5', 27 | '--cloudext.graphstore.url=neo4j://release-openspg-neo4j:7687?user=neo4j&password=neo4j@openspg&database=neo4j', 28 | '--cloudext.searchengine.url=neo4j://release-openspg-neo4j:7687?user=neo4j&password=neo4j@openspg&database=neo4j' 29 | ] 30 | 31 | mysql: 32 | restart: always 33 | image: spg-registry.cn-hangzhou.cr.aliyuncs.com/spg/openspg-mysql:latest 34 | container_name: release-openspg-mysql 35 | volumes: 36 | - /etc/localtime:/etc/localtime:ro 37 | environment: 38 | TZ: Asia/Shanghai 39 | LANG: C.UTF-8 40 | MYSQL_ROOT_PASSWORD: openspg 41 | MYSQL_DATABASE: openspg 42 | ports: 43 | - "3306:3306" 44 | command: [ 45 | '--character-set-server=utf8mb4', 46 | '--collation-server=utf8mb4_general_ci' 47 | ] 48 | 49 | neo4j: 50 | image: spg-registry.cn-hangzhou.cr.aliyuncs.com/spg/openspg-neo4j:latest 51 | container_name: release-openspg-neo4j 52 | ports: 53 | - "7474:7474" 54 | - "7687:7687" 55 | environment: 56 | - TZ=Asia/Shanghai 57 | - NEO4J_AUTH=neo4j/neo4j@openspg 58 | - NEO4J_PLUGINS=["apoc"] 59 | - NEO4J_server_memory_heap_initial__size=1G 60 | - NEO4J_server_memory_heap_max__size=4G 61 | - NEO4J_server_memory_pagecache_size=1G 62 | - NEO4J_apoc_export_file_enabled=true 63 | - NEO4J_apoc_import_file_enabled=true 64 | - NEO4J_dbms_security_procedures_unrestricted=* 65 | - NEO4J_dbms_security_procedures_allowlist=* 66 | volumes: 67 | - /etc/localtime:/etc/localtime:ro 68 | - $HOME/dozerdb/logs:/logs 69 | --------------------------------------------------------------------------------