├── .gitignore ├── .idea ├── .gitignore ├── inspectionProfiles │ ├── Project_Default.xml │ └── profiles_settings.xml ├── misc.xml ├── modules.xml ├── rag_with_chat.iml └── vcs.xml ├── Dockerfile ├── LICENSE ├── README.md ├── benchmark ├── bench_data.json ├── benchmark.py └── model_serve.py ├── chatgpt_proxy.py ├── config.py ├── data ├── car_user_manual.pdf ├── gold_result.json └── test_question.json ├── example_test.py ├── generate_answer.py ├── images ├── .DS_Store ├── image_RiYKWHwtQa.png ├── image_fChhMjnifo.png └── image_tL0rUhQiZB.png ├── models ├── Baichuan2-7B-Chat │ └── README.md ├── Qwen2-7B-Instruct │ └── README.md └── chatglm3-6b │ └── README.md ├── pdf_parse.py ├── pre_train_model ├── bce-reranker-base_v1 │ └── README.md ├── bge-m3 │ └── README.md ├── bge-reranker-large │ └── README.md ├── m3e-large │ └── README.md └── text2vec-base-chinese │ └── README.md ├── requirements.txt ├── rerank_model.py ├── retriever ├── bge_retriever.py ├── bm25_retriever.py ├── m3e_retriever.py └── tfidf_retriever.py ├── run.py ├── test_score.py └── vllm_model.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 
107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 116 | .pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | 157 | # pytype static type analyzer 158 | .pytype/ 159 | 160 | # Cython debug symbols 161 | cython_debug/ 162 | 163 | # PyCharm 164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 166 | # and can be added to the global gitignore or merged into this file. For a more nuclear 167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
168 | #.idea/ 169 | 170 | # PyPI configuration file 171 | .pypirc 172 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Editor-based HTTP Client requests 5 | /httpRequests/ 6 | # Datasource local storage ignored files 7 | /dataSources/ 8 | /dataSources.local.xml 9 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/rag_with_chat.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # 使用官方 Python 3.9 镜像作为基础镜像 2 | FROM python:3.9 3 | 4 | # 设置工作目录 
5 | WORKDIR /app 6 | 7 | # 复制当前目录下的所有文件到工作目录 8 | COPY . /app 9 | 10 | # 安装依赖 11 | RUN pip install -r requirements.txt 12 | 13 | # 容器启动时运行的命令 14 | CMD ["python", "./run.py"] 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 zhangzg1 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 基于RAG的汽车知识问答系统 2 | 3 | ## 1、介绍 4 | 5 | 本项目属于大模型 RAG 任务,使用现有的车主手册构建知识库,然后选择知识库中的相关知识用于辅助大模型生成。整个方案的构建流程主要分为三大部分:构建知识库、知识检索、答案生成。该项目主要结合了 LLM、Langchain、提示工程、优化知识库结构和检索生成流程、vllm 推理优化框架等技术。 6 | 7 | ## 2、下载源码与环境安装(Linux) 8 | 9 | ``` 10 | # 下载源码 11 | git clone https://github.com/zhangzg1/rag_with_chat.git 12 | cd rag_with_chat 13 | 14 | # 创建虚拟环境 15 | conda create -n rag_with_chat python=3.9 16 | conda activate rag_with_chat 17 | 18 | # 安装其他依赖包 19 | pip install -r requirements.txt 20 | 21 | # 选择docker容器化部署(需要先将模型下载到本地) 22 | docker build -t rag_with_chat:latest . 23 | ``` 24 | 25 | ## 3、代码结构 26 | 27 | ```text 28 | . 29 | ├── benchmark 30 | └── bench_data.json # 基准测试数据 31 | └── benchmark.py # 基准测试 32 | └── model_serve.py # 模型服务 33 | ├── data 34 | └── gold_result.json # 标准答案数据集 35 | └── test_question.json # 测试数据集 36 | └── car_user_manual.pdf # 汽车用户手册文件 37 | ├── images 38 | ├── models # 基座大语言模型 39 | └── Baichuan2-7B-Chat 40 | └── chatglm3-6b 41 | └── Qwen2-7B-Instruct 42 | ├── pre_train_model 43 | └── bce-reranker-base_v1 # bce重排序模型 44 | └── bge-reranker-large # bge重排序模型 45 | └── bge-m3 # bge文本嵌入模型 46 | └── m3e-large # m3e文本嵌入模型 47 | └── text2vec-base-chinese # 相似度模型 48 | ├── retriever 49 | └── bge_retriever.py # bge召回 50 | └── bm25_retriever.py # bm25召回 51 | └── m3e_retriever.py # m3e召回 52 | └── tfidf_retriever.py # tf-idf召回 53 | ├── .env # API 密钥 54 | ├── config.py # 配置文件 55 | ├── pdf_parse.py # pdf文档解析器 56 | ├── rerank_model.py # 重排序逻辑 57 | ├── generate_answer.py # rag流程 58 | ├── chatgpt_proxy.py # chatgpt代理 59 | ├── vllm_model.py # vllm大模型加速 60 | ├── test_score.py # 测试集得分计算 61 | ├── example_test.py # 测试样例 62 | ├── run.py # 主函数 63 | ├── requirements.txt # 第三方依赖库 64 | ├── README.md # 说明文档 65 | ``` 66 | 67 | ## 4、代码运行 68 | 69 | ``` 70 | # 运行RAG系统,并测试问答数据集 71 | python run.py 72 | 73 | # 
样例测试,基于RAG的知识问答 74 | python example_test.py 75 | 76 | # 对RAG系统进行基准测试 77 | python model_serve.py # 启动大模型API服务 78 | python benchmark.py # 大模型压力测试 79 | ``` 80 | 81 | ## 5、项目概述 82 | 83 | ### 5.1 基于大模型的文档检索问答 84 | 85 | 该项目主要以大模型为中心制作一个问答系统,回答用户的汽车相关问题。需要根据问题,在文档中定位相关信息的位置,并根据文档内容通过大模型生成相应的答案。本项目涉及的问题主要围绕汽车使用、维修、保养等方面,具体可参考下面的例子: 86 | 87 | ```text 88 | 问题1:怎么打开危险警告灯? 89 | 答案1:危险警告灯开关在方向盘下方,按下开关即可打开危险警告灯。 90 | 91 | 问题2:车辆如何保养? 92 | 答案2:为了保持车辆处于最佳状态,建议您定期关注车辆状态,包括定期保养、洗车、内部清洁、外部清洁、轮胎的保养、低压蓄电池的保养等。 93 | 94 | 问题3:靠背太热怎么办? 95 | 答案3:您好,如果您的座椅靠背太热,可以尝试关闭座椅加热功能。在多媒体显示屏上依次点击空调开启按键→座椅→加热,在该界面下可以关闭座椅加热。 96 | ``` 97 | 98 | ### 5.2 数据集 99 | 100 | 这里的训练数据集主要是一本汽车的用户手册( pdf 文件): 101 | 102 | ![image](https://github.com/zhangzg1/rag_with_chat/blob/main/images/image_fChhMjnifo.png) 103 | 104 | 测试集问题示例: 105 | 106 | ```json 107 | { 108 | "question": "自动模式下,中央显示屏是如何切换日间和夜间模式的?", 109 | "answer_1": "", 110 | "answer_2": "", 111 | "answer_3": "" 112 | }, 113 | { 114 | "question": "如何通过中央显示屏进行副驾驶员座椅设置?", 115 | "answer_1": "", 116 | "answer_2": "", 117 | "answer_3": "" 118 | } 119 | ``` 120 | 121 | ## 6、项目流程 122 | 123 | ### 6.1 pdf 解析 124 | 125 | ![image](https://github.com/zhangzg1/rag_with_chat/blob/main/images/image_RiYKWHwtQa.png) 126 | 127 | 对于 pdf 文件中类似这样的文本内容,该项目最终采用了三种解析方案的综合(具体代码[pdf_parse.py](https://github.com/zhangzg1/rag_with_chat/blob/main/pdf_parse.py)): 128 | 129 | - pdf 分块解析,尽量保证一个小标题 + 对应文档在一个文档块,其中文档块的长度分别是 512 和 1024。 130 | 131 | - pdf 滑窗法解析,把文档按句号分割,然后构建滑动窗口,其中文档块的长度分别是 256 和 512。 132 | 133 | - pdf 非滑窗法解析,把文档按句号分割,然后按照文档块预设尺寸均匀切分,其中文档块的长度分别是 256 和 512。 134 | 135 | 按照这三种解析方案对数据处理之后,对文档块做了去重,最后把这些文档块输入给召回模块。使用三种解析方法的综合,可以保证文本内容的完整性和跨页连续性。 136 | 137 | ### 6.2 召回 138 | 139 | 召回主要使用 langchain 中的 retrievers 进行文本的召回。我们知道深度语义召回,侧重泛化性,字面召回,侧重关键词/实体的字面相关性,这两个召回方法也比较有代表性,因此选用了这两个召回方法。(具体代码[retriever](https://github.com/zhangzg1/rag_with_chat/tree/main/retriever)) 140 | 141 | 1. 深度语义召回:这里我们使用了 m3e 召回和 bge 召回两种方法,m3e和bge都是文本嵌入模型,所以我们使用这两种模型分别将处理后的 pdf 文件转换成向量,最后都使用 faiss 向量数据库进行存储。 142 | 2. 
字面召回:这里我们使用了 BM25 召回和 TF-IDF 召回两种方法,它们通常用于计算两个文本,或者文本与文档之间的相关性。所以可以用于文本相似度计算和文本检索等应用场景。BM25 召回利用 LangChain 的 BM25Retrievers,TF-IDF 召回利用 LangChain 的TFIDFRetriever。 143 | 144 | ### 6.3 重排序 145 | 146 | Reranker 是信息检索生态系统中的一个重要组成部分,用于评估搜索结果,并进行重新排序,从而提升查询结果相关性。在 RAG 应用中,主要在拿到召回结果后使用 Reranker,能够更有效地确定文档和查询之间的语义相关性,更精细地对结果重排,最终提高搜索质量。将 Reranker 整合到 RAG 应用中可以显著提高生成答案的精确度,因为 Reranker 能够在单路或多路的召回结果中挑选出和问题最接近的文档。此外,扩大检索结果的丰富度(例如多路召回)配合精细化筛选最相关结果(Reranker)还能进一步提升最终结果质量。使用 Reranker 可以排除掉第一层召回中和问题关系不大的内容,将输入给大模型的上下文范围进一步缩小到最相关的一小部分文档中。通过缩短上下文, LLM 能够更“关注”上下文中的所有内容,避免忽略重点内容,还能节省推理成本。 147 | 148 | ![image](https://github.com/zhangzg1/rag_with_chat/blob/main/images/image_tL0rUhQiZB.png) 149 | 150 | 上图为增加了 Reranker 的 RAG 应用架构。可以看出,这个检索系统包含两个阶段: 151 | 152 | 1. 在向量数据库中检索出 Top-K 相关文档,同时也可以配合 Sparse embedding(稀疏向量模型,例如TF-DF)覆盖全文检索能力 153 | 2. Reranker 根据这些检索出来的文档与查询的相关性进行打分和重排。重排后挑选最靠前的结果作为 Prompt 中的 Context 传入 LLM,最终生成质量更高、相关性更强的答案。 154 | 155 | 在该项目中。我们分别使用了 bge-reranker 和 bce-reranker-base_v1 模型对检索召回的文档进行重排。(具体代码[rerank_model.py](https://github.com/zhangzg1/rag_with_chat/blob/main/rerank_model.py)) 156 | 157 | ### 6.4 vllm 推理优化 158 | 159 | vLLM 是一个基于 Python 的 LLM 推理和服务框架,它的主要优势在于简单易用和性能高效。通过 PagedAttention 技术、连续批处理、CUDA 核心优化以及分布式推理支持,vLLM 能够显著提高 LLM 的推理速度,降低显存占用,更好地满足实际应用需求。vLLM 推理框架使大模型推理速度得到明显提升,推理速度比普通推理有 1 倍的加速。在产品级的部署上,vLLM 既能满足 batch 推理的要求,又能实现高并发下的 continuous batching,在实际产品部署中应用是非常广泛的。 160 | 161 | 在这个项目中,LLM 分别采用 ChatGLM3-6B,Qwen2-7B-Chat 和 Baichuan2-7B-Chat 作为大模型基座,并且都使用了vllm框架来进行加速推理优化。(具体代码[vllm_model.py](https://github.com/zhangzg1/rag_with_chat/blob/main/vllm_model.py)) 162 | -------------------------------------------------------------------------------- /benchmark/benchmark.py: -------------------------------------------------------------------------------- 1 | import aiohttp 2 | import asyncio 3 | import json 4 | import logging 5 | import time 6 | from typing import List, Tuple 7 | import numpy as np 8 | 9 | # 日志配置 10 | logger = logging.getLogger(__name__) 11 | 
logging.basicConfig(level=logging.INFO) 12 | 13 | # 存储请求的延迟信息 14 | REQUEST_LATENCY: List[Tuple[int, int, float]] = [] 15 | 16 | # API配置 17 | API_URL = 'http://127.0.0.1:8000/qwen' 18 | HEADERS = {'Content-Type': 'application/json'} 19 | 20 | 21 | # 发送HTTP POST请求到指定的API,并处理响应结果 22 | async def send_request(session, payload, prompt_len): 23 | try: 24 | request_start_time = time.time() 25 | async with session.post(API_URL, data=payload, headers=HEADERS) as response: 26 | if response.status == 200: 27 | result = await response.json() 28 | content = result.get('content', '') 29 | completion_tokens = len(content) 30 | request_end_time = time.time() 31 | request_latency = request_end_time - request_start_time 32 | REQUEST_LATENCY.append((prompt_len, completion_tokens, request_latency)) 33 | return result 34 | else: 35 | error_msg = await response.text() 36 | logger.error(f"Error {response.status}: {error_msg}") 37 | return {'error': True, 'message': error_msg} 38 | except Exception as e: 39 | logger.error(f"Request failed: {str(e)}") 40 | return {'error': True, 'message': str(e)} 41 | 42 | 43 | # 基准测试运行器 44 | class BenchMarkRunner: 45 | def __init__(self, requests: List[Tuple[str, int, int]], concurrency: int): 46 | self.concurrency = concurrency 47 | self.requests = requests 48 | self.request_left = len(requests) 49 | self.request_queue = asyncio.Queue() 50 | 51 | # 启动基准测试 52 | async def run(self): 53 | tasks = [] 54 | for i in range(self.concurrency): 55 | task = asyncio.create_task(self.worker()) 56 | tasks.append(task) 57 | 58 | for req in self.requests: 59 | await self.request_queue.put(req) 60 | 61 | await asyncio.gather(*tasks) 62 | 63 | # 处理队列中的请求 64 | async def worker(self): 65 | timeout = aiohttp.ClientTimeout(total=2 * 60) # 增加超时时间 66 | async with aiohttp.ClientSession(timeout=timeout) as session: 67 | while self.request_left > 0: 68 | try: 69 | prompt = await self.request_queue.get() 70 | message = [{"role": "user", "content": prompt}] 71 | payload = 
json.dumps({"message": message}) 72 | 73 | response = await send_request(session, payload, len(prompt)) 74 | if 'error' in response: 75 | logger.error(f"Request failed: {response['message']}") 76 | else: 77 | logger.info(f"Response {len(self.requests) - self.request_left}") 78 | 79 | self.request_left -= 1 80 | except Exception as e: 81 | logger.error(f"Worker error: {str(e)}") 82 | break 83 | 84 | 85 | # 主函数 86 | def main(): 87 | # 并发任务数量 88 | concurrency = 10 89 | logger.info("Preparing for benchmark.") 90 | 91 | # 加载测试数据 92 | with open("bench_data.json", "r") as f: 93 | test_set = json.load(f) 94 | input_requests = list(test_set.values()) 95 | 96 | logger.info("Benchmark starts.") 97 | benchmark_start_time = time.time() 98 | asyncio.run(BenchMarkRunner(input_requests, concurrency).run()) 99 | benchmark_end_time = time.time() 100 | benchmark_time = benchmark_end_time - benchmark_start_time 101 | 102 | # 计算并打印基准测试时间,和吞吐量(请求/秒):请求数量除以总时间 103 | print(f"Total time: {benchmark_time:.4f} s") 104 | print(f"Throughput: {len(REQUEST_LATENCY) / benchmark_time:.2f} requests/s") 105 | 106 | # 计算并打印所有延迟的平均值 107 | avg_latency = np.mean([latency for _, _, latency in REQUEST_LATENCY]) 108 | print(f"Average latency: {avg_latency:.4f} s") 109 | 110 | # 计算并打印每个token的平均延迟:延迟除以提示长度和输出长度之和 111 | avg_per_token_latency = np.mean( 112 | [latency / (prompt_len + output_len) for prompt_len, output_len, latency in REQUEST_LATENCY] 113 | ) 114 | print(f"Average latency per token: {avg_per_token_latency:.4f} s") 115 | 116 | # 计算并打印每输出令牌平均延迟:延迟除以输出长度 117 | avg_per_output_token_latency = np.mean( 118 | [latency / output_len for _, output_len, latency in REQUEST_LATENCY] 119 | ) 120 | print(f"Average latency per output token: {avg_per_output_token_latency:.4f} s") 121 | 122 | # 计算并打印token吞吐量(token/s):总输出长度除以基准测试时间 123 | throughput = sum([output_len for _, output_len, _ in REQUEST_LATENCY]) / benchmark_time 124 | print(f"Throughput: {throughput} tokens/s") 125 | 126 | 127 | if __name__ == 
'__main__': 128 | main() 129 | -------------------------------------------------------------------------------- /benchmark/model_serve.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict 2 | from collections import defaultdict 3 | from fastapi import FastAPI 4 | from fastapi.middleware.cors import CORSMiddleware 5 | from pydantic import BaseModel 6 | from vllm import LLM, SamplingParams 7 | 8 | app = FastAPI() 9 | app.add_middleware( 10 | CORSMiddleware, 11 | allow_origins=["*"], 12 | allow_credentials=True, 13 | allow_methods=["*"], 14 | allow_headers=["*"], 15 | ) 16 | 17 | 18 | # 定义请求体的数据结构 19 | class ChatRequest(BaseModel): 20 | message: List[Dict[str, str]] 21 | 22 | 23 | # 全局变量 24 | model_path = "../models/Qwen2-7B-Instruct" 25 | llm = None 26 | sampling_params = SamplingParams(temperature=0, max_tokens=64, top_k=1) 27 | 28 | 29 | # 在服务启动时加载模型 30 | @app.on_event("startup") 31 | async def load_model(): 32 | global llm, model_path 33 | llm = LLM(model=model_path, tensor_parallel_size=1) 34 | 35 | 36 | # 处理聊天请求 37 | @app.post("/qwen") 38 | async def qwen(chat_request: ChatRequest): 39 | message = chat_request.message 40 | result = defaultdict(str) 41 | 42 | # 使用模型生成回复 43 | prompt = f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n" 44 | outputs = llm.generate([prompt], sampling_params) 45 | response = outputs[0].outputs[0].text 46 | 47 | result["role"] = "assistant" 48 | result["content"] = response 49 | return result 50 | 51 | 52 | # 启动服务 53 | if __name__ == "__main__": 54 | import uvicorn 55 | 56 | uvicorn.run("model_serve:app", host="127.0.0.1", port=8000, reload=True) 57 | -------------------------------------------------------------------------------- /chatgpt_proxy.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from concurrent.futures import 
ThreadPoolExecutor 3 | from dotenv import load_dotenv 4 | import os 5 | 6 | load_dotenv() 7 | 8 | api_key = os.getenv('chatgpt_api_key') 9 | 10 | 11 | class ChatGPTProxy(): 12 | def __init__(self, model="gpt-3.5-turbo", temperature=0.1): 13 | self.api_key = api_key 14 | self.model = model 15 | self.temperature = temperature 16 | 17 | def get_response(self, prompt): 18 | url = "https://api.openai.com/v1/chat/completions" 19 | headers = { 20 | "Authorization": f"Bearer {api_key}", 21 | "Content-Type": "application/json" 22 | } 23 | payload = { 24 | "model": self.model, 25 | "messages": [{"role": "user", "content": prompt}], 26 | "temperature": self.temperature 27 | } 28 | try: 29 | response = requests.post(url, json=payload, headers=headers) 30 | return response.json()['choices'][0]['message']['content'] 31 | except Exception as e: 32 | return f"请求失败: {str(e)}" 33 | 34 | def infer(self, prompts): 35 | # 使用线程池并行处理 36 | with ThreadPoolExecutor() as executor: 37 | results = list(executor.map(self.get_response, prompts)) 38 | return results 39 | 40 | 41 | if __name__ == "__main__": 42 | prompts = [ 43 | "你好", 44 | "你会干什么", 45 | "推荐5本人工智能入门书籍" 46 | ] 47 | llm = ChatGPTProxy() 48 | results = llm.infer(prompts) 49 | print(results) 50 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | 4 | # 加载文本嵌入模型所使用的设备 5 | EMBEDDING_DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu" 6 | # 加载大模型所使用的设备 7 | LLM_DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu" 8 | # 设备上GPU的数量 9 | num_gpus = torch.cuda.device_count() 10 | 11 | # LLM模型路径 12 | Qwen2_path = './models/Qwen2-7B-Instruct' 13 | Baichuan_path = './models/Baichuan2-7B-Chat' 14 | ChatGLM_path = './models/chatglm3-6b' 15 | 16 | # 召回模型路径 17 | M3E_embeddings_model_path = 
"./pre_train_model/m3e-large" 18 | BGE_embeddings_model_path = "./pre_train_model/bge-m3" 19 | 20 | # 重排模型路径 21 | BGE_reranker_model = "./pre_train_model/bge-reranker-large" 22 | BCE_reranker_model = "./pre_train_model/bce-reranker-base_v1" 23 | 24 | # 相似度模型 25 | SimModel_path = './pre_train_model/text2vec-base-chinese' 26 | -------------------------------------------------------------------------------- /data/car_user_manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhangzg1/rag_with_chat/9f3632bbf355244ea4e52722edda080cd10d7c48/data/car_user_manual.pdf -------------------------------------------------------------------------------- /data/gold_result.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "question": "中国足球的队长是谁", 4 | "answer": "无答案", 5 | "keywords": [] 6 | }, 7 | { 8 | "question": "新冠肺炎如何预防?", 9 | "answer": "无答案", 10 | "keywords": [] 11 | }, 12 | { 13 | "question": "交通事故如何处理?", 14 | "answer": " 在发生交通事故时,首先确保自身安全,离开车辆到达安全区域,然后报警或寻求帮助。如果安全气囊触发或安全带触发,会进入安全模式,车门会自动解锁以协助乘员逃生和救援。此时,不要尝试重新启动车辆或自行修理,应使用平板拖车将车辆运至Lynk&Co领克中心进行检查和修理。如果车辆保险公司已提供救援服务,领克中心将不再重复提供。", 15 | "keywords": [ 16 | "平板拖车", 17 | "安全气囊", 18 | "领克中心", 19 | "安全带", 20 | "安全模式" 21 | ] 22 | }, 23 | { 24 | "question": "怎样加热座椅?", 25 | "answer": "可以通过中央显示屏来调节前排座椅加热。首先点击-座舱体验-座椅进入座椅设置界面,选择副驾驶员座椅后,点击座椅加热功能按键。接着,点击指示箭头调节加热强度,有“关-低-中-高”四个选项。也可以通过Lynk&CoApp来打开或关闭前排座椅加热。", 26 | "keywords": [ 27 | "加热强度", 28 | "座椅加热", 29 | "座舱体验", 30 | "Lynk&CoApp", 31 | "中央显示屏" 32 | ] 33 | }, 34 | { 35 | "question": "自动模式下,中央显示屏是如何切换日间和夜间模式的?", 36 | "answer": "在自动模式下,中央显示屏会根据光线条件自动切换日间模式和夜间模式。如果选择了\"日出到日落\"模式,它会在白天显示日间模式,晚上显示夜间模式。如果选择了\"自定时段\",则会依据用户设置的具体时间段来切换显示模式。", 37 | "keywords": [ 38 | "夜间模式", 39 | "中央显示屏", 40 | "自定时段", 41 | "日间模式", 42 | "日出到日落" 43 | ] 44 | }, 45 | { 46 | "question": "如何通过中央显示屏进行副驾驶员座椅设置?", 47 | "answer": 
"在中央显示屏中点击-座舱体验-座椅,进入座椅设置界面。选择副驾驶员座椅后,点击需要调节的座椅功能按键,如靠背、座椅位置等。然后点击指示箭头来调节所选功能。", 48 | "keywords": [ 49 | "靠背", 50 | "座椅设置界面", 51 | "副驾驶员座椅", 52 | "中央显示屏", 53 | "座椅位置" 54 | ] 55 | }, 56 | { 57 | "question": "副仪表台按钮如何操作中央显示屏?", 58 | "answer": "副仪表台的旋转按钮可以用来增加或减小音量,按压按钮可以短按暂停或播放音乐/视频,长按约5秒进入锁屏/待机模式。在锁屏模式下,可以通过点击中央显示屏上的任意位置或长按中控台按钮来重新解锁屏幕。", 59 | "keywords": [ 60 | "中控台按钮", 61 | "中央显示屏", 62 | "音量", 63 | "旋转按钮", 64 | "锁屏模式" 65 | ] 66 | }, 67 | { 68 | "question": "如何从锁定状态唤醒中央显示器?", 69 | "answer": "您可以通过语音助手唤醒中央显示器。", 70 | "keywords": [ 71 | "中央显示器", 72 | "语音助手" 73 | ] 74 | }, 75 | { 76 | "question": "如何正确使用颈椎保护系统?", 77 | "answer": "颈椎保护系统是作为安全带的补充装置。为了正确使用,您应该始终佩戴并正确调整安全带。在发生追尾事故时,这个系统有助于降低颈椎受伤的风险。不要在前排座椅靠背后面放置任何物品,因为这可能影响系统的功能。如果车辆遭受追尾,即使座椅看起来没有损坏,也应该由Lynk&Co领克中心检查前排靠背,因为内部部件可能需要更换。不要试图自行维修颈椎保护系统的任何部分。同时,乘员应坐在前排座椅的适当位置,背部紧贴靠背,并保持坐直。如果座椅靠背折叠,要确保后备厢内的物品固定,防止在事故中向前移动并干扰颈椎保护系统。如果前排座椅靠背与折叠的后排座椅靠背接触,也可能影响系统功能,因此需要调整前排座椅的位置。", 78 | "keywords": [ 79 | "追尾事故", 80 | "颈椎保护系统", 81 | "Lynk&Co领克中心", 82 | "安全带", 83 | "前排座椅" 84 | ] 85 | }, 86 | { 87 | "question": "前方交叉路口预警系统(FCTA)的作用是什么?", 88 | "answer": "前方交叉路口预警系统(FCTA)的作用是在车速处于4-15km/h时,当车辆驶出停车位或经过交叉路口,如果检测到存在前方侧向碰撞风险,会通过仪表提示信息和蜂鸣声提醒驾驶员注意观察前方道路情况,以降低发生前方侧向碰撞的风险。", 89 | "keywords": [ 90 | "前方交叉路口预警系统", 91 | "车速", 92 | "侧向碰撞风险", 93 | "交叉路口", 94 | "停车位" 95 | ] 96 | }, 97 | { 98 | "question": "在使用FCTA时需要注意哪些事项?", 99 | "answer": "在使用FCTA(前方交叉路口预警系统)时,需要注意以下事项:\n1. FCTA是驾驶辅助系统,不能保证在所有情况下都能正常工作,紧急情况下驾驶员应立即接管车辆。\n2. 通过交叉路口或其他视线受阻的地方时,应环顾四周确保安全,不能完全依赖FCTA。\n3. 系统对三轮车、外表不规则的车辆、行人、骑行者和动物可能无法有效识别。\n4. 系统性能可能因车辆、场景和路况不同而有所变化。\n5. 猛烈转向或方向盘转角过大可能限制系统功能。\n6. 
FCTA在驾驶员未系安全带、车辆刚启动或目标车辆速度不在10-60km/h范围内时不会启用。\n\n请始终保持警惕,正确使用该系统以增强驾驶安全性。", 100 | "keywords": [ 101 | "FCTA", 102 | "驾驶辅助系统", 103 | "交叉路口", 104 | "驾驶员接管", 105 | "驾驶安全性" 106 | ] 107 | }, 108 | { 109 | "question": "如何打开车辆尾门?", 110 | "answer": "在紧急情况下,您可以折叠后排座椅靠背进入后备厢,然后使用机械钥匙或类似工具拆下保护盖,操作尾门开启按键打开尾门。如果车辆处于解锁状态,您也可以轻按尾门下部的开启按键来自动打开尾门。此外,还可以通过中央显示屏点击车辆设置-电动尾门来控制尾门的打开和关闭。", 111 | "keywords": [ 112 | "后排座椅靠背", 113 | "机械钥匙", 114 | "尾门开启按键", 115 | "中央显示屏", 116 | "电动尾门" 117 | ] 118 | }, 119 | { 120 | "question": "在哪些情况下智能钥匙可能会受到干扰,导致功能异常?", 121 | "answer": "当整车处于充电桩、大型停车场、变电站等有信号干扰的地方,或者智能钥匙和干扰设备放在一起时,智能钥匙可能会受到干扰,导致功能异常。", 122 | "keywords": [ 123 | "信号干扰", 124 | "变电站", 125 | "充电桩", 126 | "停车场", 127 | "智能钥匙" 128 | ] 129 | }, 130 | { 131 | "question": "车辆尾门的防夹保护功能是如何工作的?", 132 | "answer": "车辆尾门的防夹保护功能在车辆运动和静止时都有作用。如果在关闭过程中尾门碰到障碍物,车辆运动时尾门会停止关闭并保持原位,同时发出长鸣音;车辆静止时,尾门不仅会停止关闭,还会打开到预设的开启位置,并同样伴随长鸣音警告。这个功能旨在防止尾门对人员或物体造成伤害。", 133 | "keywords": [ 134 | "尾门", 135 | "防夹保护", 136 | "长鸣音", 137 | "预设开启位置", 138 | "障碍物" 139 | ] 140 | }, 141 | { 142 | "question": "在操作电动后备厢时需要注意哪些事项?", 143 | "answer": "在操作电动后备厢时,应注意以下事项:\n1. 在使用后排座椅之前,确保后排座椅靠背已锁定到位。\n2. 在将座椅靠背向下折叠之前,移除头枕,以防损坏。\n3. 确保座椅上没有物体,且两侧安全带绕开座椅靠背,以免在折叠过程中损坏座椅。\n4. 可以通过中央显示屏的车辆设置-电动尾门来打开/关闭尾门。\n5. 座椅展开后,检查并确认座椅安全带没有扭曲或夹在座椅靠背中。\n\n同时,装载后备厢时也要注意:\n6. 严禁使用不合适或损坏的固定带,以防止紧急情况下行李抛起。\n7. 禁止使用后备厢挂钩固定儿童安全座椅。\n8. 使用挂钩固定行李时,不要超过挂钩的最大拉力。\n9. 遮物帘上不应放置任何物品,以避免事故或紧急制动时造成伤害。\n10. 
折叠后座椅时,要收存好遮物帘,避免影响儿童约束装置的顶部系带,防止其损坏导致儿童在碰撞中受伤。", 144 | "keywords": [ 145 | "固定带", 146 | "电动后备厢", 147 | "儿童安全座椅", 148 | "遮物帘", 149 | "座椅靠背" 150 | ] 151 | }, 152 | { 153 | "question": "如何进入车辆功能界面?", 154 | "answer": "您可以通过从中央显示屏主界面上边缘向下滑动屏幕来进入车辆功能界面。", 155 | "keywords": [ 156 | "车辆功能界面", 157 | "滑动屏幕", 158 | "中央显示屏", 159 | "主界面", 160 | "车辆功能" 161 | ] 162 | }, 163 | { 164 | "question": "在车辆功能界面有哪些操作选项?", 165 | "answer": "在车辆功能界面,您可以看到以下操作选项:\n01 连接状态栏:可以开启/关闭相关功能的图标。\n02 功能栏:显示各项车辆功能的快捷开关图标。\n03 音量调节:用于调整导航、电话、语音、媒体音量的音量条。\n04 屏幕亮度调节:用于调整中央显示屏亮度的进度条。\n\n此外,还可以点击界面上的“编辑”按键来选择删除或添加快捷开关图标。", 166 | "keywords": [ 167 | "功能栏", 168 | "屏幕亮度调节", 169 | "音量调节", 170 | "连接状态栏", 171 | "快捷开关图标" 172 | ] 173 | }, 174 | { 175 | "question": "如何编辑快捷开关图标?", 176 | "answer": "点击界面上的“编辑”按键,可以选择删除或添加快捷开关图标。", 177 | "keywords": [ 178 | "删除", 179 | "开关图标", 180 | "按键", 181 | "编辑", 182 | "添加快捷" 183 | ] 184 | }, 185 | { 186 | "question": "如何减少车辆腐蚀风险?", 187 | "answer": "为了减少车辆腐蚀风险,需要定期检查和保养。具体措施包括定期清洗和打蜡车辆以保持清洁,及时检查并修复车漆的小损伤,以及检查车底是否有泥沙、污垢或盐累积并及时用水清洗。如果需要额外的防腐措施,可以联系Lynk&Co领克中心。", 188 | "keywords": [ 189 | "保养", 190 | "车辆腐蚀", 191 | "定期检查", 192 | "防腐措施", 193 | "车漆修复" 194 | ] 195 | }, 196 | { 197 | "question": "如何通过空调系统面板调节空调风量?", 198 | "answer": "您可以通过空调系统面板上的风速/副驾温度调节切换按钮来调节空调风量。首先按压此按钮,当对应的图标亮起时,再旋转风速/副驾温度调节旋钮即可开启/关闭鼓风机或调节鼓风机转速,共有9个速度等级可设置。", 199 | "keywords": [ 200 | "空调系统", 201 | "风速调节", 202 | "副驾温度调节", 203 | "速度等级", 204 | "鼓风机" 205 | ] 206 | }, 207 | { 208 | "question": "如何创建新的Lynk&CoID?", 209 | "answer": "要创建新的Lynk&CoID,您需要遵循以下步骤:\n1. 下载并安装Lynk&CoApp手机应用程序。\n2. 
打开应用程序,然后使用您的手机号码注册新账户。\n - 如果您是车主,请务必使用购车时预留的手机号码进行注册。\n\n完成上述步骤后,系统会根据您预留的信息自动绑定车主关系。请注意,创建过程可能因系统数据传输而有延迟,建议等待30分钟后再检查账户状态。如果需要更改车主账户,应联系Lynk&Co领克中心。", 210 | "keywords": [ 211 | "Lynk&Co", 212 | "Lynk&CoApp", 213 | "车主", 214 | "注册", 215 | "领克中心" 216 | ] 217 | }, 218 | { 219 | "question": "什么是车主账户?", 220 | "answer": "车主账户是指使用购车时预留的手机号码创建的Lynk&Co ID,它会根据购车时预留的信息自动绑定车主关系。一部车辆只能创建一个车主账户。", 221 | "keywords": [ 222 | "车主账户", 223 | "购车", 224 | "Lynk&Co ID", 225 | "车主关系", 226 | "手机号码" 227 | ] 228 | }, 229 | { 230 | "question": "如何创建人脸识别?", 231 | "answer": "要创建人脸识别,您需遵循以下步骤:\n1. 确保车辆挡位处于驻车挡(P)。\n2. 进入中央显示屏的用户中心。\n3. 通过Lynk&Co App扫描二维码登录账户。\n4. 输入登录密码进入“安全与隐私”设置。\n5. 在人脸识别设置界面开启功能。\n6. 点击添加图标以开始创建人脸信息。\n7. 创建时,保持面部清晰,不要遮挡五官。\n8. 完成后,中央显示屏会显示成功或失败的信息。\n\n请注意,人脸识别功能允许创建1个车主账户和4个亲情账户的人脸信息。", 232 | "keywords": [ 233 | "安全与隐私", 234 | "中央显示屏", 235 | "Lynk&Co App", 236 | "人脸识别", 237 | "驻车挡" 238 | ] 239 | }, 240 | { 241 | "question": "如何添加亲情账号?", 242 | "answer": "要添加亲情账号,您需要在中央显示屏中点击“用户中心”,进入账户登录界面,然后通过Lynk&Co App扫描二维码登录。接着点击“亲情帐号”,进入亲情帐号界面,再点击“+”进入添加帐号界面。最后,输入需添加的亲情人已注册过的Lynk&Co ID的手机号,添加成功后,中央显示屏会显示相关提示信息。请注意,添加的亲情账号必须已经注册了Lynk&Co ID。", 243 | "keywords": [ 244 | "亲情账号", 245 | "Lynk&Co", 246 | "二维码", 247 | "中央显示屏", 248 | "用户中心" 249 | ] 250 | }, 251 | { 252 | "question": "如何开启或关闭用车偏好自动同步?", 253 | "answer": "在中央显示屏中点击\"用户中心\",进入用户账户登录界面,通过Lynk&Co App扫描二维码登录账户。然后点击“用车偏好”,进入用车偏好界面。在这里,你可以点击开启/关闭用户偏好自动同步。", 254 | "keywords": [ 255 | "用车偏好", 256 | "自动同步", 257 | "中央显示屏", 258 | "用户中心", 259 | "Lynk&Co App" 260 | ] 261 | }, 262 | { 263 | "question": "如何熄火我的车辆?", 264 | "answer": "要熄火车辆,确保车辆已静止,将挡位切换到驻车挡(P),激活EPB电子驻车系统,然后短按START/STOP按钮。", 265 | "keywords": [ 266 | "START/STOP按钮", 267 | "熄火", 268 | "静止", 269 | "EPB电子驻车系统", 270 | "驻车挡" 271 | ] 272 | }, 273 | { 274 | "question": "如何通过遥控钥匙启动车辆?", 275 | "answer": "要通过遥控钥匙启动吉利汽车,需遵循以下步骤:\n1. 确保携带已正确编码的遥控钥匙。\n2. 将挡位放在驻车挡(P)。\n3. 将遥控钥匙放在车内。\n4. 踩下制动踏板。\n5. 
长按START/STOP按钮,直到车辆启动后松开。\n\n如果遥控钥匙电池电量低,车辆无法检测到钥匙,可将钥匙放入前杯托底部,然后按照上述步骤重新尝试启动车辆。如果连续尝试三次都无法启动,应避免再次尝试,并联系Lynk&Co领克中心寻求帮助。", 276 | "keywords": [ 277 | "遥控钥匙", 278 | "START/STOP按钮", 279 | "Lynk&Co领克中心", 280 | "吉利汽车", 281 | "驻车挡" 282 | ] 283 | }, 284 | { 285 | "question": "如果遥控钥匙电池电量低,我应该如何启动车辆?", 286 | "answer": "当遥控钥匙电池电量低,系统无法检测到钥匙时,您需要将遥控钥匙放入前杯托底部,然后按照正常的启动步骤操作。如果尝试三次都无法启动,应避免再次尝试并联系Lynk&Co领克中心。", 287 | "keywords": [ 288 | "电池电量", 289 | "遥控钥匙", 290 | "Lynk&Co领克中心", 291 | "前杯托", 292 | "启动步骤" 293 | ] 294 | }, 295 | { 296 | "question": "如何调节外后视镜?", 297 | "answer": "您可以使用驾驶员侧车门饰板上的后视镜控制按钮来调节外后视镜。具体步骤如下:\n1. 按下L按钮调节左侧外后视镜,按下R按钮调节右侧外后视镜,相应的按钮指示灯会点亮。\n2. 使用中间的控制杆来调节外后视镜的角度。\n3. 调整完成后,再次按下L或R按钮,按钮上的指示灯会熄灭。请注意,驾驶过程中不要调节外后视镜。", 298 | "keywords": [ 299 | "控制杆", 300 | "L按钮", 301 | "R按钮", 302 | "后视镜控制按钮", 303 | "外后视镜" 304 | ] 305 | }, 306 | { 307 | "question": "外部反光境显示物体距离是否准确?", 308 | "answer": "不准确。外后视镜中显示的物体比其实际距离远。", 309 | "keywords": [ 310 | "实际", 311 | "物体", 312 | "显示", 313 | "距离", 314 | "外后视镜" 315 | ] 316 | }, 317 | { 318 | "question": "什么是自动驻车系统?", 319 | "answer": "自动驻车系统(Auto Hold)是一种在正常驾驶过程中短暂停车时提供制动的功能,它允许驾驶员不必持续踩住制动踏板。当需要车辆行驶时,只需踩下加速踏板或用力踩下制动踏板后再松开,车辆就能正常启动。", 320 | "keywords": [ 321 | "Auto Hold", 322 | "加速踏板", 323 | "制动踏板", 324 | "短暂停车", 325 | "自动驻车系统" 326 | ] 327 | }, 328 | { 329 | "question": "在什么情况下会停用AutoHold并启用EPB功能?", 330 | "answer": "当AutoHold处于激活状态时,如果发生以下任一情况,AutoHold会自动停用并启用EPB功能:\n1. 松开驾驶员安全带。\n2. 打开驾驶员侧车门。\n3. 超出AutoHold工作时间。\n4. 
挂入驻车挡(P)。", 331 | "keywords": [ 332 | "AutoHold", 333 | "驾驶员侧车门", 334 | "驾驶员安全带", 335 | "EPB", 336 | "驻车挡" 337 | ] 338 | }, 339 | { 340 | "question": "中央扶手箱的USB接口有几个?它们分别是什么类型?", 341 | "answer": "中央扶手箱内设有两个USB接口,分别是Type A接口和Type C接口。Type A接口支持数据传输和充电,Type C接口则仅用于充电。", 342 | "keywords": [ 343 | "USB接口", 344 | "充电", 345 | "Type C接口", 346 | "Type A接口", 347 | "数据传输" 348 | ] 349 | }, 350 | { 351 | "question": "中央扶手箱所支持的U盘及数据传输格式有哪些?", 352 | "answer": "中央扶手箱支持的U盘格式包括FAT16、FAT32、NTFS、exFAT。数据传输格式支持MP3、AAC、WAV、FLAC、OGG等音频格式,以及3GP、FLV、MKV、WMV、MPG、MP4、AVI、MOV等视频格式,同时支持BMP、JPG、JPEG、PNG、GIF等图片格式。", 353 | "keywords": [ 354 | "数据传输格式", 355 | "U盘格式", 356 | "图片格式", 357 | "音频格式", 358 | "视频格式" 359 | ] 360 | }, 361 | { 362 | "question": "如何通过中央显示屏调节驾驶员侧座椅通风强度?", 363 | "answer": "在中央显示屏中点击进入驾驶员侧座椅通风控制界面,然后设置驾驶员侧座椅通风强度及开关控制。驾驶员侧座椅通风有三级可调,点击控制开关可在“关-低-中-高”之间循环选择。", 364 | "keywords": [ 365 | "通风强度", 366 | "驾驶员侧座椅通风", 367 | "中央显示屏", 368 | "开关控制", 369 | "循环选择" 370 | ] 371 | }, 372 | { 373 | "question": "如何关闭前排座行车通风功能?", 374 | "answer": "可以通过两种方式关闭前排座椅通风功能:\n1. 登录Lynk&CoApp,点击前排座椅通风图标以关闭该功能。\n2. 在中央显示屏中进入空调功能菜单,找到座椅通风设置,点击控制开关,在“关-低-中-高”之间切换至“关”的状态。", 375 | "keywords": [ 376 | "前排座椅通风", 377 | "Lynk&CoApp", 378 | "中央显示屏", 379 | "座椅通风设置", 380 | "空调功能菜单" 381 | ] 382 | }, 383 | { 384 | "question": "如何进入系统设置界面?", 385 | "answer": " 在中央显示屏中点击-设置-系统,即可进入系统设置界面。", 386 | "keywords": [ 387 | "系统设置", 388 | "中央显示屏", 389 | "设置界面" 390 | ] 391 | }, 392 | { 393 | "question": "在系统界面可以进行哪些操作?", 394 | "answer": " 在系统界面,你可以进行如下操作:\n1. 连接设置:包括蓝牙设置、Wi-Fi设置和车载热点设置。\n2. 蓝牙设置:可以开启/关闭蓝牙功能,设置蓝牙名称,查看已连接的蓝牙设备,搜索并连接新的蓝牙设备。\n3. Wi-Fi设置:查看已连接的网络,搜索可用的网络,并选择连接。\n4. 车载热点设置:配置热点的名称和设置。\n\n此外,你还可以在车辆功能界面中:\n1. 查看连接状态栏,点击图标以开启或关闭相关功能。\n2. 使用功能栏中的快捷开关图标来控制各项车辆功能。\n3. 调节音量条来调整导航、电话、语音、媒体音量。\n4. 调整中央显示屏的屏幕亮度。\n\n在应用程序界面,你可以:\n1. 选择应用和小程序。\n2. 查看已下载的应用程序列表。\n3. 查看历史记录区域,找到上次打开的应用程序。\n4. 
安装新应用、更新或卸载已安装的应用。", 395 | "keywords": [ 396 | "蓝牙设置", 397 | "Wi-Fi设置", 398 | "应用程序界面", 399 | "车辆功能", 400 | "车载热点设置" 401 | ] 402 | }, 403 | { 404 | "question": "什么是无钥匙进入系统?", 405 | "answer": "无钥匙进入系统是一种汽车技术,允许车主在不操作传统遥控钥匙或蓝牙钥匙的情况下,通过触摸车门外把手上的特定传感器区域来解锁和闭锁车辆。这个系统提高了便利性,简化了进入和离开车辆的过程。", 406 | "keywords": [ 407 | "蓝牙钥匙", 408 | "无钥匙进入系统", 409 | "传感器区域", 410 | "闭锁", 411 | "解锁" 412 | ] 413 | }, 414 | { 415 | "question": "如何设置无钥匙解锁模式?", 416 | "answer": "要设置无钥匙解锁模式,您需要在中央显示屏中点击“车辆设置”-“门锁”,进入门锁设置界面。在那里,您可以选择设置为“单门”或“全车”解锁模式。单门模式下,触摸前门外把手上的内部传感器区域仅解锁当前车门;全车模式下,会解锁所有车门。", 417 | "keywords": [ 418 | "无钥匙解锁", 419 | "全车模式", 420 | "门锁设置", 421 | "中央显示屏", 422 | "单门模式" 423 | ] 424 | }, 425 | { 426 | "question": "设置无钥匙解锁中单门和全车的区别在于什么?", 427 | "answer": "设置无钥匙解锁为单门时,触摸前门外把手上的内部传感器区域仅解锁当前车门;而设置为全车时,触摸同一区域会解锁所有车门。", 428 | "keywords": [ 429 | "全车", 430 | "前门外把手", 431 | "无钥匙解锁", 432 | "内部传感器区域", 433 | "单门" 434 | ] 435 | }, 436 | { 437 | "question": "驾驶车辆时应遵守哪些注意事项?", 438 | "answer": "驾驶车辆时,应始终将手放在方向盘上,避免受乘员干扰或使用电子设备,确保脚垫正确安装且不影响操纵踏板。在驾驶过程中,不要调节显示屏、方向盘、座椅、内后视镜或外后视镜的位置,以防车辆失控。禁止乘员将手臂、头或身体其他部位伸出车外。如果车辆底部受到碰撞,应及时到Lynk&Co领克中心检查。此外,启动车辆前要检查方向盘、座椅、内后视镜和外后视镜是否处于安全舒适位置,制动踏板是否可以踩到底,周围环境是否适合启动车辆。在发动机未达到工作温度时,避免高转速和全油门操作。行驶时不要将遥控钥匙带出车辆或使用START/STOP按钮,以防止熄火。保持清醒驾驶状态,避免酒后或服药后驾驶。谨慎驾驶,根据天气和路况判断是否出行,注意其他车辆和行人动向。专心驾驶,避免接电话、查看短信或调节控制按钮等活动分散注意力。仔细阅读手册中的警告和注意事项,以防止人身伤害和车辆损坏。当深踩油门踏板超过设定车速时,应保持警惕,与其他车辆保持安全距离,并遵守交通法规。", 439 | "keywords": [ 440 | "座椅", 441 | "方向盘", 442 | "外后视镜", 443 | "制动踏板", 444 | "内后视镜" 445 | ] 446 | }, 447 | { 448 | "question": "如何启用或停用手套箱密码保护功能?", 449 | "answer": "要启用或停用手套箱密码保护功能,您需要在中央显示屏中点击“座舱体验”-“快捷设置”,进入快捷设置界面。然后,点击“启用/停用手套箱密码保护功能”。要锁止手套箱,点击快捷设置界面中的手套箱密码锁按键并设定密码;要解锁手套箱,同样点击该按键或直接按下手套箱开关,然后输入密码。", 450 | "keywords": [ 451 | "密码保护", 452 | "座舱体验", 453 | "手套箱", 454 | "快捷设置", 455 | "中央显示屏" 456 | ] 457 | }, 458 | { 459 | "question": "驾驶员状态监测系统是如何工作的?", 460 | "answer": 
"驾驶员状态监测系统通过驾驶员状态检测摄像头监测驾驶员的状态。如果系统判断驾驶员处于疲劳驾驶状态,它会发出声音或视觉信号来提醒驾驶员,以确保驾驶安全。当该系统和疲劳休息站导航功能同时开启,若检测到重度疲劳状态,系统还会推荐驾驶员前往最近的休息站休息。然而,该系统可能在摄像头被遮挡、驾驶员佩戴遮挡面部的物品或强光环境下受到影响。驾驶员应始终负责安全驾驶,不能依赖此系统,并在收到警告后及时调整驾驶行为或停车休息。", 461 | "keywords": [ 462 | "疲劳驾驶", 463 | "强光环境", 464 | "摄像头遮挡", 465 | "驾驶员状态监测系统", 466 | "休息站导航" 467 | ] 468 | }, 469 | { 470 | "question": "什么情况下会影响到驾驶员状态监测系统的工作?", 471 | "answer": "驾驶员状态监测系统在以下情况下可能会受到影响:\n1. 驾驶员状态监测摄像头被遮挡。\n2. 佩戴墨镜、口罩等会遮挡面部的饰物。\n3. 强烈的光照降低摄像头检测能力。", 472 | "keywords": [ 473 | "口罩", 474 | "光照", 475 | "驾驶员状态监测系统", 476 | "墨镜", 477 | "驾驶员状态监测摄像头" 478 | ] 479 | }, 480 | { 481 | "question": "如何启用后排儿童锁功能?", 482 | "answer": "要启用后排儿童锁功能,您需要使用可分开的机械钥匙。对于左后门,顺时针转动控制器90°;对于右后门,逆时针转动钥匙90°。这样,儿童锁就启用了,此时不能从车内打开后门。要停用儿童锁,只需按相反方向转动相应的控制器即可。儿童锁位于后门的后缘,需在后门打开时操作。", 483 | "keywords": [ 484 | "逆时针转动", 485 | "顺时针转动", 486 | "控制器", 487 | "机械钥匙", 488 | "后排儿童锁" 489 | ] 490 | }, 491 | { 492 | "question": "安全气囊是什么?它的作用是什么?", 493 | "answer": "安全气囊是一种被动式辅助保护装置,它与安全带一起使用,旨在为乘员的头部和胸部在碰撞中提供额外的保护。", 494 | "keywords": [ 495 | "安全气囊", 496 | "保护装置", 497 | "碰撞", 498 | "安全带", 499 | "乘员" 500 | ] 501 | }, 502 | { 503 | "question": "如果未使用或未正确使用安全带,会对安全气囊有何影响? ", 504 | "answer": "如果未使用或未正确使用安全带,安全气囊在碰撞事故中提供的保护作用会减小或失效。", 505 | "keywords": [ 506 | "碰撞事故", 507 | "安全气囊", 508 | "失效", 509 | "安全带", 510 | "保护作用" 511 | ] 512 | }, 513 | { 514 | "question": "在使用车辆时,有哪些安全气囊的注意事项?", 515 | "answer": "在使用车辆时,关于安全气囊的注意事项包括:\n1. 不要在装有安全气囊的位置安装或放置任何附件。\n2. 禁止私自更改、拆卸、安装安全气囊及相关部件。\n3. 所有驾乘人员必须正确佩戴安全带并保持正确坐姿。\n4. 成人应使用安全带,儿童应使用合适的儿童安全座椅。\n5. 不要在安全气囊装置部件或电器件附近使用电气测试设备或装置。\n6. 安全气囊展开后,不要驾驶车辆,应立即联系服务中心。\n7. 安全气囊展开后,不要立即触摸相关组件和周围车内部件。\n8. 
长时间接触安全气囊产生的粉尘可能引起不适,需及时处理。\n\n请始终遵守这些警告,以确保行车安全。", 516 | "keywords": [ 517 | "儿童安全座椅", 518 | "安全气囊展开", 519 | "安全气囊", 520 | "行车安全", 521 | "安全带" 522 | ] 523 | }, 524 | { 525 | "question": "如何开启动力电池电量保持功能?", 526 | "answer": "在中央显示屏中点击-充电供电-能量控制,进入能量控制设置界面后,点击“混动模式电量保持”按键即可开启动力电池电量保持功能。", 527 | "keywords": [ 528 | "充电供电", 529 | "动力电池电量保持", 530 | "混动模式电量保持", 531 | "中央显示屏", 532 | "能量控制" 533 | ] 534 | }, 535 | { 536 | "question": "在开启动力电池的情况下,选择经济性优先和充电速度优先有什么区别?", 537 | "answer": "选择经济性优先时,发动机主要驱动车辆,不会主动为动力电池充电。而选择充电速度优先时,发动机在驱动车辆的同时,也会为动力电池充电。", 538 | "keywords": [ 539 | "动力电池", 540 | "充电速度优先", 541 | "驱动车辆", 542 | "发动机", 543 | "经济性优先" 544 | ] 545 | }, 546 | { 547 | "question": "后方碰撞预警系统在什么情况下会启动?", 548 | "answer": "后方碰撞预警系统在车辆行驶速度处于0-150km/h时,如果系统检测到存在后方碰撞风险,会通过声音和视觉信号警告驾驶员,并在必要时开启危险警告灯。当车辆静止且无法避免后方碰撞时,系统也会启动,对车辆进行自动制动,以防二次碰撞。", 549 | "keywords": [ 550 | "声音警告", 551 | "自动制动", 552 | "视觉信号", 553 | "后方碰撞预警系统", 554 | "危险警告灯" 555 | ] 556 | }, 557 | { 558 | "question": "如何调整方向盘的位置?", 559 | "answer": "要调整方向盘的位置,您需要遵循以下步骤:\n1. 向下推锁定杆,这会松开方向盘。\n2. 将方向盘移动到适合您的位置。\n3. 向上推锁定杆,将方向盘锁止到位。\n4. 确认方向盘已牢固锁止,试着轻轻上下前后移动以检查。\n\n请记住,只能在车辆停止时调整方向盘。", 560 | "keywords": [ 561 | "锁止", 562 | "位置", 563 | "调整", 564 | "方向盘", 565 | "锁定杆" 566 | ] 567 | }, 568 | { 569 | "question": "什么情况下不能调节车辆的方向盘?", 570 | "answer": "只有在车辆停止时才能调节方向盘。", 571 | "keywords": [ 572 | "车辆停止", 573 | "调节方向盘" 574 | ] 575 | }, 576 | { 577 | "question": "如何通过手机APP启动车辆?", 578 | "answer": "可以通过Lynk&Co APP使用蓝牙启动或远程启动来操作:\n1. 蓝牙启动:手机在车内且与车辆连接时,打开APP中的蓝牙钥匙并点击相关图标启动。\n2. 
远程启动:当距离车辆较远时,确保车辆已锁且挡位在驻车挡(P),登录Lynk &Co App并按下相应图标远程启动发动机。\n\n请注意,蓝牙启动需要手机在车内且APP开启,而远程启动需满足车辆锁定且在驻车挡的状态。此外,某些情况下(如燃油油位低、手机在车外或APP关闭、发动机故障等)可能无法通过蓝牙钥匙启动,同样,远程启动也需要避免挡位不在驻车挡、冷却液液位低或燃油油位低的情况。", 579 | "keywords": [ 580 | "蓝牙启动", 581 | "远程启动", 582 | "蓝牙钥匙", 583 | "Lynk&Co APP", 584 | "驻车挡" 585 | ] 586 | }, 587 | { 588 | "question": "什么是陡坡缓降系统(HDC什么是陡坡缓降系统(HDC)?", 589 | "answer": "陡坡缓降系统(HDC)是一种车辆辅助系统,它在车辆下坡时能主动控制车辆,帮助驾驶员以稳定的速度安全地驶下陡坡。驾驶员需要了解,HDC并非万能,不能应对所有路况,驾驶员应始终保持对车辆的控制,并在必要时通过踩下制动踏板来控制车速。该系统在车速0-40km/h且车辆低速下陡坡时激活,超过60km/h时会自动退出。开启和关闭HDC需通过中央显示屏的车辆设置界面操作。当HDC工作时,组合仪表会有相应指示灯显示其状态。如果系统出现故障,应立即使用刹车控制车速。", 590 | "keywords": [ 591 | "陡坡缓降系统", 592 | "HDC", 593 | "制动踏板", 594 | "车辆辅助系统", 595 | "中央显示屏" 596 | ] 597 | }, 598 | { 599 | "question": "当坡度过大时,如何操作才能使车辆保持匀速地行驶?", 600 | "answer": "当坡度过大,陡坡缓降系统(HDC)可能无法使车辆保持匀速下坡,此时您应该通过踩下制动踏板来控制车速。", 601 | "keywords": [ 602 | "陡坡缓降系统", 603 | "HDC", 604 | "车速", 605 | "制动踏板", 606 | "坡度" 607 | ] 608 | }, 609 | { 610 | "question": "在什么情况下HDC会激活?", 611 | "answer": "当车速在0-40km/h的范围内,且车辆在陡坡上低速下坡行驶时,HDC会激活。", 612 | "keywords": [ 613 | "HDC", 614 | "低速下坡", 615 | "车速", 616 | "陡坡", 617 | "0-40km/h" 618 | ] 619 | }, 620 | { 621 | "question": "在什么情况下无法激活或自动退出HDC功能?", 622 | "answer": "当车速在40-60km/h范围内时无法激活HDC功能,车速超过60km/h时,HDC会自动退出。", 623 | "keywords": [ 624 | "HDC", 625 | "自动退出", 626 | "车速", 627 | "60km/h", 628 | "40-60km/h" 629 | ] 630 | }, 631 | { 632 | "question": "开启陡坡缓降系统后,组合仪表显示什么颜色的指示灯?", 633 | "answer": "开启陡坡缓降系统后,组合仪表显示白色指示灯。", 634 | "keywords": [ 635 | "组合仪表", 636 | "陡坡缓降系统", 637 | "白色指示灯" 638 | ] 639 | }, 640 | { 641 | "question": "激活陡坡缓降系统时,组合仪表显示什么颜色的指示灯?", 642 | "answer": "激活陡坡缓降系统时,组合仪表显示绿色指示灯。", 643 | "keywords": [ 644 | "组合仪表", 645 | "陡坡缓降系统", 646 | "绿色指示灯" 647 | ] 648 | }, 649 | { 650 | "question": "当陡坡缓降系统出现故障时,组合仪表会显示怎样的提示?", 651 | "answer": "当陡坡缓降系统出现故障时,组合仪表会显示黄色指示灯。", 652 | "keywords": [ 653 | "组合仪表", 654 | "陡坡缓降系统", 655 | "黄色指示灯" 656 | ] 657 | }, 658 | { 659 | "question": "坡道辅助系统的主要功能是什么?", 660 | "answer": 
"坡道辅助系统的主要功能是在坡道起步时帮助防止车辆向后移动。当驾驶员松开制动踏板后,系统会在短时间内保持车辆静止状态,通常约为2秒,以便于在坡道上平稳起步。", 661 | "keywords": [ 662 | "平稳起步", 663 | "制动踏板", 664 | "坡道起步", 665 | "车辆静止", 666 | "坡道辅助系统" 667 | ] 668 | }, 669 | { 670 | "question": "使用坡道辅助系统时需要注意哪些警告?", 671 | "answer": "使用坡道辅助系统时,需要注意以下警告:\n1. 坡道辅助系统不能替代电子驻车制动器。当您离开车辆时,应将挡位切换至驻车挡(P),并启用电子驻车制动(EPB)。\n2. 如果汽车开始向后滑动,应立即踩下制动踏板。坡道辅助系统不能防止在全部负荷情况或全部路况下的车辆从陡坡上向后滑动。\n3. 坡道起步时,禁止同时踩下制动踏板及加速踏板超过5秒。", 672 | "keywords": [ 673 | "加速踏板", 674 | "制动踏板", 675 | "电子驻车制动器", 676 | "坡道辅助系统", 677 | "驻车挡" 678 | ] 679 | }, 680 | { 681 | "question": "在有路缘石的上坡和下坡驻车时,应如何操作?", 682 | "answer": "在有路缘石的上坡路段驻车时,可以转动方向盘让车辆向后移动,直至靠路缘石一侧的前轮轻轻触碰路缘石。在下坡路段,应转动方向盘让车辆向前移动,直至靠路缘石一侧的前轮轻轻触碰路缘石。这样可以利用路缘石来帮助固定车轮,防止车辆溜车。此外,还应将变速器挂入驻车挡(P)并启用EPB功能,确保安全。", 683 | "keywords": [ 684 | "路缘石", 685 | "驻车", 686 | "EPB功能", 687 | "车轮", 688 | "方向盘" 689 | ] 690 | }, 691 | { 692 | "question": "新车在最初的2000km行驶期间应遵守哪些事项?", 693 | "answer": "新车在最初的2000km行驶期间应遵守以下事项:\n1. 不要在空挡下高速空转发动机。\n2. 避免在启动及任何挡位踩尽油门加速,发动机转速不应超过3000rpm。\n3. 避免车辆长时间满载情况下低速或超高速行驶。\n4. 尽量避免行车时急刹车。\n5. 车辆应选择平坦路面上行驶,避免在泥泞路或沙土路上行驶。\n6. 
避免以固定的车速或发动机转速长时间行驶。", 694 | "keywords": [ 695 | "新车", 696 | "急刹车", 697 | "空挡", 698 | "发动机转速", 699 | "平坦路面" 700 | ] 701 | }, 702 | { 703 | "question": "当刹车片将达到最小安全厚度时会有什么表现?", 704 | "answer": "当刹车片将达到最小安全厚度时,车辆制动或惯性滑行时可能会发出持续的高频尖锐噪声。这是提示您需要检查刹车片并及时联系Lynk&Co领克中心进行更换。", 705 | "keywords": [ 706 | "高频尖锐噪声", 707 | "最小安全厚度", 708 | "Lynk&Co领克中心", 709 | "制动", 710 | "刹车片" 711 | ] 712 | }, 713 | { 714 | "question": "如果听到持续的高频尖锐噪声应该怎么做?", 715 | "answer": "当您听到持续的高频尖锐噪声时,这可能是刹车片达到最小安全厚度的信号。应检查刹车片,并及时联系Lynk&Co领克中心进行更换,以确保制动系统的正常运行和行车安全。", 716 | "keywords": [ 717 | "行车安全", 718 | "Lynk&Co领克中心", 719 | "制动系统", 720 | "刹车片", 721 | "安全厚度" 722 | ] 723 | }, 724 | { 725 | "question": "风挡摄像头和前雷达在什么情况下会弹出警示消息?", 726 | "answer": "当风挡摄像头、前雷达及其周围出现污物或积雪时,仪表显示屏上会弹出警示消息。", 727 | "keywords": [ 728 | "前雷达", 729 | "仪表显示屏", 730 | "污物", 731 | "警示消息", 732 | "风挡摄像头" 733 | ] 734 | }, 735 | { 736 | "question": "如何通过蓝牙钥匙启动车辆?", 737 | "answer": "通过蓝牙钥匙启动吉利汽车的步骤如下:\n1. 打开Lynk&Co APP并确保蓝牙钥匙功能已开启。\n2. 在Lynk&Co APP中找到相应图标并点击,然后按照应用中的指示操作。\n\n请注意,如果燃油油位低、手机在车外或APP关闭、发动机有故障、车辆位于信号干扰区域或者周围有无线设备,可能无法通过蓝牙钥匙启动车辆。", 738 | "keywords": [ 739 | "信号干扰", 740 | "燃油油位", 741 | "蓝牙钥匙", 742 | "Lynk&Co APP", 743 | "吉利汽车" 744 | ] 745 | }, 746 | { 747 | "question": "在什么情况下无法通过蓝牙钥匙启动车辆?", 748 | "answer": "无法通过蓝牙钥匙启动车辆的情况包括:\n1. 燃油油位较低。\n2. 手机在车外或Lynk&CoAPP处于关闭状态。\n3. 发动机出现故障(例如出现发动机关键故障代码)。\n4. 车辆位置靠近无线电塔/电视塔或发电站。\n5. 
车辆周围存在无线设备,如发射机或无线电广播设备。", 749 | "keywords": [ 750 | "燃油油位", 751 | "蓝牙钥匙", 752 | "发动机故障", 753 | "无线设备", 754 | "无线电塔" 755 | ] 756 | }, 757 | { 758 | "question": "如何通过网络远程启动发动机?", 759 | "answer": "当车辆已锁且挡位在驻车挡(P)时,您可以登录Lynk&Co App,然后按下图标以远程启动发动机。但请注意,如果挡位不在驻车挡、冷却液液位低、燃油油位低、车内有遥控钥匙、网络信号丢失、发动机机舱盖/车门未锁定、发动机有故障、车辆设置为维修模式或者连续两次远程启动后未手动启动车辆,您将无法远程启动发动机。", 760 | "keywords": [ 761 | "网络信号", 762 | "维修模式", 763 | "远程启动", 764 | "发动机故障", 765 | "驻车挡" 766 | ] 767 | }, 768 | { 769 | "question": "变道辅助系统由哪三部分组成?", 770 | "answer": "变道辅助系统由盲点监测系统(BSD)、车辆快速接近警示系统(CVW)和变道警示系统(LCW)三部分组成。", 771 | "keywords": [ 772 | "BSD", 773 | "变道警示系统", 774 | "盲点监测系统", 775 | "变道辅助系统", 776 | "车辆快速接近警示系统" 777 | ] 778 | }, 779 | { 780 | "question": "如何检查制动液液位?", 781 | "answer": "请不定期地检查制动液液位,确保液位在MIN(最低)和MAX(最高)标记之间。", 782 | "keywords": [ 783 | "MIN", 784 | "制动液", 785 | "标记", 786 | "MAX", 787 | "液位" 788 | ] 789 | }, 790 | { 791 | "question": "如何调节车辆的背光亮度?", 792 | "answer": "可以通过转动调光旋钮来调节背光亮度。在白天且未开启整车背光联动时,转动旋钮调节开关背光亮度;若开启整车背光联动,旋钮则会同时调节中央显示屏和开关背光亮度。在夜晚且未开启整车背光联动时,旋钮会同时调节仪表显示屏和开关背光亮度;开启整车背光联动后,会同时调节仪表显示屏、中央显示屏和开关背光亮度。此外,也可以在中央显示屏中点击“设置”-“显示”,进入显示设置界面来开启或关闭整车背光联动。", 793 | "keywords": [ 794 | "仪表显示屏", 795 | "调光旋钮", 796 | "整车背光联动", 797 | "背光亮度", 798 | "中央显示屏" 799 | ] 800 | }, 801 | { 802 | "question": "在白天和夜晚,开启和未开启整车背光联动时,转动调光旋钮有什么不同?", 803 | "answer": "在白天,未开启整车背光联动时,转动调光旋钮只调节开关背光亮度;开启整车背光联动时,它会同时调节中央显示屏和开关背光的亮度。在夜晚,未开启整车背光联动时,调光旋钮可以同时调节仪表显示屏和开关背光的亮度;而开启整车背光联动后,则会同时调节仪表显示屏、中央显示屏和开关背光的亮度。", 804 | "keywords": [ 805 | "仪表显示屏", 806 | "调光旋钮", 807 | "开关背光", 808 | "整车背光联动", 809 | "中央显示屏" 810 | ] 811 | }, 812 | { 813 | "question": "当车辆发生故障时,应该如何处理?", 814 | "answer": "当车辆发生故障时,应首先查看仪表显示屏和中央显示屏是否有警告/指示消息,根据图标含义采取相应处理措施,如参照指示灯和警告灯说明。接着检查是否有系统功能下降或受限,并采取适当措施,例如关闭前机舱盖或系上安全带。如果轮胎漏气,使用补胎套装;如果电池电量低,参照相关章节处理。如果以上步骤无法解决问题,应联系Lynk&Co领克中心寻求帮助。在所有操作中,避免自行修理,以免造成危险。在紧急情况下,可以使用SOS按键手动或自动激活紧急救援服务。同时,Lynk&Co领克提供终身免费紧急道路救援服务,可以通过车主APP或客服热线寻求帮助。", 815 | "keywords": [ 816 | "紧急救援服务", 817 | "仪表显示屏", 818 | "轮胎漏气", 819 | "中央显示屏", 820 | 
"电池电量低" 821 | ] 822 | }, 823 | { 824 | "question": "为什么不建议自行修理车辆故障?", 825 | "answer": "车辆故障处理需要专业技能,自行修理可能会导致更严重的损坏或安全风险。", 826 | "keywords": [ 827 | "严重损坏", 828 | "自行修理", 829 | "车辆故障", 830 | "专业技能", 831 | "安全风险" 832 | ] 833 | }, 834 | { 835 | "question": "什么是能量回收系统?", 836 | "answer": "能量回收系统是一种在车辆制动或松开加速踏板时工作的系统。它利用驱动电机作为发电机,将制动产生的效能转换为电能,并储存在动力电池中。", 837 | "keywords": [ 838 | "发电机", 839 | "动力电池", 840 | "制动", 841 | "能量回收系统", 842 | "驱动电机" 843 | ] 844 | }, 845 | { 846 | "question": "如何知道车辆正在进行能量回收?", 847 | "answer": "当01能量回收图标变亮时,表明车辆正在进行能量回收,此时动力电池正在充电。", 848 | "keywords": [ 849 | "能量回收", 850 | "充电", 851 | "动力电池" 852 | ] 853 | }, 854 | { 855 | "question": "影响能量回收多少的因素有哪些?", 856 | "answer": " 影响能量回收多少的因素包括当前动力电池的状态、设置的能量回收等级以及车速。此外,当车身稳定控制系统(ESP)和防抱死制动系统(ABS)触发时,制动能量回收系统将不工作。", 857 | "keywords": [ 858 | "ESP", 859 | "动力电池", 860 | "车速", 861 | "ABS", 862 | "能量回收等级" 863 | ] 864 | }, 865 | { 866 | "question": "什么是智能远近光控制系统?", 867 | "answer": "智能远近光控制系统是一种自动切换远近光灯的系统,它会在环境光线较暗且车速超过40km/h时开启。该系统能够根据迎面车辆的大灯或前方同向行驶车辆的尾灯状态自动切换,以防止对其他驾驶员造成眩目,从而提高夜间行车的安全性和舒适性。然而,这个系统在光线对比不足(如雾天或雨天)时可能无法正常工作,并且仅作为辅助工具,驾驶员仍需根据实际交通和天气状况手动调整灯光。此外,某些特定情况如使用非原装挡风玻璃、特殊天气或路况等,也可能需要手动切换远近光灯。", 868 | "keywords": [ 869 | "自动切换", 870 | "眩目", 871 | "辅助工具", 872 | "智能远近光控制系统", 873 | "夜间行车" 874 | ] 875 | }, 876 | { 877 | "question": "在什么情况下可以开启智能远近光控制系统?", 878 | "answer": " 当环境光线较暗,且车速在40km/h以上时,滚轮旋转到相应位置可以开启智能远近光控制系统。", 879 | "keywords": [ 880 | "滚轮旋转", 881 | "车速", 882 | "环境光线", 883 | "智能远近光控制系统", 884 | "相应位置" 885 | ] 886 | }, 887 | { 888 | "question": "哪些因素可能导致智能远近光控制系统无法正常工作?", 889 | "answer": "光线明暗对比不足(如雾天或雨天)、使用非Lynk &Co领克原装或二手市场挡风玻璃、特定的天气条件(如大雾、大雨、风雪、冻雨)以及某些道路情况(如灯光昏暗的街道、有行人、反光物、车辆行驶、山顶或坑洼道路、急转弯弯道等)都可能导致智能远近光控制系统无法正常工作。此外,当系统出现故障时,也需要手动切换远光灯和近光灯。", 890 | "keywords": [ 891 | "Lynk &Co领克", 892 | "天气条件", 893 | "道路情况", 894 | "挡风玻璃", 895 | "智能远近光控制系统" 896 | ] 897 | }, 898 | { 899 | "question": "如何启用前除霜/除雾功能?", 900 | "answer": 
"您可以使用空调系统面板上的按钮启用前除霜/除雾功能,或者通过中央显示屏空调控制面板进行操作。在显示屏上唤起空调控制界面,然后点击开启/关闭前除霜/除雾功能。当按下按钮后,最大除霜功能将启动,相应的指示灯也会点亮。", 901 | "keywords": [ 902 | "空调系统", 903 | "中央显示屏", 904 | "指示灯", 905 | "除雾功能", 906 | "前除霜" 907 | ] 908 | }, 909 | { 910 | "question": "当我开启前挡风玻璃的去冰去雾模式时,车辆会有什么反应?", 911 | "answer": "当您开启前挡风玻璃的去冰去雾模式时,车辆会自动激活最大除霜功能,空调系统启动,按钮上的指示灯会点亮。此外,如果您的空调系统配备有联动按键,A/C可能会自动激活并关闭空调内循环。同时,空调自动模式(AUTO)会开启最大风量,以快速清除挡风玻璃和车窗上的冰或雾气。", 912 | "keywords": [ 913 | "空调系统", 914 | "联动按键", 915 | "自动模式", 916 | "前挡风玻璃", 917 | "除霜功能" 918 | ] 919 | }, 920 | { 921 | "question": "为什么驾驶之前需要确保挡风玻璃无冰渣、积雪或冷凝水?", 922 | "answer": "这是一项重要的安全措施,因为冰渣、积雪或冷凝水会影响驾驶员的视线,降低驾驶安全性,可能导致交通事故。", 923 | "keywords": [ 924 | "交通事故", 925 | "安全措施", 926 | "冷凝水", 927 | "冰渣", 928 | "积雪" 929 | ] 930 | }, 931 | { 932 | "question": "什么情况下车辆需要进行报废处理?", 933 | "answer": "当车辆达到使用年限或不能满足道路使用条件时,应按照国家环保法规要求进行报废处理。", 934 | "keywords": [ 935 | "国家环保法规", 936 | "车辆", 937 | "道路使用条件", 938 | "使用年限", 939 | "报废处理" 940 | ] 941 | }, 942 | { 943 | "question": "什么是遥控泊车(RPA)?", 944 | "answer": "遥控泊车(RPA)是一种远程泊车控制功能,允许用户在车外通过Lynk&Co App控制车辆进行泊入、泊出、直线前进或后退。此功能主要包括直线遥控、遥控泊入和遥控泊出。在使用RPA前,需要阅读并同意免责声明,并且驾驶员在整个过程中仍需关注周围环境,确保安全。RPA适用于水平车位、垂直车位和斜列车位。", 945 | "keywords": [ 946 | "遥控泊车", 947 | "远程泊车", 948 | "免责声明", 949 | "直线遥控", 950 | "Lynk&Co App" 951 | ] 952 | }, 953 | { 954 | "question": "使用Lynk&CoApp的RPA功能需要注意什么?", 955 | "answer": "使用Lynk&CoApp的RPA功能时,需要注意以下几点:\n1. 确保车辆附近没有人员或障碍物,并时刻观察环境,必要时暂停RPA。\n2. 确认方向盘已回正。\n3. 不要在车内使用RPA功能。\n4. 避免在坡度、积雪、积水、凹坑等道路使用。\n5. 松开前进或后退按键后,车辆可能因延迟继续移动,察觉碰撞风险要提前停止。\n6. 泊车辅助传感器可能不发出警告或延迟警告,不能完全依赖其警示。\n7. 保持摄像头和传感器清洁,否则可能影响功能性能。\n8. 强光或雨雪可能影响传感器工作,降低RPA性能。\n9. RPA只适用于可预测的停车场或车库。\n10. 注意传感器可能无法有效检测某些类型的障碍物,如低矮、狭窄或高处的物体。\n11. 使用前确保车门、尾门、前舱盖关闭且车辆闭锁。\n12. RPA激活至车辆运动可能需要较长时间,要保持耐心。\n13. 
蓝牙信号不稳定可能影响RPA功能。\n\n请始终谨慎操作,遵守相关法律法规,对安全停车负全责。", 956 | "keywords": [ 957 | "摄像头", 958 | "传感器", 959 | "Lynk&CoApp", 960 | "泊车辅助", 961 | "RPA" 962 | ] 963 | }, 964 | { 965 | "question": "如何开启方向盘助力与驾驶模式联动?", 966 | "answer": "要开启方向盘助力与驾驶模式联动,您需要按照以下步骤操作:\n\n1. 进入中央显示屏的“座舱体验”菜单。\n2. 选择“方向盘”选项,进入方向盘设置界面。\n3. 在设置界面中,点击“设置方向盘驾驶模式联动”。\n4. 接着,您可以选择方向盘转向助力模式,包括“轻便”、“舒适”和“运动”。\n\n完成以上步骤后,车辆将根据您所选的驾驶模式自动调节方向盘助力特性。", 967 | "keywords": [ 968 | "方向盘设置", 969 | "转向助力模式", 970 | "驾驶模式", 971 | "中央显示屏", 972 | "方向盘助力" 973 | ] 974 | }, 975 | { 976 | "question": "有哪些可选的方向盘转向助力模式?", 977 | "answer": "可选的方向盘转向助力模式有轻便、舒适、运动三种。", 978 | "keywords": [ 979 | "舒适", 980 | "运动", 981 | "轻便" 982 | ] 983 | }, 984 | { 985 | "question": "主动式座舱清洁系统的作用是什么?", 986 | "answer": "主动式座舱清洁系统的作用是在上车前通过打开空调外循环模式,引入车外空气,以保持车内空气新鲜,同时在高温天气下可以帮助降低车内温度。当车辆长时间停车(如锁车3小时以上)后解锁,该系统会自动启动;打开车门后,系统会停止工作。如果车门一直未打开,系统会在一段时间后自动停止。", 987 | "keywords": [ 988 | "降低车内温度", 989 | "空调外循环模式", 990 | "车内空气新鲜", 991 | "自动启动", 992 | "主动式座舱清洁系统" 993 | ] 994 | }, 995 | { 996 | "question": "全景天窗是由几部分组成的?", 997 | "answer": "全景天窗由两部分组成。", 998 | "keywords": [ 999 | "全景天窗", 1000 | "两部分" 1001 | ] 1002 | }, 1003 | { 1004 | "question": "如何使用位置记忆功能?", 1005 | "answer": "要使用位置记忆功能,首先需要将驾驶员座椅、外后视镜和抬头显示(HUD)调整到所需位置。然后,打开中央显示屏,进入“座舱体验”菜单,再选择“座椅”界面。在那里,您可以选择“位置一”、“位置二”或“位置三”按键,并点击“保存”来存储设置。要恢复已存储的位置,只需点击“恢复”按键,座椅和外后视镜将会移动到之前存储的位置。请注意,车辆行驶时不应使用位置记忆功能,且在座椅移动时避免阻碍,以免造成损坏。", 1006 | "keywords": [ 1007 | "抬头显示", 1008 | "位置记忆功能", 1009 | "外后视镜", 1010 | "中央显示屏", 1011 | "驾驶员座椅" 1012 | ] 1013 | }, 1014 | { 1015 | "question": "如何更换后挡风玻璃雨刮片?", 1016 | "answer": "更换后挡风玻璃雨刮片的步骤如下:\n1. 将雨刮臂抬起。\n2. 沿箭头方向拆除旧的雨刮片。\n3. 按照与拆除相反的顺序安装新的雨刮片。\n\n注意:在更换前要确保雨刮片未冻结在挡风玻璃上,且在操作时应在挡位处于驻车挡(P),启用电子驻车制动(EPB),关闭雨刮器并启用雨刮片维修位置。拆除雨刮片时应使用软质毛巾保护挡风玻璃,防止刮伤。确保新安装的雨刮片适合您的车型,以避免车辆损伤。", 1017 | "keywords": [ 1018 | "电子驻车制动", 1019 | "雨刮片", 1020 | "雨刮臂", 1021 | "挡风玻璃", 1022 | "维修位置" 1023 | ] 1024 | }, 1025 | { 1026 | "question": "在更换雨刮片时需要注意什么?", 1027 | "answer": "在更换雨刮片时,需要注意以下几点:\n1. 
确保温度适宜,如果在冰点以下,需使用防冻洗涤液以防冰冻。\n2. 车辆应处于驻车挡(P)并启用电子驻车制动(EPB)。\n3. 关闭雨刮器并启用雨刮片维修位置。\n4. 拆除和安装过程中,使用软质毛巾保护挡风玻璃,防止刮伤。\n5. 确保新雨刮片适合你的车型,否则可能损坏车辆。\n6. 安装时要确保雨刮臂与雨刮片正确连接。\n\n另外,如果雨刮臂离开挡风玻璃,在未放回之前不要启动雨刮器,以免损坏机舱盖油漆和雨刮片。在拆卸前,要确保雨刮片未冻结在挡风玻璃上。", 1028 | "keywords": [ 1029 | "电子驻车制动", 1030 | "雨刮片", 1031 | "雨刮臂", 1032 | "挡风玻璃", 1033 | "防冻洗涤液" 1034 | ] 1035 | }, 1036 | { 1037 | "question": "如何添加香氛精油?", 1038 | "answer": "添加香氛精油的步骤如下:\n1. 打开手套箱。\n2. 向下取下香氛胶囊。\n3. 将香氛精油均匀地涂抹在香芯棒上(确保三面都被涂抹到)。\n4. 静置一分钟。\n5. 将香氛胶囊安装回原位。", 1039 | "keywords": [ 1040 | "手套箱", 1041 | "香氛胶囊", 1042 | "香芯棒", 1043 | "香氛精油", 1044 | "安装回原位" 1045 | ] 1046 | }, 1047 | { 1048 | "question": "我应该在哪里添加香氛精油?", 1049 | "answer": "香氛精油应该添加在手套箱内的香氛胶囊中。", 1050 | "keywords": [ 1051 | "手套箱", 1052 | "香氛胶囊", 1053 | "香氛精油" 1054 | ] 1055 | }, 1056 | { 1057 | "question": "什么是转向助力系统?", 1058 | "answer": "转向助力系统是一种电动助力转向系统,它能在驾驶员转动方向盘时提供助力,根据车速智能调节助力大小,以提升车辆的操控性和稳定性。", 1059 | "keywords": [ 1060 | "车速", 1061 | "转向助力系统", 1062 | "电动助力转向系统", 1063 | "操控性", 1064 | "方向盘" 1065 | ] 1066 | }, 1067 | { 1068 | "question": "涉水行驶前应注意什么?", 1069 | "answer": "涉水行驶前应先确定水深,水位最高不得超过车身下边缘,并且应低速通过积水路段,任何情况下都不要在水中停车、倒车。", 1070 | "keywords": [ 1071 | "涉水行驶", 1072 | "积水路段", 1073 | "车身下边缘", 1074 | "水中停车", 1075 | "水深" 1076 | ] 1077 | }, 1078 | { 1079 | "question": "涉水驾驶后需要进行哪些检查?", 1080 | "answer": "涉水驾驶后,应在车辆安全时立即执行以下检查:\n1. 轻踩制动踏板使制动器干燥,并检查制动器是否正常工作。\n2. 检查喇叭是否正常工作。\n3. 转动方向盘以检查转向助力是否正常工作。\n4. 
检查外部车灯是否正常工作。", 1081 | "keywords": [ 1082 | "转向助力", 1083 | "车灯", 1084 | "喇叭", 1085 | "制动器", 1086 | "涉水驾驶" 1087 | ] 1088 | }, 1089 | { 1090 | "question": "什么时候应该为车辆打蜡?", 1091 | "answer": "当水在干净的油漆表面上不再形成水滴而是直接流下时,就可以为车辆打蜡。即使经常在清洗车辆时使用含蜡溶剂,也建议每年至少为车辆打一次蜡,以保护车漆。", 1092 | "keywords": [ 1093 | "保护车漆", 1094 | "打蜡", 1095 | "清洗车辆", 1096 | "含蜡溶剂", 1097 | "车漆" 1098 | ] 1099 | } 1100 | ] -------------------------------------------------------------------------------- /data/test_question.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "question": "中国足球的队长是谁", 4 | "answer_1": "", 5 | "answer_2": "", 6 | "answer_3": "" 7 | }, 8 | { 9 | "question": "新冠肺炎如何预防?", 10 | "answer_1": "", 11 | "answer_2": "", 12 | "answer_3": "" 13 | }, 14 | { 15 | "question": "交通事故如何处理?", 16 | "answer_1": "", 17 | "answer_2": "", 18 | "answer_3": "" 19 | }, 20 | { 21 | "question": "怎样加热座椅?", 22 | "answer_1": "", 23 | "answer_2": "", 24 | "answer_3": "" 25 | }, 26 | { 27 | "question": "自动模式下,中央显示屏是如何切换日间和夜间模式的?", 28 | "answer_1": "", 29 | "answer_2": "", 30 | "answer_3": "" 31 | }, 32 | { 33 | "question": "如何通过中央显示屏进行副驾驶员座椅设置?", 34 | "answer_1": "", 35 | "answer_2": "", 36 | "answer_3": "" 37 | }, 38 | { 39 | "question": "副仪表台按钮如何操作中央显示屏?", 40 | "answer_1": "", 41 | "answer_2": "", 42 | "answer_3": "" 43 | }, 44 | { 45 | "question": "如何从锁定状态唤醒中央显示器?", 46 | "answer_1": "", 47 | "answer_2": "", 48 | "answer_3": "" 49 | }, 50 | { 51 | "question": "如何正确使用颈椎保护系统?", 52 | "answer_1": "", 53 | "answer_2": "", 54 | "answer_3": "" 55 | }, 56 | { 57 | "question": "前方交叉路口预警系统(FCTA)的作用是什么?", 58 | "answer_1": "", 59 | "answer_2": "", 60 | "answer_3": "" 61 | }, 62 | { 63 | "question": "在使用FCTA时需要注意哪些事项?", 64 | "answer_1": "", 65 | "answer_2": "", 66 | "answer_3": "" 67 | }, 68 | { 69 | "question": "如何打开车辆尾门?", 70 | "answer_1": "", 71 | "answer_2": "", 72 | "answer_3": "" 73 | }, 74 | { 75 | "question": "在哪些情况下智能钥匙可能会受到干扰,导致功能异常?", 76 | "answer_1": "", 77 | 
"answer_2": "", 78 | "answer_3": "" 79 | }, 80 | { 81 | "question": "车辆尾门的防夹保护功能是如何工作的?", 82 | "answer_1": "", 83 | "answer_2": "", 84 | "answer_3": "" 85 | }, 86 | { 87 | "question": "在操作电动后备厢时需要注意哪些事项?", 88 | "answer_1": "", 89 | "answer_2": "", 90 | "answer_3": "" 91 | }, 92 | { 93 | "question": "如何进入车辆功能界面?", 94 | "answer_1": "", 95 | "answer_2": "", 96 | "answer_3": "" 97 | }, 98 | { 99 | "question": "在车辆功能界面有哪些操作选项?", 100 | "answer_1": "", 101 | "answer_2": "", 102 | "answer_3": "" 103 | }, 104 | { 105 | "question": "如何编辑快捷开关图标?", 106 | "answer_1": "", 107 | "answer_2": "", 108 | "answer_3": "" 109 | }, 110 | { 111 | "question": "如何减少车辆腐蚀风险?", 112 | "answer_1": "", 113 | "answer_2": "", 114 | "answer_3": "" 115 | }, 116 | { 117 | "question": "如何通过空调系统面板调节空调风量?", 118 | "answer_1": "", 119 | "answer_2": "", 120 | "answer_3": "" 121 | }, 122 | { 123 | "question": "如何创建新的Lynk&CoID?", 124 | "answer_1": "", 125 | "answer_2": "", 126 | "answer_3": "" 127 | }, 128 | { 129 | "question": "什么是车主账户?", 130 | "answer_1": "", 131 | "answer_2": "", 132 | "answer_3": "" 133 | }, 134 | { 135 | "question": "如何创建人脸识别?", 136 | "answer_1": "", 137 | "answer_2": "", 138 | "answer_3": "" 139 | }, 140 | { 141 | "question": "如何添加亲情账号?", 142 | "answer_1": "", 143 | "answer_2": "", 144 | "answer_3": "" 145 | }, 146 | { 147 | "question": "如何开启或关闭用车偏好自动同步?", 148 | "answer_1": "", 149 | "answer_2": "", 150 | "answer_3": "" 151 | }, 152 | { 153 | "question": "如何熄火我的车辆?", 154 | "answer_1": "", 155 | "answer_2": "", 156 | "answer_3": "" 157 | }, 158 | { 159 | "question": "如何通过遥控钥匙启动车辆?", 160 | "answer_1": "", 161 | "answer_2": "", 162 | "answer_3": "" 163 | }, 164 | { 165 | "question": "如果遥控钥匙电池电量低,我应该如何启动车辆?", 166 | "answer_1": "", 167 | "answer_2": "", 168 | "answer_3": "" 169 | }, 170 | { 171 | "question": "如何调节外后视镜?", 172 | "answer_1": "", 173 | "answer_2": "", 174 | "answer_3": "" 175 | }, 176 | { 177 | "question": "外部反光境显示物体距离是否准确?", 178 | "answer_1": "", 179 | "answer_2": "", 180 | 
"answer_3": "" 181 | }, 182 | { 183 | "question": "什么是自动驻车系统?", 184 | "answer_1": "", 185 | "answer_2": "", 186 | "answer_3": "" 187 | }, 188 | { 189 | "question": "在什么情况下会停用AutoHold并启用EPB功能?", 190 | "answer_1": "", 191 | "answer_2": "", 192 | "answer_3": "" 193 | }, 194 | { 195 | "question": "中央扶手箱的USB接口有几个?它们分别是什么类型?", 196 | "answer_1": "", 197 | "answer_2": "", 198 | "answer_3": "" 199 | }, 200 | { 201 | "question": "中央扶手箱所支持的U盘及数据传输格式有哪些?", 202 | "answer_1": "", 203 | "answer_2": "", 204 | "answer_3": "" 205 | }, 206 | { 207 | "question": "如何通过中央显示屏调节驾驶员侧座椅通风强度?", 208 | "answer_1": "", 209 | "answer_2": "", 210 | "answer_3": "" 211 | }, 212 | { 213 | "question": "如何关闭前排座行车通风功能?", 214 | "answer_1": "", 215 | "answer_2": "", 216 | "answer_3": "" 217 | }, 218 | { 219 | "question": "如何进入系统设置界面?", 220 | "answer_1": "", 221 | "answer_2": "", 222 | "answer_3": "" 223 | }, 224 | { 225 | "question": "在系统界面可以进行哪些操作?", 226 | "answer_1": "", 227 | "answer_2": "", 228 | "answer_3": "" 229 | }, 230 | { 231 | "question": "什么是无钥匙进入系统?", 232 | "answer_1": "", 233 | "answer_2": "", 234 | "answer_3": "" 235 | }, 236 | { 237 | "question": "如何设置无钥匙解锁模式?", 238 | "answer_1": "", 239 | "answer_2": "", 240 | "answer_3": "" 241 | }, 242 | { 243 | "question": "设置无钥匙解锁中单门和全车的区别在于什么?", 244 | "answer_1": "", 245 | "answer_2": "", 246 | "answer_3": "" 247 | }, 248 | { 249 | "question": "驾驶车辆时应遵守哪些注意事项?", 250 | "answer_1": "", 251 | "answer_2": "", 252 | "answer_3": "" 253 | }, 254 | { 255 | "question": "如何启用或停用手套箱密码保护功能?", 256 | "answer_1": "", 257 | "answer_2": "", 258 | "answer_3": "" 259 | }, 260 | { 261 | "question": "驾驶员状态监测系统是如何工作的?", 262 | "answer_1": "", 263 | "answer_2": "", 264 | "answer_3": "" 265 | }, 266 | { 267 | "question": "什么情况下会影响到驾驶员状态监测系统的工作?", 268 | "answer_1": "", 269 | "answer_2": "", 270 | "answer_3": "" 271 | }, 272 | { 273 | "question": "如何启用后排儿童锁功能?", 274 | "answer_1": "", 275 | "answer_2": "", 276 | "answer_3": "" 277 | }, 278 | { 279 | "question": 
"安全气囊是什么?它的作用是什么?", 280 | "answer_1": "", 281 | "answer_2": "", 282 | "answer_3": "" 283 | }, 284 | { 285 | "question": "如果未使用或未正确使用安全带,会对安全气囊有何影响? ", 286 | "answer_1": "", 287 | "answer_2": "", 288 | "answer_3": "" 289 | }, 290 | { 291 | "question": "在使用车辆时,有哪些安全气囊的注意事项?", 292 | "answer_1": "", 293 | "answer_2": "", 294 | "answer_3": "" 295 | }, 296 | { 297 | "question": "如何开启动力电池电量保持功能?", 298 | "answer_1": "", 299 | "answer_2": "", 300 | "answer_3": "" 301 | }, 302 | { 303 | "question": "在开启动力电池的情况下,选择经济性优先和充电速度优先有什么区别?", 304 | "answer_1": "", 305 | "answer_2": "", 306 | "answer_3": "" 307 | }, 308 | { 309 | "question": "后方碰撞预警系统在什么情况下会启动?", 310 | "answer_1": "", 311 | "answer_2": "", 312 | "answer_3": "" 313 | }, 314 | { 315 | "question": "如何调整方向盘的位置?", 316 | "answer_1": "", 317 | "answer_2": "", 318 | "answer_3": "" 319 | }, 320 | { 321 | "question": "什么情况下不能调节车辆的方向盘?", 322 | "answer_1": "", 323 | "answer_2": "", 324 | "answer_3": "" 325 | }, 326 | { 327 | "question": "如何通过手机APP启动车辆?", 328 | "answer_1": "", 329 | "answer_2": "", 330 | "answer_3": "" 331 | }, 332 | { 333 | "question": "什么是陡坡缓降系统(HDC什么是陡坡缓降系统(HDC)?", 334 | "answer_1": "", 335 | "answer_2": "", 336 | "answer_3": "" 337 | }, 338 | { 339 | "question": "当坡度过大时,如何操作才能使车辆保持匀速地行驶?", 340 | "answer_1": "", 341 | "answer_2": "", 342 | "answer_3": "" 343 | }, 344 | { 345 | "question": "在什么情况下HDC会激活?", 346 | "answer_1": "", 347 | "answer_2": "", 348 | "answer_3": "" 349 | }, 350 | { 351 | "question": "在什么情况下无法激活或自动退出HDC功能?", 352 | "answer_1": "", 353 | "answer_2": "", 354 | "answer_3": "" 355 | }, 356 | { 357 | "question": "开启陡坡缓降系统后,组合仪表显示什么颜色的指示灯?", 358 | "answer_1": "", 359 | "answer_2": "", 360 | "answer_3": "" 361 | }, 362 | { 363 | "question": "激活陡坡缓降系统时,组合仪表显示什么颜色的指示灯?", 364 | "answer_1": "", 365 | "answer_2": "", 366 | "answer_3": "" 367 | }, 368 | { 369 | "question": "当陡坡缓降系统出现故障时,组合仪表会显示怎样的提示?", 370 | "answer_1": "", 371 | "answer_2": "", 372 | "answer_3": "" 373 | }, 374 | { 375 | "question": 
"坡道辅助系统的主要功能是什么?", 376 | "answer_1": "", 377 | "answer_2": "", 378 | "answer_3": "" 379 | }, 380 | { 381 | "question": "使用坡道辅助系统时需要注意哪些警告?", 382 | "answer_1": "", 383 | "answer_2": "", 384 | "answer_3": "" 385 | }, 386 | { 387 | "question": "在有路缘石的上坡和下坡驻车时,应如何操作?", 388 | "answer_1": "", 389 | "answer_2": "", 390 | "answer_3": "" 391 | }, 392 | { 393 | "question": "新车在最初的2000km行驶期间应遵守哪些事项?", 394 | "answer_1": "", 395 | "answer_2": "", 396 | "answer_3": "" 397 | }, 398 | { 399 | "question": "当刹车片将达到最小安全厚度时会有什么表现?", 400 | "answer_1": "", 401 | "answer_2": "", 402 | "answer_3": "" 403 | }, 404 | { 405 | "question": "如果听到持续的高频尖锐噪声应该怎么做?", 406 | "answer_1": "", 407 | "answer_2": "", 408 | "answer_3": "" 409 | }, 410 | { 411 | "question": "风挡摄像头和前雷达在什么情况下会弹出警示消息?", 412 | "answer_1": "", 413 | "answer_2": "", 414 | "answer_3": "" 415 | }, 416 | { 417 | "question": "如何通过蓝牙钥匙启动车辆?", 418 | "answer_1": "", 419 | "answer_2": "", 420 | "answer_3": "" 421 | }, 422 | { 423 | "question": "在什么情况下无法通过蓝牙钥匙启动车辆?", 424 | "answer_1": "", 425 | "answer_2": "", 426 | "answer_3": "" 427 | }, 428 | { 429 | "question": "如何通过网络远程启动发动机?", 430 | "answer_1": "", 431 | "answer_2": "", 432 | "answer_3": "" 433 | }, 434 | { 435 | "question": "变道辅助系统由哪三部分组成?", 436 | "answer_1": "", 437 | "answer_2": "", 438 | "answer_3": "" 439 | }, 440 | { 441 | "question": "如何检查制动液液位?", 442 | "answer_1": "", 443 | "answer_2": "", 444 | "answer_3": "" 445 | }, 446 | { 447 | "question": "如何调节车辆的背光亮度?", 448 | "answer_1": "", 449 | "answer_2": "", 450 | "answer_3": "" 451 | }, 452 | { 453 | "question": "在白天和夜晚,开启和未开启整车背光联动时,转动调光旋钮有什么不同?", 454 | "answer_1": "", 455 | "answer_2": "", 456 | "answer_3": "" 457 | }, 458 | { 459 | "question": "当车辆发生故障时,应该如何处理?", 460 | "answer_1": "", 461 | "answer_2": "", 462 | "answer_3": "" 463 | }, 464 | { 465 | "question": "为什么不建议自行修理车辆故障?", 466 | "answer_1": "", 467 | "answer_2": "", 468 | "answer_3": "" 469 | }, 470 | { 471 | "question": "什么是能量回收系统?", 472 | "answer_1": "", 473 | 
"answer_2": "", 474 | "answer_3": "" 475 | }, 476 | { 477 | "question": "如何知道车辆正在进行能量回收?", 478 | "answer_1": "", 479 | "answer_2": "", 480 | "answer_3": "" 481 | }, 482 | { 483 | "question": "影响能量回收多少的因素有哪些?", 484 | "answer_1": "", 485 | "answer_2": "", 486 | "answer_3": "" 487 | }, 488 | { 489 | "question": "什么是智能远近光控制系统?", 490 | "answer_1": "", 491 | "answer_2": "", 492 | "answer_3": "" 493 | }, 494 | { 495 | "question": "在什么情况下可以开启智能远近光控制系统?", 496 | "answer_1": "", 497 | "answer_2": "", 498 | "answer_3": "" 499 | }, 500 | { 501 | "question": "哪些因素可能导致智能远近光控制系统无法正常工作?", 502 | "answer_1": "", 503 | "answer_2": "", 504 | "answer_3": "" 505 | }, 506 | { 507 | "question": "如何启用前除霜/除雾功能?", 508 | "answer_1": "", 509 | "answer_2": "", 510 | "answer_3": "" 511 | }, 512 | { 513 | "question": "当我开启前挡风玻璃的去冰去雾模式时,车辆会有什么反应?", 514 | "answer_1": "", 515 | "answer_2": "", 516 | "answer_3": "" 517 | }, 518 | { 519 | "question": "为什么驾驶之前需要确保挡风玻璃无冰渣、积雪或冷凝水?", 520 | "answer_1": "", 521 | "answer_2": "", 522 | "answer_3": "" 523 | }, 524 | { 525 | "question": "什么情况下车辆需要进行报废处理?", 526 | "answer_1": "", 527 | "answer_2": "", 528 | "answer_3": "" 529 | }, 530 | { 531 | "question": "什么是遥控泊车(RPA)?", 532 | "answer_1": "", 533 | "answer_2": "", 534 | "answer_3": "" 535 | }, 536 | { 537 | "question": "使用Lynk&CoApp的RPA功能需要注意什么?", 538 | "answer_1": "", 539 | "answer_2": "", 540 | "answer_3": "" 541 | }, 542 | { 543 | "question": "如何开启方向盘助力与驾驶模式联动?", 544 | "answer_1": "", 545 | "answer_2": "", 546 | "answer_3": "" 547 | }, 548 | { 549 | "question": "有哪些可选的方向盘转向助力模式?", 550 | "answer_1": "", 551 | "answer_2": "", 552 | "answer_3": "" 553 | }, 554 | { 555 | "question": "主动式座舱清洁系统的作用是什么?", 556 | "answer_1": "", 557 | "answer_2": "", 558 | "answer_3": "" 559 | }, 560 | { 561 | "question": "全景天窗是由几部分组成的?", 562 | "answer_1": "", 563 | "answer_2": "", 564 | "answer_3": "" 565 | }, 566 | { 567 | "question": "如何使用位置记忆功能?", 568 | "answer_1": "", 569 | "answer_2": "", 570 | "answer_3": "" 571 | }, 572 | { 
573 | "question": "如何更换后挡风玻璃雨刮片?", 574 | "answer_1": "", 575 | "answer_2": "", 576 | "answer_3": "" 577 | }, 578 | { 579 | "question": "在更换雨刮片时需要注意什么?", 580 | "answer_1": "", 581 | "answer_2": "", 582 | "answer_3": "" 583 | }, 584 | { 585 | "question": "如何添加香氛精油?", 586 | "answer_1": "", 587 | "answer_2": "", 588 | "answer_3": "" 589 | }, 590 | { 591 | "question": "我应该在哪里添加香氛精油?", 592 | "answer_1": "", 593 | "answer_2": "", 594 | "answer_3": "" 595 | }, 596 | { 597 | "question": "什么是转向助力系统?", 598 | "answer_1": "", 599 | "answer_2": "", 600 | "answer_3": "" 601 | }, 602 | { 603 | "question": "涉水行驶前应注意什么?", 604 | "answer_1": "", 605 | "answer_2": "", 606 | "answer_3": "" 607 | }, 608 | { 609 | "question": "涉水驾驶后需要进行哪些检查?", 610 | "answer_1": "", 611 | "answer_2": "", 612 | "answer_3": "" 613 | }, 614 | { 615 | "question": "什么时候应该为车辆打蜡?", 616 | "answer_1": "", 617 | "answer_2": "", 618 | "answer_3": "" 619 | } 620 | ] 621 | -------------------------------------------------------------------------------- /example_test.py: -------------------------------------------------------------------------------- 1 | from chatgpt_proxy import ChatGPTProxy 2 | from rerank_model import reRankLLM 3 | from vllm_model import ChatLLM 4 | from retriever.m3e_retriever import M3eRetriever 5 | from retriever.bge_retriever import BgeRetriever 6 | from retriever.bm25_retriever import Bm25Retriever 7 | from retriever.tfidf_retriever import TfidfRetriever 8 | from generate_answer import get_emb_distribute_rerank 9 | 10 | 11 | def test_example(model_name="qwen2", 12 | reranker_name="bce", 13 | m3e_embeddings_model_path="./pre_train_model/m3e-large", 14 | bge_embeddings_model_path="./pre_train_model/bge-m3", 15 | data_path="./all_text.txt", 16 | m3e_vector_path="./vector_db/faiss_m3e_index", 17 | bge_vector_path="./vector_db/faiss_bge_index", 18 | mutil_max_length=4000, 19 | mutil_top_k=6): 20 | 21 | # 调用大模型 22 | if "gpt" in model_name: 23 | llm = ChatGPTProxy(model=model_name) 24 | else: 25 | llm 
# Build the generation prompt from the retrieved text and the user question.
def get_prompt_template(docs, query):
    """Return the final LLM prompt that answers *query* from *docs*.

    Args:
        docs: Retrieved manual text; it is pasted into the prompt verbatim.
        query: The user's question.

    Returns:
        The formatted prompt string.
    """
    # The first sentence used to be followed by a stray "]", which was sent to
    # the model as part of every prompt; it has been removed.
    template = (
        '基于以下已知信息,简洁和专业的来回答用户的问题。如果无法从中得到答案,请说"无答案" ,'
        '不允许在答案中添加编造成分,答案请使用中文。\n'
        '已知内容为吉利控股集团汽车销售有限公司的吉利用户手册:\n{retriever_text}\n问题: {question}\n回答:'
    )
    return template.format(retriever_text=docs, question=query)


def _concat_page_content(docs, max_length, top_k):
    """Concatenate ``page_content`` of the leading docs.

    Stops after ``top_k`` documents, or earlier when adding the next document
    would push the text past ``max_length`` characters.
    """
    text = ""
    for rank, doc in enumerate(docs):
        if rank >= top_k or len(text) + len(doc.page_content) > max_length:
            break
        text += doc.page_content
    return text


# Build a prompt from a single embedding-based recall result.
def get_emb_docs(m3e_context, query, max_length=4000, top_k=6):
    """Build a prompt from ``(document, score)`` pairs of one embedding retriever.

    Args:
        m3e_context: Sequence of ``(doc, score)`` pairs; each ``doc`` exposes
            ``page_content``.
        query: The user's question.
        max_length: Maximum number of characters of retrieved text.
        top_k: Maximum number of documents to use. The previous counter check
            ran after the append and therefore let ``top_k + 1`` documents in.

    Returns:
        ``(prompt, score_of_first_pair)``; the score is ``0.0`` for an empty
        context. The caller compares it against a distance threshold, so it is
        presumably the best (smallest) distance - verify against the retriever.
    """
    m3e_min_score = m3e_context[0][1] if len(m3e_context) > 0 else 0.0
    emb_ans = _concat_page_content((doc for doc, _ in m3e_context), max_length, top_k)
    return get_prompt_template(emb_ans, query), m3e_min_score


# Build a prompt from a single keyword-based recall result.
def get_distribute_docs(bm25_context, query, max_length=4000, top_k=6):
    """Build a prompt from a plain list of documents (BM25 / TF-IDF recall).

    Same packing rules as :func:`get_emb_docs`, but the context holds bare
    documents instead of ``(doc, score)`` pairs.
    """
    bm25_ans = _concat_page_content(bm25_context, max_length, top_k)
    return get_prompt_template(bm25_ans, query)


# Build a prompt from the reranked union of all recall paths.
def get_emb_distribute_rerank(rerank_model, m3e_context, bge_context, bm25_context, tfidf_context, query,
                              max_length=4000, top_k=6):
    """Merge all recall paths, rerank them and build the prompt.

    Args:
        rerank_model: Object with ``predict(query, docs)`` returning the docs
            ordered from most to least relevant.
        m3e_context, bge_context: ``(doc, score)`` pairs from the embedding
            retrievers.
        bm25_context, tfidf_context: Plain document lists.
        query: The user's question.
        max_length: Maximum number of characters of retrieved text.
        top_k: Maximum number of reranked documents to use.

    Returns:
        The formatted prompt string.
    """
    candidates = [doc for doc, _ in m3e_context]
    candidates.extend(doc for doc, _ in bge_context)
    candidates.extend(bm25_context)
    candidates.extend(tfidf_context)

    # The same chunk can come back from several retrievers. Keep the first
    # occurrence only, so duplicates neither waste reranker work nor fill
    # several of the top_k slots with identical text.
    items = []
    seen_text = set()
    for doc in candidates:
        if doc.page_content in seen_text:
            continue
        seen_text.add(doc.page_content)
        items.append(doc)

    # Nothing recalled: skip the reranker instead of feeding it an empty batch.
    rerank_ans = rerank_model.predict(query, items) if items else []
    rerank_text = _concat_page_content(rerank_ans, max_length, top_k)
    return get_prompt_template(rerank_text, query)


# Run RAG over the whole test set and store the generated answers.
def question_test(model_name=None, reranker_name=None, m3e_embeddings_model_path=None, bge_embeddings_model_path=None,
                  pdf_path=None, test_path=None, output_path=None, data_path=None, m3e_vector_path=None,
                  prompt_enhance=True, bge_vector_path=None, single_max_length=4000, single_top_k=6,
                  mutil_max_length=4000, mutil_top_k=6):
    """Answer every question in ``test_path`` and write the results to ``output_path``.

    For each question five prompts are built (reranked multi-path, m3e, bge,
    bm25, tfidf) and sent to the LLM as one batch; the answers are stored as
    ``answer_1`` .. ``answer_5`` on the question record. ``answer_6`` /
    ``answer_7`` are set to "无答案" when the first m3e / bge score exceeds 500.

    Args:
        model_name: LLM name; names containing "gpt" go through ChatGPTProxy,
            everything else through ChatLLM.
        reranker_name: Name handed to reRankLLM.
        m3e_embeddings_model_path, bge_embeddings_model_path: Embedding models.
        pdf_path, data_path, m3e_vector_path, bge_vector_path: Corpus sources
            forwarded unchanged to the retrievers.
        test_path: JSON file with a list of ``{"question": ...}`` records.
        output_path: Where the augmented records are written (UTF-8 JSON).
        prompt_enhance: When truthy, the LLM first drafts a short answer that
            is appended to the question before retrieval.
        single_max_length, single_top_k: Packing limits for single-path prompts.
        mutil_max_length, mutil_top_k: Packing limits for the reranked prompt.
    """
    start = time.time()

    m3e_retriever = M3eRetriever(m3e_embeddings_model_path, data_path, m3e_vector_path, pdf_path)
    print("m3e_retriever load ok")
    bge_retriever = BgeRetriever(bge_embeddings_model_path, data_path, bge_vector_path, pdf_path)
    print("bge_retriever load ok")
    bm25 = Bm25Retriever(data_path, pdf_path)
    print("bm25 load ok")
    tfidf = TfidfRetriever(data_path, pdf_path)
    print("tf-idf load ok")

    # Generation model
    if "gpt" in model_name:
        llm = ChatGPTProxy(model=model_name)
    else:
        llm = ChatLLM(model_name)
    print("llm load ok")

    # Rerank model
    rerank = reRankLLM(reranker_name)
    print("rerank model load ok")

    # Generate answers for every test question
    with open(test_path, "r", encoding="utf-8") as file:
        jdata = json.load(file)
    print(len(jdata))

    for line in tqdm(jdata, total=len(jdata)):
        query = line["question"]
        retriever_query = query
        if prompt_enhance:
            # Let the LLM draft a short answer and append it to the question;
            # only the retrieval query is enriched, the final prompt keeps the
            # original question.
            prompt = "请简洁地回答下面的问题,只需要输出答案,不要重复问题,不允许在答案中添加编造成分,注意输出内容控制在16个字以内。\n问题:" + query
            answer = llm.infer([prompt])
            retriever_query = query + answer[0]

        # Recall
        m3e_context = m3e_retriever.GetTopK(retriever_query, 15)
        bge_context = bge_retriever.GetTopK(retriever_query, 15)
        bm25_context = bm25.GetBM25TopK(retriever_query, 15)
        tfidf_context = tfidf.GetBM25TopK(retriever_query, 15)

        # Prompt construction (single path and reranked multi path)
        m3e_inputs, m3e_min_score = get_emb_docs(m3e_context, query, max_length=single_max_length,
                                                 top_k=single_top_k)
        bge_inputs, bge_min_score = get_emb_docs(bge_context, query, max_length=single_max_length,
                                                 top_k=single_top_k)
        bm25_inputs = get_distribute_docs(bm25_context, query, max_length=single_max_length, top_k=single_top_k)
        tfidf_inputs = get_distribute_docs(tfidf_context, query, max_length=single_max_length, top_k=single_top_k)
        mutil_rerank_inputs = get_emb_distribute_rerank(rerank, m3e_context, bge_context, bm25_context,
                                                        tfidf_context, query, max_length=mutil_max_length,
                                                        top_k=mutil_top_k)

        # One batch per question; the order defines answer_1 .. answer_5
        batch_input = [mutil_rerank_inputs, m3e_inputs, bge_inputs, bm25_inputs, tfidf_inputs]
        batch_output = llm.infer(batch_input)
        line["answer_1"] = batch_output[0]  # reranked multi-path recall
        line["answer_2"] = batch_output[1]  # m3e recall
        line["answer_3"] = batch_output[2]  # bge recall
        line["answer_4"] = batch_output[3]  # bm25 recall
        line["answer_5"] = batch_output[4]  # tfidf recall

        # If the m3e or bge score for the query is above 500, record "no answer"
        if m3e_min_score > 500:
            line["answer_6"] = "无答案"
        if bge_min_score > 500:
            line["answer_7"] = "无答案"

    # Save the results; the context manager closes the handle that the former
    # json.dump(..., open(...)) call leaked.
    with open(output_path, "w", encoding="utf-8") as out_file:
        json.dump(jdata, out_file, ensure_ascii=False, indent=2)
    end = time.time()

    # Convert to minutes first, then round (the old code truncated the seconds
    # and printed an unrounded quotient).
    print("cost time: " + str(round((end - start) / 60, 2)) + "minutes")
class DataProcess(object):
    """Extract text chunks from a PDF with several chunking strategies.

    Every parser appends its chunks to ``self.data`` (a list of strings) and
    skips chunks that are already stored, so the parsers can be run one after
    another on the same instance.
    """

    def __init__(self, pdf_path):
        self.pdf_path = pdf_path
        self.data = []

    @staticmethod
    def _page_text(page):
        """Return the cleaned text of one PyPDF2 page as a single string.

        Lines are stripped and concatenated without separators. Empty lines,
        pure page numbers and table-of-contents lines (dot leaders or "目录")
        are dropped.
        """
        kept = []
        for raw_line in (page.extract_text() or "").split("\n"):
            text = raw_line.strip()
            if not text or text.isdigit():
                continue
            if "...................." in text or "目录" in text:
                continue
            kept.append(text)
        return "".join(kept)

    def SlidingWindow(self, sentences, kernel=512, stride=1):
        """Store overlapping windows of consecutive sentences in ``self.data``.

        A window is emitted whenever the accumulated text plus the current
        sentence is longer than ``kernel`` characters; afterwards the oldest
        sentence is dropped from the front of the window.

        Args:
            sentences: Sentences without their trailing "。".
            kernel: Length threshold (in characters) that triggers a window.
            stride: Unused; kept so existing callers keep working.
        """
        cur = ""
        slow = 0  # index of the oldest sentence still inside the window
        for sentence in sentences:
            if len(cur + sentence) > kernel:
                # Compare exactly what is stored. The old check looked for
                # ``cur + sentence`` while storing ``cur + sentence + "。"``,
                # so it never matched and repeated runs duplicated every chunk.
                chunk = cur + sentence + "。"
                if chunk not in self.data:
                    self.data.append(chunk)
                # Always advance the window, even if the chunk was already
                # known, so the window size stays bounded.
                cur = cur[len(sentences[slow] + "。"):]
                slow = slow + 1
            cur = cur + sentence + "。"

    def ParseAllPage(self, max_seq=512, min_len=6):
        """Sliding-window parsing of the whole PDF.

        The cleaned text of all pages is joined, split on "。" and passed to
        :meth:`SlidingWindow`. Pages with fewer than ``min_len`` characters of
        cleaned text are ignored.
        """
        all_content = ""
        for page in PdfReader(self.pdf_path).pages:
            page_content = self._page_text(page)
            if len(page_content) < min_len:
                continue
            all_content = all_content + page_content
        sentences = all_content.split("。")
        self.SlidingWindow(sentences, kernel=max_seq)

    # Data filtering
    def Datafilter(self, line, header, pageid, max_seq=1024):
        """Clean one text block and store it, splitting it when too long.

        Blocks shorter than 6 characters are ignored. Blocks longer than
        ``max_seq`` are split on the first separator found among "■", "•",
        tab, falling back to "。"; only pieces with a length strictly between
        5 and ``max_seq`` are kept. Newlines, commas and tabs are removed
        before storing.

        Args:
            line: The text block.
            header: Unused; kept for interface compatibility.
            pageid: Unused; kept for interface compatibility.
            max_seq: Maximum length of a stored chunk.
        """
        sz = len(line)
        if sz < 6:
            return
        if sz > max_seq:
            separator = next((sep for sep in ("■", "•", "\t") if sep in line), "。")
            for subsentence in line.split(separator):
                subsentence = subsentence.replace("\n", "")
                if 5 < len(subsentence) < max_seq:
                    subsentence = subsentence.replace(",", "").replace("\n", "").replace("\t", "")
                    if subsentence not in self.data:
                        self.data.append(subsentence)
        else:
            line = line.replace("\n", "").replace(",", "").replace("\t", "")
            if line not in self.data:
                self.data.append(line)

    def GetHeader(self, page):
        """Return the header text of a pdfplumber page, or ``None``.

        ``None`` is returned when word extraction fails, the page has no
        words, or a table-of-contents marker is met before a header. The
        header is the first word whose ``top`` lies strictly between 17 and
        20; if there is none, the first word of the page is used.
        """
        try:
            lines = page.extract_words()[::]
        except Exception:
            # Best effort: a page that cannot be parsed is skipped by the caller.
            return None
        if not lines:
            return None
        for line in lines:
            if "目录" in line["text"] or ".........." in line["text"]:
                return None
            if 17 < line["top"] < 20:
                return line["text"]
        return lines[0]["text"]

    def ParseBlock(self, max_seq=1024):
        """Block parsing: try to keep a sub-heading and its body in one chunk.

        Words are read in text-flow order together with their font size. A
        change of font size closes the current block (unless it is still
        shorter than 15 characters, in which case it is treated as a heading
        and merged with what follows). Warning/notice markers also close the
        current block. Closed blocks go through :meth:`Datafilter`.
        """
        with pdfplumber.open(self.pdf_path) as pdf:
            for i, p in enumerate(pdf.pages):
                header = self.GetHeader(p)
                if header is None:
                    continue
                texts = p.extract_words(use_text_flow=True, extra_attrs=["size"])[::]
                squence = ""
                lastsize = 0
                for idx, line in enumerate(texts):
                    # The first word is always skipped, the second one only
                    # when it is a number (presumably header / page number).
                    if idx < 1:
                        continue
                    if idx == 1 and line["text"].isdigit():
                        continue
                    cursize = line["size"]
                    text = line["text"]
                    if text == "□" or text == "•":
                        continue
                    elif text == "警告!" or text == "注意!" or text == "说明!":
                        if len(squence) > 0:
                            self.Datafilter(squence, header, i, max_seq=max_seq)
                        squence = ""
                    elif format(lastsize, ".5f") == format(cursize, ".5f"):
                        # Same font size: keep extending the current block.
                        squence = squence + text if len(squence) > 0 else text
                    else:
                        lastsize = cursize
                        if 0 < len(squence) < 15:
                            squence = squence + text
                        else:
                            if len(squence) > 0:
                                self.Datafilter(squence, header, i, max_seq=max_seq)
                            squence = text
                # Flush the last block of the page.
                if len(squence) > 0:
                    self.Datafilter(squence, header, i, max_seq=max_seq)

    def ParseOnePageWithRule(self, max_seq=512, min_len=6):
        """Per-page parsing with simple overlapping chunks.

        Pages shorter than ``max_seq`` are stored whole. Longer pages are
        split on "。" and packed into chunks; each new chunk starts with the
        sentence that closed the previous one.
        """
        for page in PdfReader(self.pdf_path).pages:
            page_content = self._page_text(page)
            if len(page_content) < min_len:
                continue
            if len(page_content) < max_seq:
                if page_content not in self.data:
                    self.data.append(page_content)
                continue
            cur = ""
            pending = False  # True while cur holds text not stored yet
            for sentence in page_content.split("。"):
                if len(cur + sentence) > max_seq and (cur + sentence) not in self.data:
                    self.data.append(cur + sentence)
                    cur = sentence
                    pending = False
                else:
                    cur = cur + sentence
                    pending = True
            # The tail after the last full chunk used to be dropped silently.
            if pending and len(cur) >= min_len and cur not in self.data:
                self.data.append(cur)


if __name__ == "__main__":
    dp = DataProcess(pdf_path="data/car_user_manual.pdf")
    # Block parsing twice, with maximum lengths 1024 and 512
    dp.ParseBlock(max_seq=1024)
    dp.ParseBlock(max_seq=512)
    print(len(dp.data))
    # Sliding-window parsing twice, with maximum lengths 256 and 512
    dp.ParseAllPage(max_seq=256)
    dp.ParseAllPage(max_seq=512)
    print(len(dp.data))
    # Per-page rule parsing twice, with maximum lengths 256 and 512
    dp.ParseOnePageWithRule(max_seq=256)
    dp.ParseOnePageWithRule(max_seq=512)
    print(len(dp.data))
    # The final corpus is the union of the six parsing passes. It is written
    # as UTF-8 because every reader opens all_text.txt with encoding="utf-8".
    with open("all_text.txt", "w", encoding="utf-8") as out:
        for line in dp.data:
            out.write(line.strip("\n"))
            out.write("\n")
download URL:https://huggingface.co/BAAI/bge-m3 2 | 3 | Modelscope download URL:https://www.modelscope.cn/models/BAAI/bge-m3 -------------------------------------------------------------------------------- /pre_train_model/bge-reranker-large/README.md: -------------------------------------------------------------------------------- 1 | Hugging Face download URL:https://huggingface.co/BAAI/bge-reranker-large 2 | 3 | Modelscope download URL:https://www.modelscope.cn/models/BAAI/bge-reranker-large -------------------------------------------------------------------------------- /pre_train_model/m3e-large/README.md: -------------------------------------------------------------------------------- 1 | Hugging Face download URL:https://huggingface.co/moka-ai/m3e-large 2 | 3 | Modelscope download URL:https://www.modelscope.cn/models/AI-ModelScope/m3e-large -------------------------------------------------------------------------------- /pre_train_model/text2vec-base-chinese/README.md: -------------------------------------------------------------------------------- 1 | Hugging Face download URL:https://huggingface.co/shibing624/text2vec-base-chinese 2 | 3 | Modelscope download URL:https://www.modelscope.cn/models/thomas/text2vec-base-chinese -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.29.3 2 | faiss-gpu==1.7.2 3 | jieba==0.42.1 4 | langchain==0.1.16 5 | langchain-community==0.0.34 6 | modelscope==1.17.1 7 | pdfplumber==0.11.0 8 | PyPDF2==3.0.1 9 | rank-bm25==0.2.2 10 | sentence-transformers==2.7.0 11 | text2vec==1.2.9 12 | transformers==4.40.2 13 | torch==2.1.2 14 | vllm==0.4.0.post1 -------------------------------------------------------------------------------- /rerank_model.py: -------------------------------------------------------------------------------- 1 | from transformers import 
# Release cached GPU memory that is no longer in use.
def torch_gc():
    """Empty the CUDA caching allocator on ``CUDA_DEVICE`` when CUDA is available."""
    if torch.cuda.is_available():
        with torch.cuda.device(CUDA_DEVICE):
            torch.cuda.empty_cache()
            torch.cuda.ipc_collect()


# Cross-encoder reranker.
class reRankLLM(object):
    """Score ``(query, document)`` pairs with a sequence-classification model."""

    def __init__(self, reranker_name, max_length=512):
        """Load tokenizer and model for the chosen reranker onto the GPU.

        Args:
            reranker_name: ``"bce"`` or ``"bge"``.
            max_length: Maximum token length of a ``(query, doc)`` pair.

        Raises:
            ValueError: If ``reranker_name`` is not a known reranker. Before,
                an unknown name left ``reranker_path`` unset and failed later
                with an ``AttributeError``.
        """
        if reranker_name == "bce":
            self.reranker_path = BCE_reranker_model
        elif reranker_name == "bge":
            self.reranker_path = BGE_reranker_model
        else:
            raise ValueError(
                "unknown reranker_name %r, expected 'bce' or 'bge'" % (reranker_name,))
        self.tokenizer = AutoTokenizer.from_pretrained(self.reranker_path)
        self.model = AutoModelForSequenceClassification.from_pretrained(self.reranker_path)
        self.model.eval()
        self.model.half()
        self.model.cuda()
        self.max_length = max_length

    # Score every (query, doc) pair and return the docs from best to worst.
    def predict(self, query, docs):
        """Return ``docs`` sorted by descending relevance to ``query``.

        Args:
            query: The user's question.
            docs: Documents exposing ``page_content``.

        Returns:
            A new list with the same documents, most relevant first. Documents
            with equal scores keep their input order. An empty input yields an
            empty list without touching the model.
        """
        docs = list(docs)
        if not docs:
            # The tokenizer cannot build a batch from an empty list.
            return []
        pairs = [(query, doc.page_content) for doc in docs]
        inputs = self.tokenizer(pairs, padding=True, truncation=True, return_tensors='pt',
                                max_length=self.max_length).to("cuda")
        with torch.no_grad():
            logits = self.model(**inputs).logits
        # Flatten to one plain float per pair so sorting compares scalars
        # instead of one-element array rows.
        scores = logits.detach().float().view(-1).cpu().tolist()
        ranked = sorted(zip(scores, docs), key=lambda pair: pair[0], reverse=True)

        torch_gc()
        return [doc for _, doc in ranked]
# Dense semantic recall: BGE-M3 embeddings stored in a FAISS index.
class BgeRetriever(object):
    """Embedding retriever backed by a FAISS vector store."""

    def __init__(self, embeddings_model_path=None, data_path=None, vector_path=None, pdf_path=None):
        """Load an existing FAISS index or build one from text / PDF.

        Args:
            embeddings_model_path: Path of the BGE embedding model.
            data_path: Text file with one chunk per line; used only when
                ``vector_path`` is ``None``.
            vector_path: Directory of a saved FAISS index. When given, the
                index is loaded and ``data_path`` / ``pdf_path`` are ignored.
            pdf_path: PDF to parse; when given together with ``data_path`` the
                PDF result replaces the text-file chunks.

        Raises:
            ValueError: If an index has to be built but no chunk is available.
        """
        self.embeddings = HuggingFaceBgeEmbeddings(
            model_name=embeddings_model_path,
            model_kwargs={"device": "cuda"},
            encode_kwargs={'normalize_embeddings': True}
        )
        if vector_path is None:
            docs = []
            if data_path is not None:
                with open(data_path, "r", encoding="utf-8") as file:
                    docs = self.data_process(file)
            if pdf_path is not None:
                dp = DataProcess(pdf_path)
                dp.ParseBlock(max_seq=1024)
                dp.ParseBlock(max_seq=512)
                dp.ParseAllPage(max_seq=256)
                dp.ParseAllPage(max_seq=512)
                dp.ParseOnePageWithRule(max_seq=256)
                dp.ParseOnePageWithRule(max_seq=512)
                print("bge pdf_parse is ok")
                docs = self.data_process(dp.data)
            if not docs:
                # Fail with a clear message instead of an opaque error from
                # inside the index construction.
                raise ValueError("BgeRetriever needs vector_path, or a non-empty data_path / pdf_path")
            self.vector_store = FAISS.from_documents(docs, self.embeddings)
            # The freshly built index is always written to this fixed location
            # next to the package, independent of the arguments.
            self.vector_store.save_local(os.path.normpath(os.path.join(script_dir, "../vector_db/faiss_bge_index")))
            print("bge faiss vector_db is ok")
        else:
            # NOTE(review): allow_dangerous_deserialization loads pickled data;
            # only point vector_path at index directories you created yourself.
            self.vector_store = FAISS.load_local(vector_path, self.embeddings, allow_dangerous_deserialization=True)

        # Drops only this object's reference. The vector store was handed the
        # same embeddings object and presumably keeps it for embedding
        # queries - verify before relying on this to free GPU memory.
        del self.embeddings
        torch.cuda.empty_cache()

    # Turn parsed text lines into Documents.
    def data_process(self, data):
        """Convert an iterable of text lines into ``Document`` objects.

        Only the part before the first tab is used as content. Lines that are
        empty after stripping are skipped, so no empty chunks are embedded.
        ``metadata["id"]`` is the position of the line in ``data``.
        """
        docs = []
        for idx, line in enumerate(data):
            line = line.strip("\n").strip()
            if not line:
                continue
            words = line.split("\t")
            docs.append(Document(page_content=words[0], metadata={"id": idx}))
        return docs

    # Return the k best-scoring chunks.
    def GetTopK(self, query, k):
        """Return the ``k`` best ``(document, score)`` pairs for ``query``."""
        context = self.vector_store.similarity_search_with_score(query, k=k)
        return context

    def GetvectorStore(self):
        """Return the underlying FAISS vector store."""
        return self.vector_store
# BM25 keyword recall.
class Bm25Retriever(object):
    """BM25 retriever over jieba-tokenized chunks.

    ``self.documents`` holds the tokenized chunks that BM25 scores;
    ``self.full_documents`` holds the matching untokenized chunks that are
    returned to the caller. Both carry the source line number in
    ``metadata["id"]``.
    """

    def __init__(self, data_path=None, pdf_path=None):
        """Build the BM25 index from a text file and/or a PDF.

        Args:
            data_path: Text file with one chunk per line.
            pdf_path: PDF to parse; when given together with ``data_path`` the
                PDF result replaces the text-file chunks.

        Raises:
            ValueError: If no usable chunk is available.
        """
        docs = []
        full_docs = []
        if data_path is not None:
            with open(data_path, "r", encoding="utf-8") as file:
                docs, full_docs = self.data_process(file)
        if pdf_path is not None:
            dp = DataProcess(pdf_path)
            dp.ParseBlock(max_seq=1024)
            dp.ParseBlock(max_seq=512)
            dp.ParseAllPage(max_seq=256)
            dp.ParseAllPage(max_seq=512)
            dp.ParseOnePageWithRule(max_seq=256)
            dp.ParseOnePageWithRule(max_seq=512)
            print("bm25 pdf_parse is ok")
            docs, full_docs = self.data_process(dp.data)
        if not docs:
            raise ValueError("Bm25Retriever needs a non-empty data_path or pdf_path")
        self.documents = docs
        self.full_documents = full_docs
        self.retriever = self._init_bm25()

    # Turn parsed text lines into tokenized and untokenized Documents.
    def data_process(self, data):
        """Return ``(tokenized_docs, full_docs)`` for an iterable of lines.

        Lines shorter than 5 characters are skipped. Because of that, the
        ``metadata["id"]`` (the line number) is not the list position.
        """
        docs = []
        full_docs = []
        for idx, line in enumerate(data):
            line = line.strip("\n").strip()
            if len(line) < 5:
                continue
            tokens = " ".join(jieba.cut_for_search(line))
            docs.append(Document(page_content=tokens, metadata={"id": idx}))
            words = line.split("\t")
            full_docs.append(Document(page_content=words[0], metadata={"id": idx}))
        return docs, full_docs

    # Build the BM25 index.
    def _init_bm25(self):
        return BM25Retriever.from_documents(self.documents)

    def _full_docs_for(self, hits):
        """Map tokenized hits back to their untokenized documents by id.

        The lookup goes through ``metadata["id"]`` instead of list position:
        skipped short lines make the two diverge, which used to return the
        wrong chunk or raise ``IndexError``.
        """
        by_id = {doc.metadata["id"]: doc for doc in self.full_documents}
        return [by_id[hit.metadata["id"]] for hit in hits]

    # Return the top_k best-matching chunks.
    def GetBM25TopK(self, query, top_k):
        """Return up to ``top_k`` untokenized documents matching ``query``.

        A stray ``break`` used to cut the result to a single document.
        """
        self.retriever.k = top_k
        query = " ".join(jieba.cut_for_search(query))
        ans_docs = self.retriever.get_relevant_documents(query)
        return self._full_docs_for(ans_docs)
-------------------------------------------------------------------------------- /retriever/m3e_retriever.py: -------------------------------------------------------------------------------- 1 | from langchain.schema import Document 2 | from langchain.vectorstores import FAISS 3 | from langchain.embeddings.huggingface import HuggingFaceEmbeddings 4 | from pdf_parse import DataProcess 5 | import torch 6 | import os 7 | 8 | script_dir = os.path.dirname(__file__) 9 | 10 | 11 | # Dense语义召回M3E 12 | class M3eRetriever(object): 13 | def __init__(self, embeddings_model_path=None, data_path=None, vector_path=None, pdf_path=None): 14 | self.embeddings = HuggingFaceEmbeddings( 15 | model_name=embeddings_model_path, 16 | model_kwargs={"device": "cuda"}, 17 | encode_kwargs={"batch_size": 64} 18 | ) 19 | if vector_path is None: 20 | docs = [] 21 | if data_path is not None: 22 | with open(data_path, "r", encoding="utf-8") as file: 23 | docs = self.data_process(file) 24 | if pdf_path is not None: 25 | dp = DataProcess(pdf_path) 26 | dp.ParseBlock(max_seq=1024) 27 | dp.ParseBlock(max_seq=512) 28 | dp.ParseAllPage(max_seq=256) 29 | dp.ParseAllPage(max_seq=512) 30 | dp.ParseOnePageWithRule(max_seq=256) 31 | dp.ParseOnePageWithRule(max_seq=512) 32 | print("m3e pdf_parse is ok") 33 | docs = self.data_process(dp.data) 34 | self.vector_store = FAISS.from_documents(docs, self.embeddings) 35 | self.vector_store.save_local(os.path.normpath(os.path.join(script_dir, "../vector_db/faiss_m3e_index"))) 36 | print("m3e faiss vector_db is ok") 37 | else: 38 | self.vector_store = FAISS.load_local(vector_path, self.embeddings, allow_dangerous_deserialization=True) 39 | 40 | del self.embeddings 41 | torch.cuda.empty_cache() 42 | 43 | # 对pdf解析后的文本数据进行处理 44 | def data_process(self, data): 45 | docs = [] 46 | for idx, line in enumerate(data): 47 | line = line.strip("\n").strip() 48 | words = line.split("\t") 49 | docs.append(Document(page_content=words[0], metadata={"id": idx})) 50 | return docs 51 | 52 
# TF-IDF keyword recall.
class TfidfRetriever(object):
    """TF-IDF retriever over jieba-tokenized chunks.

    ``self.documents`` holds the tokenized chunks that are scored;
    ``self.full_documents`` holds the matching untokenized chunks that are
    returned to the caller. Both carry the source line number in
    ``metadata["id"]``.
    """

    def __init__(self, data_path=None, pdf_path=None):
        """Build the TF-IDF index from a text file and/or a PDF.

        Args:
            data_path: Text file with one chunk per line.
            pdf_path: PDF to parse; when given together with ``data_path`` the
                PDF result replaces the text-file chunks.

        Raises:
            ValueError: If no usable chunk is available.
        """
        docs = []
        full_docs = []
        if data_path is not None:
            with open(data_path, "r", encoding="utf-8") as file:
                docs, full_docs = self.data_process(file)
        if pdf_path is not None:
            dp = DataProcess(pdf_path)
            dp.ParseBlock(max_seq=1024)
            dp.ParseBlock(max_seq=512)
            dp.ParseAllPage(max_seq=256)
            dp.ParseAllPage(max_seq=512)
            dp.ParseOnePageWithRule(max_seq=256)
            dp.ParseOnePageWithRule(max_seq=512)
            print("tfidf pdf_parse is ok")
            docs, full_docs = self.data_process(dp.data)
        if not docs:
            raise ValueError("TfidfRetriever needs a non-empty data_path or pdf_path")
        self.documents = docs
        self.full_documents = full_docs
        self.retriever = self._init_tf_ide()

    # Turn parsed text lines into tokenized and untokenized Documents.
    def data_process(self, data):
        """Return ``(tokenized_docs, full_docs)`` for an iterable of lines.

        Lines shorter than 5 characters are skipped. Because of that, the
        ``metadata["id"]`` (the line number) is not the list position.
        """
        docs = []
        full_docs = []
        for idx, line in enumerate(data):
            line = line.strip("\n").strip()
            if len(line) < 5:
                continue
            tokens = " ".join(jieba.cut_for_search(line))
            docs.append(Document(page_content=tokens, metadata={"id": idx}))
            words = line.split("\t")
            full_docs.append(Document(page_content=words[0], metadata={"id": idx}))
        return docs, full_docs

    # Build the TF-IDF index.
    def _init_tf_ide(self):
        return TFIDFRetriever.from_documents(self.documents)

    def _full_docs_for(self, hits):
        """Map tokenized hits back to their untokenized documents by id.

        The lookup goes through ``metadata["id"]`` instead of list position:
        skipped short lines make the two diverge, which used to return the
        wrong chunk or raise ``IndexError``.
        """
        by_id = {doc.metadata["id"]: doc for doc in self.full_documents}
        return [by_id[hit.metadata["id"]] for hit in hits]

    # Return the top_k best-matching chunks. The method keeps its historical
    # "BM25" name because callers use the same call for both keyword retrievers.
    def GetBM25TopK(self, query, top_k):
        """Return up to ``top_k`` untokenized documents matching ``query``."""
        self.retriever.k = top_k
        query = " ".join(jieba.cut_for_search(query))
        ans_docs = self.retriever.get_relevant_documents(query)
        return self._full_docs_for(ans_docs)
def _str2bool(value):
    """Convert a command-line token into a real ``bool``.

    ``type=str`` would turn every supplied value (including "False" or "0")
    into a non-empty, and therefore truthy, string.  This converter accepts
    the usual spellings case-insensitively.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean,
            which argparse reports as a normal usage error.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ("true", "t", "yes", "y", "1", "on"):
        return True
    if token in ("false", "f", "no", "n", "0", "off"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


parser.add_argument('--bge_embeddings_model', default=BGE_embeddings_model_path, type=str,
                    help='The path to the text embedding model for recall used by BGE')

# Parsed with _str2bool so that "--prompt_enhance False" really disables it.
parser.add_argument('--prompt_enhance', default=True, type=_str2bool,
                    help='Choose to optimize the prompt (true/false)')

parser.add_argument('--single_max_length', default=4000, type=int,
                    help='The maximum text length for single-path recall')

parser.add_argument('--single_top_k', default=6, type=int,
                    help='The maximum number of retrievals for single-path recall')

parser.add_argument('--mutil_max_length', default=4000, type=int,
                    help='The maximum text length for multi-path recall')

parser.add_argument('--mutil_top_k', default=6, type=int,
                    help='The maximum number of retrievals for multi-path recall')

parser.add_argument('--pdf_path', default="./data/car_user_manual.pdf", type=str,
                    help='The path to the PDF file')

parser.add_argument('--test_path', default="./data/test_question.json", type=str,
                    help='The path to the test dataset')

parser.add_argument('--predict_path', default="./data/result.json", type=str,
                    help='The storage path for the prediction results')

parser.add_argument('--gold_path', default="./data/gold_result.json", type=str,
                    help='The path to the standard answer dataset')

parser.add_argument('--simModel_path', default=SimModel_path, type=str,
                    help='The similarity model used for calculating scores')

parser.add_argument('--metric_path', default="./data/metrics.json", type=str,
                    help='The storage path for evaluation metric data')

parser.add_argument('--data_path', default="./all_text.txt", type=str,
                    help='The storage path after parsing the PDF file')

parser.add_argument('--m3e_vector_path', default="./vector_db/faiss_m3e_index", type=str,
                    help='The vector database based on M3E recall')
# Threshold test on keyword coverage (despite the name, not a true Jaccard index).
def calc_jaccard(list_a, list_b, threshold=0.3):
    """Return 1 when ``list_a`` covers enough of ``list_b``, otherwise 0.

    The ratio is ``(number of items of list_a found in list_b) / len(list_b)``;
    duplicates in ``list_a`` are counted each time they occur.

    Args:
        list_a: Candidate items (here: the keywords found in a prediction).
        list_b: Reference items (here: all expected keywords).
        threshold: The ratio must be strictly greater than this to score 1.

    Returns:
        The int 1 or 0.
    """
    matched = sum(1 for item in list_a if item in list_b)
    # The epsilon keeps an empty reference list from dividing by zero.
    coverage = matched / (len(list_b) + 1e-6)
    return 1 if coverage > threshold else 0


def _load_json(path):
    """Read a UTF-8 encoded JSON file and close the handle afterwards."""
    # The answers are Chinese text and the metrics file is written as UTF-8,
    # so the platform default encoding must not be relied upon.
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)


# Score every prediction against its gold answer.
def report_score(gold_path, predict_path, sim_model):
    """Attach a score and the predicted answer to every gold entry.

    Gold entries are read through the keys "question", "keywords" and
    "answer"; predictions through "answer_1".  Entries are paired by position
    and pairing stops at the shorter file, so surplus gold entries are
    returned without a "score" key.

    Args:
        gold_path: Path of the JSON file with the reference answers.
        predict_path: Path of the JSON file with the predicted answers.
        sim_model: Object whose ``encode`` output is passed to
            ``semantic_search``; it is not used for gold answers equal to
            "无答案" ("no answer").

    Returns:
        The loaded gold list, each scored entry extended with "score" and
        "predict".
    """
    gold_info = _load_json(gold_path)
    pred_info = _load_json(predict_path)

    for gold_item, pred_item in zip(gold_info, pred_info):
        question = gold_item["question"]
        keywords = gold_item["keywords"]
        gold_answer = gold_item["answer"].strip()
        pred_answer = pred_item["answer_1"].strip()

        if gold_answer == "无答案":
            # Unanswerable question: all-or-nothing on the exact sentinel.
            score = 1.0 if pred_answer == gold_answer else 0.0
        else:
            semantic_score = semantic_search(
                sim_model.encode([gold_answer]), sim_model.encode(pred_answer), top_k=1
            )[0][0]['score']
            hit_keywords = [word for word in keywords if word in pred_answer]
            keyword_score = calc_jaccard(hit_keywords, keywords)
            # Final score: keyword match and semantic similarity weigh 50% each.
            score = 0.5 * keyword_score + 0.5 * semantic_score

        gold_item["score"] = score
        gold_item["predict"] = pred_answer
        print(f"预测: {question}, 得分: {score}")

    return gold_info
class ChatLLM(object):
    """Batch text generation on top of a locally loaded vLLM engine.

    Supported ``model_name`` values are "qwen2", "baichuan2" and "chatglm3";
    each maps to a checkpoint path taken from the project configuration and
    to its own prompt template.
    """

    def __init__(self, model_name):
        """Load the tokenizer and the vLLM engine for ``model_name``.

        Raises:
            ValueError: if ``model_name`` is not one of the supported models
                (previously this surfaced later as an AttributeError on
                ``self.model_path``).
        """
        self.model_name = model_name
        if model_name == "qwen2":
            self.model_path = Qwen2_path
        elif model_name == "baichuan2":
            self.model_path = Baichuan_path
        elif model_name == "chatglm3":
            self.model_path = ChatGLM_path
        else:
            raise ValueError(
                f"unsupported model_name {model_name!r}; "
                "expected 'qwen2', 'baichuan2' or 'chatglm3'"
            )
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, padding_side='left', trust_remote_code=True)
        self.model = LLM(model=self.model_path,
                         tokenizer=self.model_path,
                         tensor_parallel_size=1,  # on a multi-GPU host, set this to the number of GPUs
                         trust_remote_code=True,
                         gpu_memory_utilization=0.9,  # share of GPU memory handed to vLLM; tune as needed
                         dtype="bfloat16")
        # Sampling parameters for the LLM
        sampling_kwargs = {
            "stop_token_ids": [self.tokenizer.eos_token_id],
            "early_stopping": False,
            "top_p": 1.0,
            "top_k": -1,  # top_k must be -1 when beam search is used
            "temperature": 0.0,
            "max_tokens": 2000,
            "repetition_penalty": 1.05,
            "n": 1,
            "best_of": 2,  # number of candidates generated before the best one is kept
            "use_beam_search": True  # enable beam search
        }
        self.sampling_params = SamplingParams(**sampling_kwargs)

    def _build_prompt(self, question):
        """Wrap ``question`` in the chat template of the configured model."""
        if self.model_name == "qwen2":
            return f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant\n"
        if self.model_name == "baichuan2":
            return f"{question}"
        if self.model_name == "chatglm3":
            return f"<|system|>\nYou are a helpful assistant.\n<|user|>\n{question}\n<|assistant|>\n"
        raise ValueError(f"no prompt template for model_name {self.model_name!r}")

    @staticmethod
    def _strip_trailing_token(text, token):
        """Remove ``token`` from the end of ``text`` when it is really there.

        A missing (``None``) or empty token is ignored, and a token that only
        occurs in the middle of the text no longer causes the tail of the
        answer to be cut off.
        """
        if token and text.endswith(token):
            return text[:-len(token)]
        return text

    # Batch inference: takes a batch of prompts, returns a batch of answers
    def infer(self, prompts):
        """Generate one answer per prompt.

        Args:
            prompts: Iterable of user questions (plain strings).

        Returns:
            A list with the first generated candidate for each prompt, with a
            trailing EOS and/or PAD token removed.
        """
        batch_text = [self._build_prompt(q) for q in prompts]

        outputs = self.model.generate(batch_text, sampling_params=self.sampling_params)
        batch_response = []
        for output in outputs:
            output_str = output.outputs[0].text
            output_str = self._strip_trailing_token(output_str, self.tokenizer.eos_token)
            output_str = self._strip_trailing_token(output_str, self.tokenizer.pad_token)
            batch_response.append(output_str)

        # Hand cached GPU memory back once the batch is done.
        torch_gc()
        return batch_response