├── .gitignore ├── .template.clash_config.yaml ├── .template.env ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── app.py ├── chatbot.py ├── configs └── config.py ├── docker-compose.yml ├── environment.yml ├── ingest.py ├── requirements.txt ├── schema ├── __init__.py └── schemas.py ├── templates ├── __init__.py ├── condense_prompt.py ├── conversational_prompt.py └── qa_prompt.py ├── textsplitter ├── __init__.py └── chinese_text_splitter.py ├── utils ├── __init__.py ├── callback.py ├── log.py └── tools.py └── vectorstore.pkl /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | .chroma 163 | images 164 | vector_store 165 | draf.md 166 | temp 167 | docs/* 168 | !docs/周易 169 | question.md 170 | logs 171 | chat_history.json 172 | clash_config.yaml 173 | .DS_Store 174 | node_modules 175 | tot 176 | guidance_tool 177 | guidance_bot.py 178 | retrieval_chatbot.py 179 | tot_bot.py -------------------------------------------------------------------------------- /.template.clash_config.yaml: -------------------------------------------------------------------------------- 1 | # 务必指定external-ui 2 | external-ui: /opt/clash/ui 3 | # 否则UI服务无法启动 -------------------------------------------------------------------------------- /.template.env: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY="" 2 | PINECONE_API_KEY="" 3 | PINECONE_ENVIRONMENT="" 4 | PINECONE_INDEX="" 5 | https_proxy="http://127.0.0.1:7890" 6 | http_proxy="http://127.0.0.1:7890" 7 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 AS builder 2 | 3 | RUN sed -i 's#archive.ubuntu.com#mirrors.aliyun.com#g' /etc/apt/sources.list \ 4 | && sed -i 's#security.ubuntu.com#mirrors.aliyun.com#g' /etc/apt/sources.list 5 | 6 | ENV LANG=zh_CN.UTF-8 LANGUAGE=zh_CN:zh LC_ALL=zh_CN.UTF-8 DEBIAN_FRONTEND=noninteractive 7 | 8 | RUN rm -rf /etc/apt/sources.list.d/ && apt update 9 | 10 | RUN apt-get update && apt-get install -y --no-install-recommends \ 11 | zsh \ 12 | vim \ 13 | curl \ 14 | wget \ 15 | unzip \ 16 | supervisor \ 17 | ca-certificates \ 18 | language-pack-zh-hans 19 | 20 | RUN locale-gen zh_CN.UTF-8 21 | RUN dpkg-reconfigure locales 22 | 23 | CMD ["supervisord", "-n"] 24 | 25 | 26 | FROM builder AS conda 27 | ENV MINICONDA_VERSION 3 28 | ENV CONDA_FORGE https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge 29 | RUN 
chsh -s `which zsh` 30 | RUN curl -o ~/miniconda.sh -O https://mirrors.bfsu.edu.cn/anaconda/miniconda/Miniconda${MINICONDA_VERSION}-latest-Linux-x86_64.sh && \ 31 | chmod +x ~/miniconda.sh && \ 32 | ~/miniconda.sh -b -p /opt/conda && \ 33 | rm ~/miniconda.sh 34 | RUN ln /opt/conda/bin/conda /usr/local/bin/conda 35 | RUN conda init zsh 36 | RUN conda install mamba -n base -c ${CONDA_FORGE} 37 | RUN ln /opt/conda/bin/mamba /usr/local/bin/mamba && mamba init zsh 38 | 39 | 40 | FROM conda AS python 41 | ENV WORKDIR /app 42 | WORKDIR ${WORKDIR} 43 | ADD environment.yml /environment.yml 44 | RUN mamba clean --all -y && mamba update -n base -c ${CONDA_FORGE} conda mamba -y && mamba env create -f /environment.yml && rm -rf /root/.cache 45 | 46 | RUN echo "\ 47 | [program:be]\n\ 48 | directory=/app\n\ 49 | command=/opt/conda/envs/py310/bin/python /app/app.py\n\ 50 | autorestart=true\n\ 51 | startretries=100\n\ 52 | redirect_stderr=true\n\ 53 | stdout_logfile=/var/log/be.log\n\ 54 | stdout_logfile_maxbytes=50MB\n\ 55 | environment=PYTHONUNBUFFERED=1, PYTHONIOENCODING=utf-8\n\ 56 | " > /etc/supervisor/conf.d/be.conf 57 | 58 | FROM python AS clash 59 | WORKDIR /opt/clash 60 | RUN mkdir -p /root/.config/clash && \ 61 | wget -O /root/.config/clash/Country.mmdb https://download.fastgit.ixmu.net/Dreamacro/maxmind-geoip/releases/latest/download/Country.mmdb 62 | RUN wget https://download.fastgit.ixmu.net/Dreamacro/clash/releases/download/v1.11.8/clash-linux-amd64-v1.11.8.gz && \ 63 | gunzip clash-linux-amd64-v1.11.8.gz && \ 64 | mv clash-linux-amd64-v1.11.8 clash && \ 65 | chmod +x clash 66 | RUN wget https://download.fastgit.ixmu.net/haishanh/yacd/archive/refs/heads/gh-pages.zip && \ 67 | unzip gh-pages.zip && \ 68 | mv yacd-gh-pages ui && \ 69 | rm gh-pages.zip 70 | ADD ./clash_config.yaml /opt/clash/config.yaml 71 | RUN echo "\ 72 | [program:clash] \n\ 73 | command=/opt/clash/clash -f /opt/clash/config.yaml\n\ 74 | autorestart=True\n\ 75 | autostart=True\n\ 76 | redirect_stderr 
= true\n\ 77 | stdout_logfile=/var/log/clash.log\n\ 78 | stdout_logfile_maxbytes=50MB\n\ 79 | " > /etc/supervisor/conf.d/clash.conf 80 | EXPOSE 7890 7891 9090 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Toran Bruce Richards 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | chatbot: 2 | python app.py 3 | 4 | kill: 5 | kill -9 `lsof -t -i:9000` 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 7 | 8 | # langchain-chatbot 9 | Langchain-Chatbot 是基于 Langchain 框架的检索式和生成式聊天机器人。该机器人能够以对话的方式理解并回应用户的输入​1​。 10 | 11 | ## Running by command 12 | ``` 13 | pip install -r requirements.txt 14 | ``` 15 | 16 | ## copy env 17 | ```bash 18 | cp .template.env .env 19 | ``` 20 | 21 | ## edit env 22 | 配置你自己的environment,包括openai-key和科学上网代理 23 | 24 | ## 构建知识库 25 | 运行以下命令构建知识库 26 | ``` 27 | python ingest.py 28 | ``` 29 | 30 | ## run chatbot 31 | 运行以下命令启动聊天机器人 32 | ``` 33 | python chatbot.py 34 | ``` 35 | 36 | ## 运行应用 37 | 运行以下命令启动应用 38 | ``` 39 | python app.py 40 | ``` 41 | 42 | ## 通过HTTP服务器测试 43 | 您可以通过向 'http://127.0.0.1:9000/chat' 发送 POST 请求,其 JSON 主体包含聊天机器人要响应的文本,来测试聊天机器人。例如: 44 | ```curl 45 | curl --location --request POST 'http://127.0.0.1:9000/chat' \ 46 | --header 'Content-Type: application/json' \ 47 | --data-raw '{ 48 | "text": "你好" 49 | }' 50 | ``` 51 | 52 | ## docker构建 53 | 运行以下命令构建Docker镜像并启动Docker容器 54 | ``` 55 | docker-compose up -d 56 | ``` 57 | 58 | ## 进程管理工具 supervisor 59 | 您可以使用 Supervisor 来管理进程。这里是一些有用的命令: 60 | - 查看进程 `supervisorctl status` 61 | - 查看进程日志 `supervisorctl tail -f clash` 62 | - 查看进程日志文件 `/var/log/clash.log` 63 | 64 | ## langchain过河记系列文章 65 | - [langchain过河记(一)](https://zhuanlan.zhihu.com/p/630925973) 66 | - [langchain过河记(二)](https://zhuanlan.zhihu.com/p/630930843) 67 | - [langchain过河记(三)](https://zhuanlan.zhihu.com/p/630971903) 68 | - [langchain过河记(四)](https://zhuanlan.zhihu.com/p/631600368) 69 | 70 | ## external link 71 | [liveportraitweb](https://www.liveportraitweb.com/) 72 | [novelling](https://www.novelling.com/) 
73 | [Rewritifyai](https://www.rewritifyai.com/) 74 | [MMAudio](https://www.mmaudio.pro/) 75 | [Image To Video AI](https://imagetovideoai.space/) 76 | [Creator Viral Video](https://www.creatorviralvideo.com/) 77 | [Transpixar](https://www.transpixar.pro/) 78 | [Rednote](https://www.rednote.pro/) 79 | [RednoteApp](https://www.rednoteapp.pro/) 80 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-17 15:54:12 5 | LastEditTime: 2023-05-26 23:05:56 6 | ''' 7 | from fastapi.middleware.cors import CORSMiddleware 8 | from fastapi import FastAPI 9 | 10 | from schema import ChatItem 11 | from chatbot import get_chain 12 | 13 | from utils import logger, test_youtube_access 14 | 15 | from dotenv import load_dotenv 16 | load_dotenv() 17 | test_youtube_access() 18 | app = FastAPI() 19 | 20 | app.add_middleware(CORSMiddleware, 21 | allow_origins=["*"], 22 | allow_credentials=True, 23 | allow_methods=["*"], 24 | allow_headers=["*"]) 25 | 26 | conversation_chat = get_chain() 27 | 28 | @app.post("/chat", summary="chat接口", description="该接口为chat的接口") 29 | def chat(item: ChatItem): 30 | question = item.text 31 | result = conversation_chat(question) 32 | 33 | logger.info(f"chat result is {result['response']}") 34 | 35 | return { 36 | "result": result['response'] 37 | } 38 | 39 | @app.get("/ping") 40 | async def ping(): 41 | return "pong!!" 
42 | 43 | if __name__ == "__main__": 44 | import uvicorn 45 | uvicorn.run(app, host="0.0.0.0", port=9000) 46 | -------------------------------------------------------------------------------- /chatbot.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-14 17:16:12 5 | LastEditTime: 2023-05-26 23:06:49 6 | ''' 7 | from langchain.memory import ConversationBufferWindowMemory 8 | from langchain.chat_models import ChatOpenAI 9 | from langchain.chains import ConversationChain 10 | from langchain.prompts.prompt import PromptTemplate 11 | 12 | from configs.config import * 13 | 14 | from dotenv import load_dotenv 15 | load_dotenv() 16 | 17 | import code 18 | 19 | def get_chain(): 20 | conversation_memory = ConversationBufferWindowMemory( 21 | memory_key="history", 22 | k=5 23 | ) 24 | 25 | chat_llm = ChatOpenAI( 26 | model_name="gpt-3.5-turbo", 27 | temperature=TEMPERTURE 28 | ) 29 | 30 | DEFAULT_TEMPLATE = """这是一个专门用于回答占卜相关问题的工具。只要你提出与占卜相关的问题,或者明确说出"占卜",这个工具就会被启动来寻找最合适的答案。无论是初次的占卜询问,还是后续的深入探讨,这个工具都可以提供协助。 31 | 最重要的一点,这个工具占卜的方式是周易占卜,针对所有的问题,都是通过聊天的模式实现周易占卜。 32 | 33 | Current conversation: 34 | {history} 35 | Human: {input} 36 | AI:""" 37 | PROMPT = PromptTemplate(input_variables=["history", "input"], template=DEFAULT_TEMPLATE) 38 | 39 | conversation = ConversationChain( 40 | llm=chat_llm, 41 | memory=conversation_memory, 42 | prompt=PROMPT 43 | ) 44 | 45 | return conversation 46 | 47 | 48 | if __name__ == "__main__": 49 | from colorama import init, Fore, Style 50 | init() 51 | 52 | conversation_chat = get_chain() 53 | 54 | while True: 55 | question = input("Please enter your question (or type 'exit' to end): ") 56 | if question.lower() == 'exit': 57 | break 58 | 59 | result = conversation_chat(question) 60 | 61 | print(f'{Fore.BLUE}{Style.BRIGHT}AI:{Fore.RESET}{Style.NORMAL} {result["response"]}') 62 | 
-------------------------------------------------------------------------------- /configs/config.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-08 16:50:54 5 | LastEditTime: 2023-05-21 15:02:03 6 | ''' 7 | import os 8 | 9 | VS_METHOD = "faiss" # faiss/pinecone/chroma 10 | 11 | TEMPERTURE = 0.5 12 | 13 | DOCS_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "docs") 14 | 15 | MAX_TOKENS_LIMIT = 2000 16 | 17 | # 文本分句长度 18 | SENTENCE_SIZE = 2000 19 | 20 | # 匹配后单段上下文长度 21 | CHUNK_SIZE = 1000 22 | CHUNK_OVERLAP = 0 23 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | langchain-chatbot: 4 | hostname: langchain-chatbot 5 | container_name: langchain-chatbot 6 | restart: always 7 | image: langchain-chatbot 8 | privileged: true 9 | ipc: host 10 | tty: true 11 | # working_dir: /workspace 12 | ports: 13 | - '4080:8080' 14 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: py310 2 | channels: 3 | - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/ # Anocanda清华镜像 4 | - defaults 5 | - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/ 6 | dependencies: 7 | - python=3.10 8 | - ipython 9 | - pip 10 | - pip: 11 | - openai 12 | - fastapi 13 | - black 14 | - isort 15 | - websockets 16 | - pydantic 17 | - langchain 18 | - uvicorn 19 | - jinja2 20 | - faiss-cpu 21 | - bs4 22 | - unstructured 23 | - libmagic 24 | - colorama==0.4.6 25 | - pinecone-client==2.2.1 26 | - streamlit==1.22.0 27 | - pymongo 28 | - loguru 29 | - python-dotenv 30 | - -i https://mirror.baidu.com/pypi/simple 31 | prefix: /opt/conda/envs/py310 
-------------------------------------------------------------------------------- /ingest.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-10 14:12:34 5 | LastEditTime: 2023-05-16 15:04:47 6 | ''' 7 | import pickle 8 | 9 | from langchain.document_loaders import TextLoader, DirectoryLoader 10 | from langchain.embeddings.openai import OpenAIEmbeddings 11 | from textsplitter import ChineseTextSplitter 12 | from langchain.vectorstores.faiss import FAISS 13 | from configs.config import * 14 | 15 | from dotenv import load_dotenv 16 | load_dotenv() 17 | 18 | def ingest(): 19 | loader = DirectoryLoader(DOCS_ROOT_PATH, glob="**/*.txt", loader_cls=TextLoader) 20 | documents = loader.load() 21 | 22 | text_splitter = ChineseTextSplitter( 23 | chunk_size=CHUNK_SIZE, 24 | chunk_overlap=CHUNK_OVERLAP 25 | ) 26 | 27 | documents = text_splitter.split_documents(documents) 28 | embeddings = OpenAIEmbeddings(model='text-embedding-ada-002') 29 | 30 | vector_store = FAISS.from_documents(documents, embeddings) 31 | 32 | # Save vectorstore 33 | with open("vectorstore.pkl", "wb") as f: 34 | pickle.dump(vector_store, f) 35 | 36 | if __name__ == "__main__": 37 | ingest() -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | fastapi 3 | black 4 | isort 5 | websockets 6 | pydantic 7 | langchain 8 | uvicorn 9 | jinja2 10 | faiss-cpu 11 | bs4 12 | unstructured 13 | libmagic 14 | colorama==0.4.6 15 | pinecone-client==2.2.1 16 | streamlit==1.22.0 17 | pymongo 18 | loguru 19 | -------------------------------------------------------------------------------- /schema/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-17 15:54:12 5 | LastEditTime: 2023-05-18 
15:15:04 6 | ''' 7 | from .schemas import ChatResponse, ChatItem -------------------------------------------------------------------------------- /schema/schemas.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-17 15:54:12 5 | LastEditTime: 2023-05-17 16:43:13 6 | ''' 7 | """Schemas for the chat app.""" 8 | from pydantic import BaseModel, validator 9 | 10 | 11 | class ChatItem(BaseModel): 12 | text: str 13 | history: list=[] 14 | 15 | 16 | class ChatResponse(BaseModel): 17 | """Chat response schema.""" 18 | 19 | sender: str 20 | message: str 21 | type: str 22 | 23 | @validator("sender") 24 | def sender_must_be_bot_or_you(cls, v): 25 | if v not in ["bot", "you"]: 26 | raise ValueError("sender must be bot or you") 27 | return v 28 | 29 | @validator("type") 30 | def validate_message_type(cls, v): 31 | if v not in ["start", "stream", "end", "error", "info"]: 32 | raise ValueError("type must be start, stream or end") 33 | return v 34 | -------------------------------------------------------------------------------- /templates/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-09 15:25:46 5 | LastEditTime: 2023-05-24 19:09:23 6 | ''' 7 | from .condense_prompt import CONDENSE_PROMPT 8 | from .qa_prompt import QA_PROMPT 9 | from .conversational_prompt import PREFIX, SUFFIX, TEMPLATE_TOOL_RESPONSE, FORMAT_INSTRUCTIONS 10 | -------------------------------------------------------------------------------- /templates/condense_prompt.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-09 17:39:11 5 | LastEditTime: 2023-05-21 14:12:31 6 | ''' 7 | CONDENSE_PROMPT = """ 8 | 根据以下对话和一个后续问题,将后续问题改写成一个独立的问题。 9 | 10 | 聊天记录: 11 | {chat_history} 12 | 13 | 后续输入:{question} 14 | 独立问题: 15 | 
""" -------------------------------------------------------------------------------- /templates/conversational_prompt.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-24 19:08:36 5 | LastEditTime: 2023-05-24 19:15:42 6 | ''' 7 | PREFIX = """AI占卜助手是一个大型的语言模型,由OpenAI进行训练。它被设计成能够帮助进行周易占卜,并根据占卜的结果提供解释。 8 | 9 | AI占卜助手已经学习了周易占卜的知识,可以帮助用户进行占卜。它可以指导用户如何提出问题,如何进行占卜,以及如何解读占卜的结果。 10 | 11 | AI占卜助手的占卜步骤如下: 12 | 13 | 1. 提问:首先,AI占卜助手会引导用户提出一个特定的问题,这个问题应该是开放性的,不能简单地用"是"或"否"来回答。 14 | 15 | 2. 产生爻:然后,AI占卜助手会通过某种方式产生六个爻以形成卦象。这个过程可以是随机的,也可以是通过某种算法实现的。 16 | 17 | 3. 解读卦象:得到卦象后,AI占卜助手会解释这个卦象的含义。这个解释是基于周易的知识,也会考虑到用户的问题和情况。 18 | 19 | 4. 理解动爻:如果在产生爻的过程中有动爻(即6或9),AI占卜助手会解释这个动爻如何改变了卦象,以及这个改变如何影响到解答。 20 | 21 | 5. 反思与解答:最后,AI占卜助手会帮助用户理解卦象和动爻的含义,应用到他们的问题上,给出一个反思和解答。 22 | 23 | 请注意,尽管AI占卜助手具有进行周易占卜和解释结果的能力,但是它仍然只是一个AI模型,它的解答并不能预知未来,也不能替代专业的咨询或建议。请用户在理解和使用AI占卜助手的解答时,持有理性和批判性的态度。 24 | 25 | 当回答问题时,AI占卜助手必须使用以下语言:中文。 26 | """ 27 | 28 | 29 | SUFFIX = """TOOLS 30 | ------ 31 | Assistant can ask the user to use tools to look up information that may be helpful in answering the users original question. The tools the human can use are: 32 | 33 | {{tools}} 34 | 35 | {format_instructions} 36 | 37 | USER'S INPUT 38 | -------------------- 39 | Here is the user's input (remember to respond with a markdown code snippet of a json blob with a single action, and NOTHING else): 40 | 41 | {{{{input}}}}""" 42 | 43 | 44 | TEMPLATE_TOOL_RESPONSE = """TOOL RESPONSE: 45 | --------------------- 46 | {observation} 47 | 48 | USER'S INPUT 49 | -------------------- 50 | 51 | Okay, so what is the response to my last comment? If using information obtained from the tools you must mention it explicitly without mentioning the tool names - I have forgotten all TOOL RESPONSES! 
Remember to respond with a markdown code snippet of a json blob with a single action, and NOTHING else.""" 52 | 53 | 54 | FORMAT_INSTRUCTIONS = """RESPONSE FORMAT INSTRUCTIONS 55 | ---------------------------- 56 | 57 | When responding to me, please output a response in one of two formats: 58 | 59 | **Option 1:** 60 | Use this if you want the human to use a tool. 61 | Markdown code snippet formatted in the following schema: 62 | 63 | ```json 64 | {{{{ 65 | "action": string \\ The action to take. Must be one of {tool_names} 66 | "action_input": string \\ The input to the action 67 | }}}} 68 | ``` 69 | 70 | **Option #2:** 71 | Use this if you want to respond directly to the human. Markdown code snippet formatted in the following schema: 72 | 73 | ```json 74 | {{{{ 75 | "action": "Final Answer", 76 | "action_input": string \\ You should put what you want to return to use here 77 | }}}} 78 | ```""" 79 | -------------------------------------------------------------------------------- /templates/qa_prompt.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-09 15:24:53 5 | LastEditTime: 2023-05-21 14:12:39 6 | ''' 7 | QA_PROMPT = """ 8 | 您是一个有用的AI助手。请使用以下上下文信息来回答最后的问题。 9 | 如果您不知道答案,请直接说您不知道。请不要试图编造答案。 10 | 如果问题与上下文无关,请礼貌地回应您只能回答与上下文相关的问题。 11 | 回答时尽可能详细。 12 | 13 | {context} 14 | 15 | 问题: {question} 16 | 有帮助的答案: 17 | """ -------------------------------------------------------------------------------- /textsplitter/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-10 11:35:55 5 | LastEditTime: 2023-05-10 11:37:51 6 | ''' 7 | from .chinese_text_splitter import ChineseTextSplitter -------------------------------------------------------------------------------- /textsplitter/chinese_text_splitter.py: 
-------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-10 11:35:55 5 | LastEditTime: 2023-05-14 17:11:13 6 | ''' 7 | from langchain.text_splitter import CharacterTextSplitter 8 | import re 9 | from typing import List 10 | from configs.config import SENTENCE_SIZE 11 | 12 | """ 13 | class ChineseTextSplitter(CharacterTextSplitter): 14 | def __init__(self, **kwargs): 15 | super().__init__(**kwargs) 16 | 17 | def split_text(self, text: str) -> List[str]: 18 | text = re.sub(r'([;;.!?。!?\?])([^”’])', r"\1\n\2", text) # 单字符断句符 19 | text = re.sub(r'(\.{6})([^"’”」』])', r"\1\n\2", text) # 英文省略号 20 | text = re.sub(r'(\…{2})([^"’”」』])', r"\1\n\2", text) # 中文省略号 21 | text = re.sub(r'([;;!?。!?\?]["’”」』]{0,2})([^;;!?,。!?\?])', r'\1\n\2', text) 22 | text = text.replace("\u3000", "") 23 | # 如果双引号前有终止符,那么双引号才是句子的终点,把分句符\n放到双引号后,注意前面的几句都小心保留了双引号 24 | text = text.rstrip() 25 | ls = [text[i:i + SENTENCE_SIZE] for i in range(0, len(text), SENTENCE_SIZE)] 26 | 27 | return ls 28 | """ 29 | 30 | class ChineseTextSplitter(CharacterTextSplitter): 31 | def __init__(self, pdf: bool = False, **kwargs): 32 | super().__init__(**kwargs) 33 | self.pdf = pdf 34 | 35 | def split_text(self, text: str) -> List[str]: 36 | if self.pdf: 37 | text = re.sub(r"\n{3,}", "\n", text) 38 | text = re.sub('\s', ' ', text) 39 | text = text.replace("\n\n", "") 40 | sent_sep_pattern = re.compile( 41 | '([﹒﹔﹖﹗.。!?]["’”」』]{0,2}|(?=["‘“「『]{1,2}|$))') 42 | sent_list = [] 43 | for ele in sent_sep_pattern.split(text): 44 | if sent_sep_pattern.match(ele) and sent_list: 45 | sent_list[-1] += ele 46 | elif ele: 47 | sent_list.append(ele) 48 | return sent_list 49 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-21 14:11:51 5 | LastEditTime: 
2023-05-26 19:24:25 6 | ''' 7 | from .log import logger 8 | from .callback import StreamingLLMCallbackHandler, QuestionGenCallbackHandler 9 | from .tools import load_tools, test_youtube_access 10 | -------------------------------------------------------------------------------- /utils/callback.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-10 17:21:02 5 | LastEditTime: 2023-05-10 17:22:19 6 | ''' 7 | 8 | """Callback handlers used in the app.""" 9 | import sys 10 | 11 | sys.path.append("..") 12 | 13 | from typing import Any, Dict, List 14 | from langchain.callbacks.base import AsyncCallbackHandler 15 | from schema import ChatResponse 16 | 17 | 18 | class StreamingLLMCallbackHandler(AsyncCallbackHandler): 19 | """Callback handler for streaming LLM responses.""" 20 | 21 | def __init__(self, websocket): 22 | self.websocket = websocket 23 | 24 | async def on_llm_new_token(self, token: str, **kwargs: Any) -> None: 25 | resp = ChatResponse(sender="bot", message=token, type="stream") 26 | await self.websocket.send_json(resp.dict()) 27 | 28 | 29 | class QuestionGenCallbackHandler(AsyncCallbackHandler): 30 | """Callback handler for question generation.""" 31 | 32 | def __init__(self, websocket): 33 | self.websocket = websocket 34 | 35 | async def on_llm_start( 36 | self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any 37 | ) -> None: 38 | """Run when LLM starts running.""" 39 | resp = ChatResponse( 40 | sender="bot", message="Synthesizing question...", type="info" 41 | ) 42 | await self.websocket.send_json(resp.dict()) 43 | -------------------------------------------------------------------------------- /utils/log.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2022-08-30 11:39:10 5 | LastEditTime: 2023-05-18 18:35:33 6 | ''' 7 | import os 8 | from loguru import 
logger 9 | 10 | currentdirPath = os.path.dirname(__file__) 11 | logger_path = os.path.join(currentdirPath, '../logs/file_{time}.log') 12 | 13 | logger.add(logger_path, rotation="1 MB", enqueue=True, backtrace=True, diagnose=True, serialize=True) -------------------------------------------------------------------------------- /utils/tools.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Author: colin gao 4 | Date: 2023-05-26 17:54:39 5 | LastEditTime: 2023-05-26 23:06:42 6 | ''' 7 | import sys 8 | sys.path.append("..") 9 | 10 | from langchain.chains import RetrievalQA 11 | from langchain.chat_models import ChatOpenAI 12 | from langchain.callbacks import get_openai_callback 13 | 14 | from configs.config import * 15 | from .log import logger 16 | 17 | def load_tools(vectorstore): 18 | def searchVector(key_word): 19 | chat_llm = ChatOpenAI( 20 | model_name="gpt-3.5-turbo", 21 | temperature=TEMPERTURE 22 | ) 23 | 24 | retriever = RetrievalQA.from_chain_type( 25 | llm=chat_llm, 26 | chain_type="stuff", 27 | retriever=vectorstore.as_retriever(), 28 | return_source_documents=False 29 | ) 30 | 31 | result = retriever.run(key_word) 32 | 33 | return result 34 | 35 | dict_tools = { 36 | 'Vector Search': searchVector 37 | } 38 | return dict_tools 39 | 40 | 41 | def count_tokens(chain, query): 42 | with get_openai_callback() as cb: 43 | result = chain.run(query) 44 | logger.info(f'Spent a total of {cb.total_tokens} tokens') 45 | 46 | return result 47 | 48 | import requests 49 | def test_youtube_access(in_logger=''): 50 | logger = print if not in_logger else in_logger.info 51 | url = "https://www.youtube.com" 52 | headers = { 53 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" 54 | } 55 | 56 | try: 57 | response = requests.get(url, headers=headers, timeout=5) 58 | if response.status_code == 200: 59 | logger("成功访问YouTube") 60 | return 
True 61 | else: 62 | logger(f"访问YouTube失败,状态码:{response.status_code}") 63 | return False 64 | except requests.exceptions.RequestException as e: 65 | logger(f"访问YouTube时出现异常: {e}") 66 | return False 67 | -------------------------------------------------------------------------------- /vectorstore.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GaoQ1/langchain-chatbot/7050791ee0d6dceb707f025159b633af6ea24a04/vectorstore.pkl --------------------------------------------------------------------------------